## Used to Develop New Benchmarking Features
### Alan's Notebook
Assumes that `df_raw.pkl`, `df_processed.pkl`, and `util_obj.pkl` are located in the directory above this one.

In [2]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
import sys
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu


In [3]:
# Load the three pickled inputs from the parent directory.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source (i.e. ones produced by this project).

# Raw billing records, exactly as they were saved.
df_raw = pd.read_pickle('../df_raw.pkl')

# Processed (cleaned-up) billing records.
dfp = pd.read_pickle('../df_processed.pkl')

# Utility object.  Open the file in a `with` block so the file handle is
# closed as soon as loading finishes instead of being left open.
with open('../util_obj.pkl', 'rb') as util_file:
    ut = pickle.load(util_file)

In [4]:
# Peek at row 0 of the raw data (all columns) to see which fields it holds
df_raw.iloc[0, :]

Site ID                                              CLXGP2
Site Name                 FNSB-CLRA River Park General Area
Vendor Code                                        VU797000
Vendor Name                         Army Corps of Engineers
Account Number                                01 River Park
Bill Date                                        01/31/2010
Due Date                                         01/31/2010
Entry Date                                       01/13/2011
Invoice #                                               NaN
Voucher #                                               NaN
From                                    2009-12-31 00:00:00
Thru                                    2010-01-31 00:00:00
Service Name                                    Electricity
Item Description                              Energy charge
Meter Number                                            NaN
Usage                                                     0
Cost                                    

In [5]:
# Peek at row 0 of the processed data (all columns) for comparison with the raw record
dfp.iloc[0, :]

site_id                    03
service_type      electricity
cal_year                 2009
cal_mo                     12
item_desc       Energy charge
units                     kWh
cost                  2607.98
usage                 14360.6
mmbtu                 48.9985
fiscal_year              2010
fiscal_mo                   6
Name: 0, dtype: object

In [6]:
# Print a structural summary of the processed DataFrame: row count, each
# column's dtype and non-null count, and the approximate memory usage.
dfp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64343 entries, 0 to 64342
Data columns (total 11 columns):
site_id         64343 non-null object
service_type    64343 non-null object
cal_year        64343 non-null int64
cal_mo          64343 non-null int64
item_desc       64343 non-null object
units           64343 non-null object
cost            64343 non-null float64
usage           64343 non-null float64
mmbtu           64343 non-null float64
fiscal_year     64343 non-null int64
fiscal_mo       64343 non-null int64
dtypes: float64(3), int64(4), object(4)
memory usage: 5.4+ MB


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the processed DataFrame.
dfp.describe()

Unnamed: 0,cal_year,cal_mo,cost,usage,mmbtu,fiscal_year,fiscal_mo
count,64343.0,64343.0,64343.0,64343.0,64343.0,64343.0,64343.0
mean,2014.018697,6.497941,1450.56796,11312.06,51.650916,2014.517119,6.516871
std,2.484807,3.448452,3878.98522,43765.35,1357.65548,2.515183,3.463266
min,2009.0,1.0,-15844.611452,-6104.919,0.0,2009.0,1.0
25%,2012.0,4.0,20.0,0.0,0.0,2012.0,3.0
50%,2014.0,6.0,126.37679,0.0,0.0,2015.0,7.0
75%,2016.0,9.0,876.723269,1496.329,1.005428,2017.0,10.0
max,2018.0,12.0,149076.345136,1946610.0,239533.571429,2019.0,12.0


In [8]:
# Number of records for each service type, most frequent first
dfp['service_type'].value_counts()

electricity      27682
water            13502
sewer             7835
fuel_oil          5965
refuse            4448
natural_gas       3346
district_heat     1565
Name: service_type, dtype: int64

In [9]:
# Number of records for each item description, most frequent first
dfp['item_desc'].value_counts()

Other Charge             23892
Energy charge            10530
Water Usage (Gallons)     7825
KW Charge                 7802
Fuel Oil #1 (Gallons)     5965
Sewer Usage (Gallons)     2479
Refuse (Loads)            1823
Natural gas (CCF)         1802
Electricity charge         631
Steam (MMBtu) CDHW         536
Steam (klbs)               448
Steam (lbs)                285
Water (kGal)               171
Refuse (Tons)              119
Utility Charge              21
Demand Charge               11
Water (Cgallons)             3
Name: item_desc, dtype: int64

In [10]:
# Cross-tabulate service type against units: each cell is the number of
# records (non-null site_id values) for that service type / unit pair.
dfp.pivot_table(
    values='site_id',
    index='service_type',
    columns='units',
    aggfunc='count',
)

units,-,CCF,Cgallons,Gallons,Loads,MMBtu,Tons,kGal,kW,kWh,klbs,lbs
service_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
district_heat,332.0,,,,,517.0,,,,,441.0,275.0
electricity,14914.0,,,,,,,,3157.0,9611.0,,
fuel_oil,107.0,,,5858.0,,,,,,,,
natural_gas,2121.0,1225.0,,,,,,,,,,
refuse,2640.0,,,,1772.0,,36.0,,,,,
sewer,5872.0,,,1963.0,,,,,,,,
water,6998.0,,3.0,6364.0,,,,137.0,,,,


In [11]:
# Try out the building_info() method of the Utility object for site ID '03'.
# As the output below shows, it returns a dict describing the site: name,
# category, square footage, degree-day site, and the provider ('source_*')
# and account number ('acct_*') for each utility service.
ut.building_info('03')

{'site_name': 'SD-Denali Elementary',
 'site_category': 'School District',
 'address': nan,
 'city': nan,
 'group': 'School District',
 'division': nan,
 'campus': nan,
 'primary_func': 'Elementary School',
 'year_built': nan,
 'sq_ft': 49210.0,
 'onsite_gen': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'source_fuel_oil': 'Sourdough Fuel  (Petro Star)',
 'source_natural_gas': 'Fairbanks Natural Gas',
 'source_electricity': 'Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities',
 'source_sewer': 'Golden Heart Utilities',
 'source_refuse': 'Alaska Waste',
 'acct_fuel_oil': '55010001',
 'acct_natural_gas': '10283 - DENALI',
 'acct_electricity': '172230',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1998005610',
 'acct_sewer': '1998005610',
 'acct_refuse': 'AW23-DNL03',
 'site_id': '03'}