# Shows how to Use the Data Alan Created and Use Utility Functions

In [1]:
import pickle
from collections import namedtuple
import pandas as pd
import numpy as np
from importlib import reload
import sys

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util

## Unpickle the Dataframe for Preprocessed Utility Data & Make Utility Function Object

In [26]:
df = pickle.load(open('dfu3.pkl', 'rb'))

# this is only needed to update any code changes I may have made
# since last importing the module.
reload(bench_util)     

# Make an object that has the various utility functions.
# The object needs access to the raw utility bill DataFrame and the spreadsheet
# containing other application data.
df_raw = pickle.load(open('df_raw.pkl', 'rb'))
ut = bench_util.Util(df_raw, '../data/Other_Building_Data.xlsx')

df.head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2010,7,Electricity charge,-,24.137931,35.0,0.0,2011,1,
1,3,Electricity,2010,7,Electricity charge,kWh,1304.274655,9.5,6656.551724,2011,1,22.712154
2,3,Electricity,2010,8,Electricity charge,-,39.498433,62.0,0.0,2011,2,
3,3,Electricity,2010,8,Electricity charge,kWh,4940.646254,31.0,26822.236155,2011,2,91.51747
4,3,Electricity,2010,9,Electricity charge,-,16.363636,27.0,0.0,2011,3,


## DataFrame for "Utility Cost Overview" Report Page

In [3]:
site = 'ANSBG1'
# From the main DataFrame, get only the rows for this site, and only get
# the needed columns for this analysis
df1 = df.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo', 'cost']]
df1.head()

Unnamed: 0,service_type,fiscal_year,fiscal_mo,cost
17200,Electricity,2011,1,602.3
17201,Electricity,2011,1,0.0
17202,Electricity,2011,1,699.36
17203,Electricity,2011,2,1212.035
17204,Electricity,2011,2,0.0


In [4]:
df2 = pd.pivot_table(
    df1, 
    values='cost', 
    index=['fiscal_year'], 
    columns=['service_type'],
    aggfunc=np.sum
)
df2

service_type,Electricity,Natural Gas,Sewer,Water
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667
2012,41994.697841,94490.242371,5484.028148,5090.420833
2013,36961.451595,79970.329194,5169.3245,4783.8545
2014,35843.822155,48967.142581,4840.116833,4375.768167
2015,38017.419167,47128.677258,4593.016167,4156.942333
2016,29639.448485,45647.201628,6108.2275,5113.3625
2017,21849.234848,26682.420469,3326.44,2875.33


In [5]:
# Add in columns for the missing services
missing_services = bench_util.missing_services(df2.columns)
bench_util.add_columns(df2, missing_services)
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0


In [6]:
# Add a Total column that sums the other columns
df2['Total'] = df2.sum(axis=1)
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse,Total
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0,153849.180928
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0,147059.389193
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0,126884.959788
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0,94026.849736
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0,93896.054925
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0,86508.240113
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0,54733.425317


In [7]:
# Add a percent change column
df2['pct_change'] = df2.Total.pct_change()
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse,Total,pct_change
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0,153849.180928,
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0,147059.389193,-0.044133
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0,126884.959788,-0.137186
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0,94026.849736,-0.25896
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0,93896.054925,-0.001391
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0,86508.240113,-0.078681
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0,54733.425317,-0.367304


In [8]:
# Add in degree days
months_present = bench_util.months_present(df1)
deg_days = ut.degree_days_yearly(months_present, site)
df2['hdd'] = deg_days
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse,Total,pct_change,hdd
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0,153849.180928,,13802
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0,147059.389193,-0.044133,12393
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0,126884.959788,-0.137186,12171
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0,94026.849736,-0.25896,13802
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0,93896.054925,-0.001391,12393
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0,86508.240113,-0.078681,12171
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0,54733.425317,-0.367304,12356


In [9]:
# Add in a column to show the numbers of months present for each year
# This will help to identify partial years.
mo_count = bench_util.month_count(months_present)
df2['month_count'] = mo_count
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse,Total,pct_change,hdd,month_count
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0,153849.180928,,13802,12
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0,147059.389193,-0.044133,12393,12
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0,126884.959788,-0.137186,12171,12
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0,94026.849736,-0.25896,13802,12
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0,93896.054925,-0.001391,12393,12
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0,86508.240113,-0.078681,12171,12
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0,54733.425317,-0.367304,12356,9


In [10]:
# Reverse the DataFrame
df2.sort_index(ascending=False, inplace=True)
df2

service_type,Electricity,Natural Gas,Sewer,Water,Oil #1,Steam,Refuse,Total,pct_change,hdd,month_count
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017,21849.234848,26682.420469,3326.44,2875.33,0.0,0.0,0.0,54733.425317,-0.367304,12356,9
2016,29639.448485,45647.201628,6108.2275,5113.3625,0.0,0.0,0.0,86508.240113,-0.078681,12171,12
2015,38017.419167,47128.677258,4593.016167,4156.942333,0.0,0.0,0.0,93896.054925,-0.001391,12393,12
2014,35843.822155,48967.142581,4840.116833,4375.768167,0.0,0.0,0.0,94026.849736,-0.25896,13802,12
2013,36961.451595,79970.329194,5169.3245,4783.8545,0.0,0.0,0.0,126884.959788,-0.137186,12171,12
2012,41994.697841,94490.242371,5484.028148,5090.420833,0.0,0.0,0.0,147059.389193,-0.044133,12393,12
2011,33894.895909,110858.4165,4716.576852,4379.291667,0.0,0.0,0.0,153849.180928,,13802,12


In [11]:
# Export the rows of the DataFrame into a list of dictionaries for use
# in the template.  Rename columns to match template variables.
row_dicts = bench_util.df_to_dictionaries(df2, change_names={'Natural Gas': 'natural_gas', 'Oil #1': 'fuel_oil'})
row_dicts

[{'Electricity': 21849.234848484841,
  'Refuse': 0.0,
  'Sewer': 3326.4400000000001,
  'Steam': 0.0,
  'Total': 54733.42531723484,
  'Water': 2875.3299999999995,
  'fiscal_year': 2017,
  'fuel_oil': 0.0,
  'hdd': 12356.0,
  'month_count': 9.0,
  'natural_gas': 26682.420468749999,
  'pct_change': -0.36730390947936598},
 {'Electricity': 29639.448484848483,
  'Refuse': 0.0,
  'Sewer': 6108.2274999999991,
  'Steam': 0.0,
  'Total': 86508.240112872678,
  'Water': 5113.3625000000011,
  'fiscal_year': 2016,
  'fuel_oil': 0.0,
  'hdd': 12171.0,
  'month_count': 12.0,
  'natural_gas': 45647.201628024202,
  'pct_change': -0.078680779696023495},
 {'Electricity': 38017.419166666674,
  'Refuse': 0.0,
  'Sewer': 4593.0161666666672,
  'Steam': 0.0,
  'Total': 93896.054924731172,
  'Water': 4156.9423333333343,
  'fiscal_year': 2015,
  'fuel_oil': 0.0,
  'hdd': 12393.0,
  'month_count': 12.0,
  'natural_gas': 47128.677258064497,
  'pct_change': -0.0013910368310102728},
 {'Electricity': 35843.8221551724

In [12]:
# Put results into the final report dictionary that will be passed to the Template.
# This assumes you have created a couple graphs with the file names shown below.
pg3 = dict(rows=row_dicts, 
           g1='output/images/{}_pg2_g1.png'.format(site), 
           g2='output/images/{}_pg2_g2.png'.format(site)
      )
pg3

{'g1': 'output/images/ANSBG1_pg2_g1.png',
 'g2': 'output/images/ANSBG1_pg2_g2.png',
 'rows': [{'Electricity': 21849.234848484841,
   'Refuse': 0.0,
   'Sewer': 3326.4400000000001,
   'Steam': 0.0,
   'Total': 54733.42531723484,
   'Water': 2875.3299999999995,
   'fiscal_year': 2017,
   'fuel_oil': 0.0,
   'hdd': 12356.0,
   'month_count': 9.0,
   'natural_gas': 26682.420468749999,
   'pct_change': -0.36730390947936598},
  {'Electricity': 29639.448484848483,
   'Refuse': 0.0,
   'Sewer': 6108.2274999999991,
   'Steam': 0.0,
   'Total': 86508.240112872678,
   'Water': 5113.3625000000011,
   'fiscal_year': 2016,
   'fuel_oil': 0.0,
   'hdd': 12171.0,
   'month_count': 12.0,
   'natural_gas': 45647.201628024202,
   'pct_change': -0.078680779696023495},
  {'Electricity': 38017.419166666674,
   'Refuse': 0.0,
   'Sewer': 4593.0161666666672,
   'Steam': 0.0,
   'Total': 93896.054924731172,
   'Water': 4156.9423333333343,
   'fiscal_year': 2015,
   'fuel_oil': 0.0,
   'hdd': 12393.0,
   'month

## Fiscal Month Abbreviations

In [13]:
bench_util.mo_list

['Jul',
 'Aug',
 'Sep',
 'Oct',
 'Nov',
 'Dec',
 'Jan',
 'Feb',
 'Mar',
 'Apr',
 'May',
 'Jun']

In [14]:
bench_util.mo_dict

{1: 'Jul',
 2: 'Aug',
 3: 'Sep',
 4: 'Oct',
 5: 'Nov',
 6: 'Dec',
 7: 'Jan',
 8: 'Feb',
 9: 'Mar',
 10: 'Apr',
 11: 'May',
 12: 'Jun'}

## Get Additional Information on a Building

In [23]:
# Read in the Building Information from the Other Data file
df_bldg = pd.read_excel(
        '../data/Other_Building_Data.xlsx', 
        sheetname='Building', 
        skiprows=3, 
        index_col='site_ID'
        )
# Create a named tuple to hold info for each building
# The fields of the tuple are the columns from the spreadsheet that
# was just read, but also a number of other fields related to 
# service providers and account numbers.
tup_cols = list(df_bldg.columns) + [
    'source_elec',
    'source_oil',
    'source_nat_gas',
    'source_steam',
    'source_water',
    'source_sewer',
    'source_refuse',
    'acct_elec',
    'acct_oil',
    'acct_nat_gas',
    'acct_steam',
    'acct_water',
    'acct_sewer',
    'acct_refuse',
]

BldgInfo = namedtuple('BldgInfo', tup_cols)
flds = BldgInfo._fields
dict(zip(flds, ['']*len(flds)))

{'acct_elec': '',
 'acct_nat_gas': '',
 'acct_oil': '',
 'acct_refuse': '',
 'acct_sewer': '',
 'acct_steam': '',
 'acct_water': '',
 'address': '',
 'city': '',
 'dd_site': '',
 'onsite_gen': '',
 'primary_func': '',
 'site_name': '',
 'source_elec': '',
 'source_nat_gas': '',
 'source_oil': '',
 'source_refuse': '',
 'source_sewer': '',
 'source_steam': '',
 'source_water': '',
 'sq_ft': '',
 'year_built': ''}

In [29]:
# use the Util object from before to access additional Building Information.

# Value retunred below is a "named tuple"
rec = ut.bldg_info['ANSBG1']
rec

BldgInfo(site_name='FNSB-Animal Control', address='2408 Davis Road', city='Fairbanks', primary_func='Animal Shelter', year_built=1993.0, sq_ft=10123, onsite_gen=nan, dd_site='PAFA', source_elec='', source_oil='', source_nat_gas='', source_steam='', source_water='', source_sewer='', source_refuse='', acct_elec='', acct_oil='', acct_nat_gas='', acct_steam='', acct_water='', acct_sewer='', acct_refuse='')

In [16]:
# You can get individual fields through use of the dot operator
print(rec.address, ', ', rec.year_built)

2408 Davis Road ,  1993.0


In [17]:
# Shows two account numbers for one vendor and site.
dft = df[(df.site_id=='04') & (df.service_type=='Water') & (df.thru_dt=='2016-06-24')]
dft
#dft.groupby('Thru').nunique()['Account Number']

AttributeError: 'DataFrame' object has no attribute 'thru_dt'

In [None]:
df_svc = df_raw[(df_raw['Site ID']=='04') & (df_raw['Service Name']=='Water')]
last_date = df_svc['Thru'].max()
recs = df_svc[df_svc.Thru == last_date]
recs

In [None]:
recs['Account Number'].unique()