In [181]:
import pickle
from collections import namedtuple
import pandas as pd
import numpy as np
from importlib import reload
import sys
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
import datetime

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu

## Unpickle the Dataframe for Preprocessed Utility Data & Make Utility Function Object

In [182]:
# Unpickle the pre-processed DataFrame
df = pickle.load(open('dfu3.pkl', 'rb'))

# Unpickle the raw utility bill DataFrame, which is needed below to make
# the utility function object.
df_raw = pickle.load(open('df_raw.pkl', 'rb'))

df.head()  # the processed data

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2005,12,Electricity charge,kWh,1904.65788,49.5,14790.748577,2006,6,50.466034
1,3,Electricity,2006,1,Electricity charge,kWh,5430.493797,93.0,42665.790911,2006,7,145.575679
2,3,Electricity,2006,2,Electricity charge,kWh,5764.40673,84.0,45010.439348,2006,8,153.575619
3,3,Electricity,2006,3,Electricity charge,kWh,6349.255299,93.0,46311.547557,2006,9,158.015
4,3,Electricity,2006,4,Electricity charge,kWh,5529.385224,90.0,40392.812893,2006,10,137.820278


In [183]:
# this is only needed to update any code changes I may have made
# since last importing the module above.
reload(bu) 
reload(gu)

# Make an object that has the various utility functions.
# The object needs access to the raw utility bill DataFrame and the spreadsheet
# containing other application data.
ut = bu.Util(df_raw, '../data/Other_Building_Data.xlsx')

In [184]:
# Testing site.  Final code will loop through all sites
site = 'ANSBG1'

In [185]:
template_data = {}

# Energy Use Overview Report - Page 4


In [186]:
# From the main DataFrame, get only the rows for this site, and only get
# the needed columns for this analysis
usage_df1 = df.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo', 'mmbtu']]
usage_df1.head()

Unnamed: 0,service_type,fiscal_year,fiscal_mo,mmbtu
30353,Electricity,2006,12,24.71994
30354,Electricity,2006,12,0.0
30355,Electricity,2006,12,
30356,Electricity,2007,1,47.29032
30357,Electricity,2007,1,0.0


In [187]:
usage_df2 = pd.pivot_table(
    usage_df1, 
    values='mmbtu', 
    index=['fiscal_year'], 
    columns=['service_type'],
    aggfunc=np.sum
)

usage_df2 = usage_df2.drop(labels=['Sewer', 'Water'], axis=1)

usage_df2

service_type,Electricity,Natural Gas,Oil #1
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006,24.71994,,
2007,630.5376,,
2008,708.924675,,
2009,656.786639,4472.2971,264.465
2010,641.697942,4416.02159,
2011,664.086865,5203.09351,
2012,683.037908,4183.582316,
2013,600.549539,3545.521645,
2014,616.351563,2163.879,
2015,664.534768,2082.877839,


In [188]:
# Add in columns for the missing services
missing_services = bu.missing_energy_services(usage_df2.columns)
bu.add_columns(usage_df2, missing_services)
usage_df2

service_type,Electricity,Natural Gas,Oil #1,Steam
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006,24.71994,,,0.0
2007,630.5376,,,0.0
2008,708.924675,,,0.0
2009,656.786639,4472.2971,264.465,0.0
2010,641.697942,4416.02159,,0.0
2011,664.086865,5203.09351,,0.0
2012,683.037908,4183.582316,,0.0
2013,600.549539,3545.521645,,0.0
2014,616.351563,2163.879,,0.0
2015,664.534768,2082.877839,,0.0


In [189]:
# Add a Total column that sums the other columns
usage_df2['total_energy'] = usage_df2.sum(axis=1)
cols = ['{}_mmbtu'.format(bu.change_name(col)) for col in usage_df2.columns]
usage_df2.columns = cols
usage_df2

Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006,24.71994,,,0.0,24.71994
2007,630.5376,,,0.0,630.5376
2008,708.924675,,,0.0,708.924675
2009,656.786639,4472.2971,264.465,0.0,5393.548739
2010,641.697942,4416.02159,,0.0,5057.719531
2011,664.086865,5203.09351,,0.0,5867.180376
2012,683.037908,4183.582316,,0.0,4866.620224
2013,600.549539,3545.521645,,0.0,4146.071185
2014,616.351563,2163.879,,0.0,2780.230563
2015,664.534768,2082.877839,,0.0,2747.412607


In [190]:
# Create a list of columns to loop through and calculate percent total energy
usage_cols = list(usage_df2.columns.values)
print (usage_cols)
usage_cols.remove('total_energy_mmbtu')

for col in usage_cols:
    col_name = col.split('_mmbtu')[0] + "_pct"
    usage_df2[col_name] = usage_df2[col] / usage_df2.total_energy_mmbtu

usage_df2

['electricity_mmbtu', 'natural_gas_mmbtu', 'fuel_oil_mmbtu', 'district_heat_mmbtu', 'total_energy_mmbtu']


Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2006,24.71994,,,0.0,24.71994,1.0,,,0.0
2007,630.5376,,,0.0,630.5376,1.0,,,0.0
2008,708.924675,,,0.0,708.924675,1.0,,,0.0
2009,656.786639,4472.2971,264.465,0.0,5393.548739,0.121773,0.829194,0.049034,0.0
2010,641.697942,4416.02159,,0.0,5057.719531,0.126875,0.873125,,0.0
2011,664.086865,5203.09351,,0.0,5867.180376,0.113187,0.886813,,0.0
2012,683.037908,4183.582316,,0.0,4866.620224,0.140352,0.859648,,0.0
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0


In [191]:
# Add in degree days
months_present = bu.months_present(usage_df1)
deg_days = ut.degree_days_yearly(months_present, site)
usage_df2['hdd'] = deg_days
usage_df2

Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct,hdd
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2006,24.71994,,,0.0,24.71994,1.0,,,0.0,206
2007,630.5376,,,0.0,630.5376,1.0,,,0.0,14075
2008,708.924675,,,0.0,708.924675,1.0,,,0.0,13382
2009,656.786639,4472.2971,264.465,0.0,5393.548739,0.121773,0.829194,0.049034,0.0,14471
2010,641.697942,4416.02159,,0.0,5057.719531,0.126875,0.873125,,0.0,13119
2011,664.086865,5203.09351,,0.0,5867.180376,0.113187,0.886813,,0.0,13667
2012,683.037908,4183.582316,,0.0,4866.620224,0.140352,0.859648,,0.0,13960
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0,14939
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0,12531
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0,12345


In [192]:
# Add in a column to show the numbers of months present for each year
# This will help to identify partial years.
mo_count = bu.month_count(months_present)
usage_df2['month_count'] = mo_count
usage_df2

Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct,hdd,month_count
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2006,24.71994,,,0.0,24.71994,1.0,,,0.0,206,1
2007,630.5376,,,0.0,630.5376,1.0,,,0.0,14075,12
2008,708.924675,,,0.0,708.924675,1.0,,,0.0,13382,12
2009,656.786639,4472.2971,264.465,0.0,5393.548739,0.121773,0.829194,0.049034,0.0,14471,12
2010,641.697942,4416.02159,,0.0,5057.719531,0.126875,0.873125,,0.0,13119,12
2011,664.086865,5203.09351,,0.0,5867.180376,0.113187,0.886813,,0.0,13667,12
2012,683.037908,4183.582316,,0.0,4866.620224,0.140352,0.859648,,0.0,13960,12
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0,14939,12
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0,12531,12
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0,12345,12


In [193]:
# Calculate total heat energy and normalized heating usage
usage_df2['total_heat_mmbtu'] = usage_df2.natural_gas_mmbtu + usage_df2.district_heat_mmbtu + usage_df2.fuel_oil_mmbtu
usage_df2['total_specific_heat'] = usage_df2.total_heat_mmbtu * 1000 / usage_df2.hdd
usage_df2 = usage_df2.query("month_count == 12")
usage_df2

Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct,hdd,month_count,total_heat_mmbtu,total_specific_heat
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2007,630.5376,,,0.0,630.5376,1.0,,,0.0,14075,12,,
2008,708.924675,,,0.0,708.924675,1.0,,,0.0,13382,12,,
2009,656.786639,4472.2971,264.465,0.0,5393.548739,0.121773,0.829194,0.049034,0.0,14471,12,4736.7621,327.327904
2010,641.697942,4416.02159,,0.0,5057.719531,0.126875,0.873125,,0.0,13119,12,,
2011,664.086865,5203.09351,,0.0,5867.180376,0.113187,0.886813,,0.0,13667,12,,
2012,683.037908,4183.582316,,0.0,4866.620224,0.140352,0.859648,,0.0,13960,12,,
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0,14939,12,,
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0,12531,12,,
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0,12345,12,,
2016,595.196414,2151.683625,,0.0,2746.880039,0.216681,0.783319,,0.0,12017,12,,


In [194]:
# Reverse the DataFrame
usage_df2.sort_index(ascending=False, inplace=True)
usage_df2 = usage_df2.drop('month_count', axis=1)
usage_df2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct,hdd,total_heat_mmbtu,total_specific_heat
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017,880.345629,2112.299698,,0.0,2992.645327,0.29417,0.70583,,0.0,13802,,
2016,595.196414,2151.683625,,0.0,2746.880039,0.216681,0.783319,,0.0,12017,,
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0,12345,,
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0,12531,,
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0,14939,,
2012,683.037908,4183.582316,,0.0,4866.620224,0.140352,0.859648,,0.0,13960,,
2011,664.086865,5203.09351,,0.0,5867.180376,0.113187,0.886813,,0.0,13667,,
2010,641.697942,4416.02159,,0.0,5057.719531,0.126875,0.873125,,0.0,13119,,
2009,656.786639,4472.2971,264.465,0.0,5393.548739,0.121773,0.829194,0.049034,0.0,14471,4736.7621,327.327904
2008,708.924675,,,0.0,708.924675,1.0,,,0.0,13382,,


## Create Energy Usage Overview Graphs

In [195]:
# Reset the index so the fiscal year column can be passed to the graphing function
reset_usage_df2 = usage_df2.reset_index()

p4g2_filename, p4g2_url = gu.graph_filename_url(site, 'annual_energy_usage_distribution')

# Create the area graph
gu.area_use_distribution(reset_usage_df2, 'fiscal_year', usage_cols, p4g2_filename)



In [196]:
p4g1_filename, p4g1_url = gu.graph_filename_url(site, "annual_energy_usage")

gu.energy_use_stacked_bar(reset_usage_df2, 'fiscal_year', usage_cols, p4g1_filename)

In [197]:
# Convert df to dictionary
energy_use_overview_rows = bu.df_to_dictionaries(usage_df2)

# Add data and graphs to main dictionary
template_data['energy_usage_overview'] = dict(
    graphs=[p4g1_url, p4g2_url],
    table={'rows': energy_use_overview_rows},
)

# Create Usage Pie Charts

In [202]:
p5g1_filename, p5g1_url = gu.graph_filename_url(site, "energy_usage")
gu.usage_pie_charts(usage_df2, usage_cols, 1, p5g1_filename, site)

in singular transformations; automatically expanding.
left=0.75, right=0.75
  'left=%s, right=%s') % (left, right))
in singular transformations; automatically expanding.
bottom=0.75, top=0.75
  'bottom=%s, top=%s') % (bottom, top))


ValueError: posx and posy should be finite values

In [None]:
# Add pie charts to template dictionary
template_data['energy_cost_usage'] = dict(
    graphs=[p5g1_url])

# Electrical Usage Analysis - Page 6

In [203]:
site_df = df.query("site_id == @site")
site_df.head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
30353,ANSBG1,Electricity,2006,6,Energy charge,kWh,739.28,16.5,7245.0,2006,12,24.71994
30354,ANSBG1,Electricity,2006,6,KW Charge,kW,0.0,16.5,14.5,2006,12,0.0
30355,ANSBG1,Electricity,2006,6,Other Charge,-,183.065,16.5,,2006,12,
30356,ANSBG1,Electricity,2006,7,Energy charge,kWh,1414.275,31.0,13860.0,2007,1,47.29032
30357,ANSBG1,Electricity,2006,7,KW Charge,kW,0.0,31.0,29.5,2007,1,0.0


In [204]:
# only look at elecricity records
electric_df = site_df.query("service_type == 'Electricity'")

In [205]:
# Make sure I'm not potentially missing anything with funky unit names
check_df = electric_df.query("usage > 0")
check_df.units.unique()

array(['kWh', 'kW'], dtype=object)

In [206]:
electric_df = electric_df.query("units == 'kWh' or units == 'kW'")
electric_df.head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
30353,ANSBG1,Electricity,2006,6,Energy charge,kWh,739.28,16.5,7245.0,2006,12,24.71994
30354,ANSBG1,Electricity,2006,6,KW Charge,kW,0.0,16.5,14.5,2006,12,0.0
30356,ANSBG1,Electricity,2006,7,Energy charge,kWh,1414.275,31.0,13860.0,2007,1,47.29032
30357,ANSBG1,Electricity,2006,7,KW Charge,kW,0.0,31.0,29.5,2007,1,0.0
30359,ANSBG1,Electricity,2006,8,Energy charge,kWh,1432.643065,31.0,14040.0,2007,2,47.90448


In [207]:
electric_df.query("units == 'kWh'")['item_desc'].unique()

array(['Energy charge', 'Utility Charge'], dtype=object)

In [208]:
electric_df.item_desc.unique()

array(['Energy charge', 'KW Charge', 'Utility Charge'], dtype=object)

In [209]:
electric_pivot_monthly = pd.pivot_table(electric_df,
                                        index=['fiscal_year', 'fiscal_mo'], 
                                        columns=['units'],
                                        values='usage',
                                        aggfunc=np.sum)

electric_pivot_monthly.head()

Unnamed: 0_level_0,units,kW,kWh
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1
2006,12,14.5,7245.0
2007,1,29.5,13860.0
2007,2,29.903226,14040.0
2007,3,28.043203,10510.714286
2007,4,28.763249,11772.02765


In [210]:
# Do a month count for the elecricity bills 
elec_months_present = bu.months_present(electric_pivot_monthly.reset_index())
elec_mo_count = bu.month_count(elec_months_present)
elec_mo_count_df = pd.DataFrame(elec_mo_count)
elec_mo_count_df

Unnamed: 0_level_0,month
year,Unnamed: 1_level_1
2006,1
2007,12
2008,12
2009,12
2010,12
2011,12
2012,12
2013,12
2014,12
2015,12


In [211]:
electric_pivot_annual = pd.pivot_table(electric_df,
                                       index=['fiscal_year'],
                                       columns=['units'],
                                       values='usage',
                                       aggfunc=np.sum
                                      )
                                       
electric_use_annual = electric_pivot_annual[['kWh']]
electric_use_annual = electric_use_annual.rename(columns={'kWh':'ann_electric_usage_kWh'})
electric_use_annual

units,ann_electric_usage_kWh
fiscal_year,Unnamed: 1_level_1
2006,7245.0
2007,184800.0
2008,207773.9375
2009,192493.153409
2010,188070.909091
2011,194632.727273
2012,200186.960227
2013,176011.002155
2014,180642.310345
2015,194764.0


In [212]:
# Get average annual demand usage
electric_demand_avg = electric_pivot_monthly.groupby(['fiscal_year']).mean()
electric_demand_avg = electric_demand_avg[['kW']]
electric_demand_avg = electric_demand_avg.rename(columns={'kW': 'avg_demand_kW'})
electric_demand_avg

units,avg_demand_kW
fiscal_year,Unnamed: 1_level_1
2006,14.5
2007,31.128906
2008,32.985937
2009,31.425182
2010,30.372475
2011,32.692803
2012,33.901728
2013,16.671875
2014,
2015,


In [213]:
# Find annual maximum demand usage
electric_demand_max = electric_pivot_monthly.groupby(['fiscal_year']).max()
electric_demand_max = electric_demand_max[['kW']]
electric_demand_max = electric_demand_max.rename(columns={'kW': 'max_demand_kW'})
electric_demand_max

units,max_demand_kW
fiscal_year,Unnamed: 1_level_1
2006,14.5
2007,36.607143
2008,37.793382
2009,35.030977
2010,33.789706
2011,38.7
2012,38.55
2013,17.637931
2014,
2015,


In [214]:
# Combine dataframes
electric_demand_join = pd.merge(electric_demand_max, electric_demand_avg, how='outer', left_index=True, right_index=True)
annual_electric_data = pd.merge(electric_demand_join, electric_use_annual, how='outer', left_index=True, right_index=True)
annual_electric_data

units,max_demand_kW,avg_demand_kW,ann_electric_usage_kWh
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006,14.5,14.5,7245.0
2007,36.607143,31.128906,184800.0
2008,37.793382,32.985937,207773.9375
2009,35.030977,31.425182,192493.153409
2010,33.789706,30.372475,188070.909091
2011,38.7,32.692803,194632.727273
2012,38.55,33.901728,200186.960227
2013,17.637931,16.671875,176011.002155
2014,,,180642.310345
2015,,,194764.0


In [215]:
# Add percent change columns
annual_electric_data['usage_pct_change'] = annual_electric_data.ann_electric_usage_kWh.pct_change()
annual_electric_data['avg_demand_pct_change'] = annual_electric_data.avg_demand_kW.pct_change()
annual_electric_data['max_demand_pct_change'] = annual_electric_data.max_demand_kW.pct_change()
annual_electric_data = annual_electric_data.rename(columns={'avg_demand_kW': 'Average kW',
                                                           'ann_electric_usage_kWh': 'Total kWh'})
annual_electric_data = pd.merge(annual_electric_data, elec_mo_count_df, left_index=True, right_index=True, how='left')
annual_electric_data = annual_electric_data.query("month == 12")
annual_electric_data = annual_electric_data.sort_index(ascending=False)
annual_electric_data = annual_electric_data.rename(columns={'max_demand_kW':'kw_max',
                                                           'Average kW':'kw_avg',
                                                           'Total kWh':'kwh',
                                                           'usage_pct_change':'kwh_pct_change',
                                                           'avg_demand_pct_change':'kw_avg_pct_change',
                                                           'max_demand_pct_change':'kw_max_pct_change'})
annual_electric_data = annual_electric_data.drop('month', axis=1)
annual_electric_data

Unnamed: 0_level_0,kw_max,kw_avg,kwh,kwh_pct_change,kw_avg_pct_change,kw_max_pct_change
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017,,,258014.545455,0.479084,,
2016,,,174442.090909,-0.104341,,
2015,,,194764.0,0.078175,,
2014,,,180642.310345,0.026313,,
2013,17.637931,16.671875,176011.002155,-0.120767,-0.508229,-0.542466
2012,38.55,33.901728,200186.960227,0.028537,0.036978,-0.003876
2011,38.7,32.692803,194632.727273,0.03489,0.076396,0.145319
2010,33.789706,30.372475,188070.909091,-0.022974,-0.033499,-0.035434
2009,35.030977,31.425182,192493.153409,-0.073545,-0.047316,-0.073092
2008,37.793382,32.985937,207773.9375,0.124318,0.059656,0.032405


## Create Electrical Usage Analysis Graphs - Page 6

In [216]:
# Axes labels
ylabel1 = 'Electricity Usage [kWh]'
ylabel2 = 'Electricity Demand [kW]'

In [217]:
p6g1_filename, p6g1_url = gu.graph_filename_url(site, "electricity_usage")
gu.stacked_bar_with_line(annual_electric_data.reset_index(), 'fiscal_year', ['kwh'], 'kw_avg', 
                      ylabel1, ylabel2, "Test Title", p6g1_filename)

In [218]:
p6g2_filename, p6g2_url = gu.graph_filename_url(site, "monthly_electricity_usage_profile")
gu.create_monthly_profile(electric_pivot_monthly, 'kWh', 'Monthly Electricity Usage Profile [kWh]', 'blue',
                         "Test Title", p6g2_filename)

In [219]:
# Convert df to dictionary
electric_use_rows = bu.df_to_dictionaries(annual_electric_data)

# Add data and graphs to main dictionary
template_data['electrical_usage_analysis'] = dict(
    graphs=[p6g1_url, p6g2_url],
    table={'rows': electric_use_rows},
)

## Electrical Cost Analysis Table - Page 7

In [220]:
# only look at elecricity records
electric_cost_df = site_df.query("service_type == 'Electricity'")

In [221]:
electric_cost_df.item_desc.unique()

array(['Energy charge', 'KW Charge', 'Other Charge', 'On peak demand',
       'Utility Charge'], dtype=object)

In [222]:
# Costs don't always have units, so split the data into demand charges and usage charges (which includes other charges)
electric_cost_df['cost_categories'] = np.where(electric_cost_df.item_desc.isin(['KW Charge', 'On peak demand', 'Demand Charge']),
                                               'demand_cost', 'usage_cost')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [223]:
# Sum costs by demand and usage
electric_annual_cost = pd.pivot_table(electric_cost_df,
                                       index=['fiscal_year'],
                                       columns=['cost_categories'],
                                       values='cost',
                                       aggfunc=np.sum
                                      )
# Create a total column
electric_annual_cost['Total Cost'] = electric_annual_cost.demand_cost + electric_annual_cost.usage_cost

electric_annual_cost

cost_categories,demand_cost,usage_cost,Total Cost
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006,0.0,922.345,922.345
2007,0.0,24319.055469,24319.055469
2008,0.0,33274.530938,33274.530938
2009,0.0,30307.350563,30307.350563
2010,0.0,32688.29303,32688.29303
2011,0.0,35138.080909,35138.080909
2012,0.0,41994.697841,41994.697841
2013,0.0,36961.451595,36961.451595
2014,0.0,35843.822155,35843.822155
2015,,38017.419167,


In [224]:
# Add percent change columns
electric_annual_cost['usage_cost_pct_change'] = electric_annual_cost.usage_cost.pct_change()
electric_annual_cost['demand_cost_pct_change'] = electric_annual_cost.demand_cost.pct_change()
electric_annual_cost['total_cost_pct_change'] = electric_annual_cost['Total Cost'].pct_change()
electric_annual_cost

cost_categories,demand_cost,usage_cost,Total Cost,usage_cost_pct_change,demand_cost_pct_change,total_cost_pct_change
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006,0.0,922.345,922.345,,,
2007,0.0,24319.055469,24319.055469,25.36655,,25.36655
2008,0.0,33274.530938,33274.530938,0.368249,,0.368249
2009,0.0,30307.350563,30307.350563,-0.089173,,-0.089173
2010,0.0,32688.29303,32688.29303,0.07856,,0.07856
2011,0.0,35138.080909,35138.080909,0.074944,,0.074944
2012,0.0,41994.697841,41994.697841,0.195134,,0.195134
2013,0.0,36961.451595,36961.451595,-0.119854,,-0.119854
2014,0.0,35843.822155,35843.822155,-0.030238,,-0.030238
2015,,38017.419167,,0.060641,,


In [225]:
# Left join the cost data to the annual electric data, which only shows complete years
electric_use_and_cost = pd.merge(annual_electric_data, electric_annual_cost, left_index=True, right_index=True, how='left')
electric_use_and_cost = electric_use_and_cost.sort_index(ascending=False)
electric_use_and_cost = electric_use_and_cost.drop(['kw_max', 'kw_max_pct_change'], axis=1)
electric_use_and_cost = electric_use_and_cost.rename(columns={'demand_cost':'kw_avg_cost',
                                                              'usage_cost':'kwh_cost',
                                                              'Total Cost':'total_cost',
                                                              'usage_cost_pct_change':'kwh_cost_pct_change',
                                                              'demand_cost_pct_change':'kw_avg_cost_pct_change'
                                                             })
electric_use_and_cost


Unnamed: 0_level_0,kw_avg,kwh,kwh_pct_change,kw_avg_pct_change,kw_avg_cost,kwh_cost,total_cost,kwh_cost_pct_change,kw_avg_cost_pct_change,total_cost_pct_change
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017,,258014.545455,0.479084,,,54161.057879,,0.672097,,
2016,,174442.090909,-0.104341,,,32391.098485,,-0.147993,,
2015,,194764.0,0.078175,,,38017.419167,,0.060641,,
2014,,180642.310345,0.026313,,0.0,35843.822155,35843.822155,-0.030238,,-0.030238
2013,16.671875,176011.002155,-0.120767,-0.508229,0.0,36961.451595,36961.451595,-0.119854,,-0.119854
2012,33.901728,200186.960227,0.028537,0.036978,0.0,41994.697841,41994.697841,0.195134,,0.195134
2011,32.692803,194632.727273,0.03489,0.076396,0.0,35138.080909,35138.080909,0.074944,,0.074944
2010,30.372475,188070.909091,-0.022974,-0.033499,0.0,32688.29303,32688.29303,0.07856,,0.07856
2009,31.425182,192493.153409,-0.073545,-0.047316,0.0,30307.350563,30307.350563,-0.089173,,-0.089173
2008,32.985937,207773.9375,0.124318,0.059656,0.0,33274.530938,33274.530938,0.368249,,0.368249


## Create Electrical Cost Analysis Graphs - Page 7

In [226]:
p7g1_filename, p7g1_url = gu.graph_filename_url(site, "electricity_cost")

renamed_use_and_cost = electric_use_and_cost.rename(columns={'kwh_cost':'Electricity Usage Cost [$]',
                                                            'kw_avg_cost':'Electricity Demand Cost [$]'})
gu.create_stacked_bar(renamed_use_and_cost.reset_index(), 'fiscal_year', ['Electricity Usage Cost [$]', 
                                                                          'Electricity Demand Cost [$]'], 
                      'Electricity Cost [$]', "test title", p7g1_filename)

In [227]:
# Create Monthly Profile of Electricity Demand
p7g2_filename, p7g2_url = gu.graph_filename_url(site, "monthly_electricity_demand_profile")
gu.create_monthly_profile(electric_pivot_monthly, 'kW', 'Monthly Electricity Demand Profile [kW]', 'blue', 
                          "test title", p7g2_filename)

In [228]:
# Convert df to dictionary
electric_cost_rows = bu.df_to_dictionaries(electric_use_and_cost)

# Add data and graphs to main dictionary
template_data['electrical_cost_analysis'] = dict(
    graphs=[p7g1_url, p7g2_url],
    table={'rows': electric_cost_rows},
)

# Create Heating Usage Analysis Table - Page 8

In [229]:
usage_df2.head()

Unnamed: 0_level_0,electricity_mmbtu,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,total_energy_mmbtu,electricity_pct,natural_gas_pct,fuel_oil_pct,district_heat_pct,hdd,total_heat_mmbtu,total_specific_heat
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017,880.345629,2112.299698,,0.0,2992.645327,0.29417,0.70583,,0.0,13802,,
2016,595.196414,2151.683625,,0.0,2746.880039,0.216681,0.783319,,0.0,12017,,
2015,664.534768,2082.877839,,0.0,2747.412607,0.241877,0.758123,,0.0,12345,,
2014,616.351563,2163.879,,0.0,2780.230563,0.221691,0.778309,,0.0,12531,,
2013,600.549539,3545.521645,,0.0,4146.071185,0.144848,0.855152,,0.0,14939,,


In [230]:
# Take only needed columns from earlier usage df
heating_usage = usage_df2[['natural_gas_mmbtu', 'fuel_oil_mmbtu', 'district_heat_mmbtu', 'hdd', 'total_heat_mmbtu']]
heating_usage

Unnamed: 0_level_0,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,total_heat_mmbtu
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,2112.299698,,0.0,13802,
2016,2151.683625,,0.0,12017,
2015,2082.877839,,0.0,12345,
2014,2163.879,,0.0,12531,
2013,3545.521645,,0.0,14939,
2012,4183.582316,,0.0,13960,
2011,5203.09351,,0.0,13667,
2010,4416.02159,,0.0,13119,
2009,4472.2971,264.465,0.0,14471,4736.7621
2008,,,0.0,13382,


In [231]:
# Add in percent change columns
heating_usage['fuel_oil_pct_change'] = heating_usage.fuel_oil_mmbtu.pct_change()
heating_usage['natural_gas_pct_change'] = heating_usage.natural_gas_mmbtu.pct_change()
heating_usage['district_heat_pct_change'] = heating_usage.district_heat_mmbtu.pct_change()
heating_usage['total_heat_pct_change'] = heating_usage.total_heat_mmbtu.pct_change()
heating_usage

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas

Unnamed: 0_level_0,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,total_heat_mmbtu,fuel_oil_pct_change,natural_gas_pct_change,district_heat_pct_change,total_heat_pct_change
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017,2112.299698,,0.0,13802,,,,,
2016,2151.683625,,0.0,12017,,,0.018645,,
2015,2082.877839,,0.0,12345,,,-0.031978,,
2014,2163.879,,0.0,12531,,,0.038889,,
2013,3545.521645,,0.0,14939,,,0.638503,,
2012,4183.582316,,0.0,13960,,,0.179962,,
2011,5203.09351,,0.0,13667,,,0.243693,,
2010,4416.02159,,0.0,13119,,,-0.15127,,
2009,4472.2971,264.465,0.0,14471,4736.7621,,0.012743,,
2008,,,0.0,13382,,,,,


In [232]:
# Get the number of gallons, ccf, and 1,000 pounds of district heat by converting MMBTUs using the supplied conversions
heating_usage['fuel_oil_usage'] = heating_usage.fuel_oil_mmbtu * 1000000 / ut.fuel_btus_per_unit('Oil #1', 'gallons')
heating_usage['natural_gas_usage'] = heating_usage.natural_gas_mmbtu * 1000000 / ut.fuel_btus_per_unit('Natural Gas', 'ccf')
heating_usage

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


Unnamed: 0_level_0,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,total_heat_mmbtu,fuel_oil_pct_change,natural_gas_pct_change,district_heat_pct_change,total_heat_pct_change,fuel_oil_usage,natural_gas_usage
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017,2112.299698,,0.0,13802,,,,,,,20708.820565
2016,2151.683625,,0.0,12017,,,0.018645,,,,21094.9375
2015,2082.877839,,0.0,12345,,,-0.031978,,,,20420.370968
2014,2163.879,,0.0,12531,,,0.038889,,,,21214.5
2013,3545.521645,,0.0,14939,,,0.638503,,,,34760.016129
2012,4183.582316,,0.0,13960,,,0.179962,,,,41015.512903
2011,5203.09351,,0.0,13667,,,0.243693,,,,51010.72069
2010,4416.02159,,0.0,13119,,,-0.15127,,,,43294.32931
2009,4472.2971,264.465,0.0,14471,4736.7621,,0.012743,,,1959.0,43846.05
2008,,,0.0,13382,,,,,,,


## Create Heating Usage Analysis Graphs - Page 8

In [233]:
p8g1_filename, p8g1_url = gu.graph_filename_url(site, "heating_degree_days")
gu.stacked_bar_with_line(heating_usage.reset_index(), 'fiscal_year', ['natural_gas_mmbtu', 'fuel_oil_mmbtu',
                                                                                'district_heat_mmbtu'], 'hdd',
                        'Heating Fuel Usage [MMBTU/yr]', 'Heating Degree Days [Base 65F]', "test title", p8g1_filename)

### Create Monthly Heating Usage dataframe for graph

In [234]:
monthly_heating = pd.pivot_table(usage_df1,
                                values='mmbtu',
                                index=['fiscal_year', 'fiscal_mo'],
                                columns=['service_type'],
                                aggfunc=np.sum
                                )
monthly_heating.head()

Unnamed: 0_level_0,service_type,Electricity,Natural Gas,Oil #1,Sewer,Water
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006,12,24.71994,,,,
2007,1,47.29032,,,,
2007,2,47.90448,,,,
2007,3,35.862557,,,,
2007,4,40.166158,,,0.0,0.0


In [235]:
monthly_heating

Unnamed: 0_level_0,service_type,Electricity,Natural Gas,Oil #1,Sewer,Water
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006,12,24.719940,,,,
2007,1,47.290320,,,,
2007,2,47.904480,,,,
2007,3,35.862557,,,,
2007,4,40.166158,,,0.0,0.0
2007,5,54.319685,,,0.0,0.0
2007,6,60.303120,,,0.0,0.0
2007,7,63.518595,,,0.0,0.0
2007,8,58.795584,,,0.0,0.0
2007,9,64.179720,,,0.0,0.0


In [236]:
# Add in columns for the missing energy services
missing_services = bu.missing_energy_services(monthly_heating.columns)
bu.add_columns(monthly_heating, missing_services)

# Drop the non-heating services
monthly_heating = monthly_heating.drop(labels=['Electricity', 'Sewer', 'Water'], axis=1)

# Create a total heating column
monthly_heating['total_heating_energy'] = monthly_heating.sum(axis=1)

monthly_heating.head()

Unnamed: 0_level_0,service_type,Natural Gas,Oil #1,Steam,total_heating_energy
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006,12,,,0.0,0.0
2007,1,,,0.0,0.0
2007,2,,,0.0,0.0
2007,3,,,0.0,0.0
2007,4,,,0.0,0.0


In [237]:
p8g2_filename, p8g2_url = gu.graph_filename_url(site, "monthly_heating_energy_profile")
gu.create_monthly_profile(monthly_heating, 'total_heating_energy', "Monthly Heating Energy Profile [MMBTU]", 'red',
                          "test title", p8g2_filename)

In [238]:
# Convert df to dictionary
heating_use_rows = bu.df_to_dictionaries(heating_usage)

# Add data and graphs to main dictionary
template_data['heating_usage_analysis'] = dict(
    graphs=[p8g1_url, p8g2_url],
    table={'rows': heating_use_rows},
)

## Heating Cost Analysis Table - Page 9

In [239]:
# Import df that I exported from "alan_report_pages".  This can be removed once code is combined
df2 = pd.read_csv(r"C:\Users\dustin\Google Drive\FNSB Data Analysis\data_from_alan_report_pages_df2.csv")

# Use only necessary columns
heating_cost = df2[['fiscal_year', 'Natural Gas', 'Oil #1', 'Steam', 'Total', 'pct_change']]

# Change column names so they aren't the same as the heating usage dataframe
heating_cost = heating_cost.rename(columns={'Natural Gas':'natural_gas_cost',
                                           'Oil #1': 'fuel_oil_cost',
                                           'Steam': 'district_heat_cost',
                                           'Total': 'total_heat_cost',
                                           'pct_change': 'total_heat_cost_pct_change'})
heating_cost

Unnamed: 0,fiscal_year,natural_gas_cost,fuel_oil_cost,district_heat_cost,total_heat_cost,total_heat_cost_pct_change
0,2017,53336.961562,29028.075,0.0,294496.131925,-0.211853
1,2016,690.478276,90349.225,0.0,373656.286356,-0.090911
2,2015,97125.406774,48709.171396,0.0,411022.962057,-0.127236
3,2014,164359.650963,15747.134827,0.0,470943.816735,-0.07334
4,2013,173104.449037,20369.326596,0.0,508216.253007,0.048115
5,2012,163613.406387,17351.334533,0.0,484885.747289,0.157875
6,2011,20902.767,139185.262647,0.0,418772.262631,


In [240]:
heating_usage

Unnamed: 0_level_0,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,total_heat_mmbtu,fuel_oil_pct_change,natural_gas_pct_change,district_heat_pct_change,total_heat_pct_change,fuel_oil_usage,natural_gas_usage
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017,2112.299698,,0.0,13802,,,,,,,20708.820565
2016,2151.683625,,0.0,12017,,,0.018645,,,,21094.9375
2015,2082.877839,,0.0,12345,,,-0.031978,,,,20420.370968
2014,2163.879,,0.0,12531,,,0.038889,,,,21214.5
2013,3545.521645,,0.0,14939,,,0.638503,,,,34760.016129
2012,4183.582316,,0.0,13960,,,0.179962,,,,41015.512903
2011,5203.09351,,0.0,13667,,,0.243693,,,,51010.72069
2010,4416.02159,,0.0,13119,,,-0.15127,,,,43294.32931
2009,4472.2971,264.465,0.0,14471,4736.7621,,0.012743,,,1959.0,43846.05
2008,,,0.0,13382,,,,,,,


In [241]:
# Combine the heating cost and heating use dataframes
heating_cost_and_use = pd.merge(heating_cost, heating_usage, left_on='fiscal_year', right_index=True, how='right')
heating_cost_and_use

Unnamed: 0,fiscal_year,natural_gas_cost,fuel_oil_cost,district_heat_cost,total_heat_cost,total_heat_cost_pct_change,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,total_heat_mmbtu,fuel_oil_pct_change,natural_gas_pct_change,district_heat_pct_change,total_heat_pct_change,fuel_oil_usage,natural_gas_usage
0,2017,53336.961562,29028.075,0.0,294496.131925,-0.211853,2112.299698,,0.0,13802,,,,,,,20708.820565
1,2016,690.478276,90349.225,0.0,373656.286356,-0.090911,2151.683625,,0.0,12017,,,0.018645,,,,21094.9375
2,2015,97125.406774,48709.171396,0.0,411022.962057,-0.127236,2082.877839,,0.0,12345,,,-0.031978,,,,20420.370968
3,2014,164359.650963,15747.134827,0.0,470943.816735,-0.07334,2163.879,,0.0,12531,,,0.038889,,,,21214.5
4,2013,173104.449037,20369.326596,0.0,508216.253007,0.048115,3545.521645,,0.0,14939,,,0.638503,,,,34760.016129
5,2012,163613.406387,17351.334533,0.0,484885.747289,0.157875,4183.582316,,0.0,13960,,,0.179962,,,,41015.512903
6,2011,20902.767,139185.262647,0.0,418772.262631,,5203.09351,,0.0,13667,,,0.243693,,,,51010.72069
6,2010,,,,,,4416.02159,,0.0,13119,,,-0.15127,,,,43294.32931
6,2009,,,,,,4472.2971,264.465,0.0,14471,4736.7621,,0.012743,,,1959.0,43846.05
6,2008,,,,,,,,0.0,13382,,,,,,,


In [242]:
# Create percent change columns
heating_cost_and_use['fuel_oil_pct_change'] = heating_cost_and_use.fuel_oil_cost.pct_change()
heating_cost_and_use['natural_gas_pct_change'] = heating_cost_and_use.natural_gas_cost.pct_change()
heating_cost_and_use['district_heat_pct_change'] = heating_cost_and_use.district_heat_cost.pct_change()

In [243]:
# Create unit cost columns
heating_cost_and_use['fuel_oil_unit_cost'] = heating_cost_and_use.fuel_oil_cost / heating_cost_and_use.fuel_oil_mmbtu
heating_cost_and_use['natural_gas_unit_cost'] = heating_cost_and_use.natural_gas_cost / heating_cost_and_use.natural_gas_mmbtu
heating_cost_and_use['district_heat_unit_cost'] = heating_cost_and_use.district_heat_cost / heating_cost_and_use.district_heat_mmbtu
heating_cost_and_use['building_heat_unit_cost'] = heating_cost_and_use.total_heat_cost / heating_cost_and_use.total_heat_mmbtu
heating_cost_and_use

Unnamed: 0,fiscal_year,natural_gas_cost,fuel_oil_cost,district_heat_cost,total_heat_cost,total_heat_cost_pct_change,natural_gas_mmbtu,fuel_oil_mmbtu,district_heat_mmbtu,hdd,...,fuel_oil_pct_change,natural_gas_pct_change,district_heat_pct_change,total_heat_pct_change,fuel_oil_usage,natural_gas_usage,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_heat_unit_cost
0,2017,53336.961562,29028.075,0.0,294496.131925,-0.211853,2112.299698,,0.0,13802,...,,,,,,20708.820565,,25.25066,,
1,2016,690.478276,90349.225,0.0,373656.286356,-0.090911,2151.683625,,0.0,12017,...,2.112477,-0.987054,,,,21094.9375,,0.320901,,
2,2015,97125.406774,48709.171396,0.0,411022.962057,-0.127236,2082.877839,,0.0,12345,...,-0.460879,139.663957,,,,20420.370968,,46.63039,,
3,2014,164359.650963,15747.134827,0.0,470943.816735,-0.07334,2163.879,,0.0,12531,...,-0.676711,0.692242,,,,21214.5,,75.956027,,
4,2013,173104.449037,20369.326596,0.0,508216.253007,0.048115,3545.521645,,0.0,14939,...,0.293526,0.053205,,,,34760.016129,,48.823408,,
5,2012,163613.406387,17351.334533,0.0,484885.747289,0.157875,4183.582316,,0.0,13960,...,-0.148164,-0.054828,,,,41015.512903,,39.108447,,
6,2011,20902.767,139185.262647,0.0,418772.262631,,5203.09351,,0.0,13667,...,7.021588,-0.872243,,,,51010.72069,,4.017373,,
6,2010,,,,,,4416.02159,,0.0,13119,...,,,,,,43294.32931,,,,
6,2009,,,,,,4472.2971,264.465,0.0,14471,...,,,,,1959.0,43846.05,,,,
6,2008,,,,,,,,0.0,13382,...,,,,,,,,,,


In [244]:
# Remove all columns not needed for the Heating Cost Analysis Table
heating_cost_and_use = heating_cost_and_use[['fiscal_year',
                                              'fuel_oil_cost',
                                              'fuel_oil_pct_change',
                                              'natural_gas_cost',
                                              'natural_gas_pct_change',
                                              'district_heat_cost',
                                              'district_heat_pct_change',
                                              'fuel_oil_unit_cost',
                                              'natural_gas_unit_cost',
                                              'district_heat_unit_cost',
                                              'building_heat_unit_cost',
                                              'total_heat_cost',
                                              'total_heat_cost_pct_change']]

heating_cost_and_use

Unnamed: 0,fiscal_year,fuel_oil_cost,fuel_oil_pct_change,natural_gas_cost,natural_gas_pct_change,district_heat_cost,district_heat_pct_change,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_heat_unit_cost,total_heat_cost,total_heat_cost_pct_change
0,2017,29028.075,,53336.961562,,0.0,,,25.25066,,,294496.131925,-0.211853
1,2016,90349.225,2.112477,690.478276,-0.987054,0.0,,,0.320901,,,373656.286356,-0.090911
2,2015,48709.171396,-0.460879,97125.406774,139.663957,0.0,,,46.63039,,,411022.962057,-0.127236
3,2014,15747.134827,-0.676711,164359.650963,0.692242,0.0,,,75.956027,,,470943.816735,-0.07334
4,2013,20369.326596,0.293526,173104.449037,0.053205,0.0,,,48.823408,,,508216.253007,0.048115
5,2012,17351.334533,-0.148164,163613.406387,-0.054828,0.0,,,39.108447,,,484885.747289,0.157875
6,2011,139185.262647,7.021588,20902.767,-0.872243,0.0,,,4.017373,,,418772.262631,
6,2010,,,,,,,,,,,,
6,2009,,,,,,,,,,,,
6,2008,,,,,,,,,,,,


## Create DataFrame with the Monthly Average Price Per MMBTU for All Sites

In [245]:
print (df.shape[0])

# Filter out natural gas customer charges as the unit cost goes to infinity if there is a charge but no use
df_no_gas_cust_charges = df.drop(df[(df['service_type'] == 'Natural Gas') & (df['units'] != 'CCF')].index)

# Filter out records with zero usage, which correspond to things like customer charges, etc.
nonzero_usage = df_no_gas_cust_charges.query("usage > 0")
print (nonzero_usage.shape[0])
nonzero_usage.head()

74268
37607


Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2005,12,Electricity charge,kWh,1904.65788,49.5,14790.748577,2006,6,50.466034
1,3,Electricity,2006,1,Electricity charge,kWh,5430.493797,93.0,42665.790911,2006,7,145.575679
2,3,Electricity,2006,2,Electricity charge,kWh,5764.40673,84.0,45010.439348,2006,8,153.575619
3,3,Electricity,2006,3,Electricity charge,kWh,6349.255299,93.0,46311.547557,2006,9,158.015
4,3,Electricity,2006,4,Electricity charge,kWh,5529.385224,90.0,40392.812893,2006,10,137.820278


In [246]:
# Check to make sure it is okay to drop records w/ zero mmbtu
zero_mmbtu = nonzero_usage.query("mmbtu == 0")
zero_mmbtu.service_type.unique()

array(['Refuse', 'Water', 'Electricity', 'Sewer'], dtype=object)

In [247]:
nonzero_usage = nonzero_usage.query("mmbtu > 0")
print (nonzero_usage.shape[0])
nonzero_usage.head()

21159


Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2005,12,Electricity charge,kWh,1904.65788,49.5,14790.748577,2006,6,50.466034
1,3,Electricity,2006,1,Electricity charge,kWh,5430.493797,93.0,42665.790911,2006,7,145.575679
2,3,Electricity,2006,2,Electricity charge,kWh,5764.40673,84.0,45010.439348,2006,8,153.575619
3,3,Electricity,2006,3,Electricity charge,kWh,6349.255299,93.0,46311.547557,2006,9,158.015
4,3,Electricity,2006,4,Electricity charge,kWh,5529.385224,90.0,40392.812893,2006,10,137.820278


In [248]:
nonzero_usage.cost.min()

-15844.611451612904

In [249]:
# Further analysis showed that these zero and less than zero costs were for waste oil; presumably less than zero costs
# was because they were able to avoid disposal fees or something
nonzero_usage.query("cost <= 0")

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
280,03,Natural Gas,2009,12,Natural gas (CCF),CCF,0.0,1.5,0.051724,2010,6,0.005276
283,03,Natural Gas,2010,1,Natural gas (CCF),CCF,0.0,27.5,0.948276,2010,7,0.096724
15895,23,Oil #1,2010,6,Fuel Oil #1 (Gallons),Gallons,-15844.611452,29.5,6420.774194,2010,12,866.804516
25351,43,Steam,2008,10,Steam (klbs),klbs,0.0,31.0,240.414747,2009,4,287.055207
25352,43,Steam,2008,11,Steam (klbs),klbs,0.0,30.0,239.870968,2009,5,286.405935
25353,43,Steam,2008,12,Steam (klbs),klbs,0.0,31.0,240.129032,2009,6,286.714065
25354,43,Steam,2009,1,Steam (klbs),klbs,0.0,31.0,240.0,2009,7,286.56
25355,43,Steam,2009,2,Steam (klbs),klbs,0.0,28.0,239.585253,2009,8,286.064793
25356,43,Steam,2009,3,Steam (klbs),klbs,0.0,31.0,240.414747,2009,9,287.055207
25357,43,Steam,2009,4,Steam (klbs),klbs,0.0,30.0,239.870968,2009,10,286.405935


In [250]:
# Looks like waste oil accounts for the negative costs
BALHHW = df_raw[(df_raw['Site ID'] == 'BALHHW')].sort_values(by='Cost')
BALHHW[BALHHW['Service Name'] == 'Oil #1']

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
82088,BALHHW,FNSB-Solid Waste Baler/Office/HHW,VP287678,Sourdough Fuel (Petro Star),39389003 (LANDFILL OFF),03/10/2016,03/10/2016,10/11/2017,,,2016-03-09,2016-03-10,Oil #1,Fuel Oil #1 (Gallons),,-500.1,-768.85,,,
21078,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,07/31/2006,07/31/2006,04/08/2011,,,2006-07-01,2006-07-31,Oil #1,Fuel Oil #1 (Gallons),,1569.0,0.00,Gallons,,
21178,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,11/30/2014,11/30/2014,09/28/2015,,,2014-10-31,2014-11-30,Oil #1,Fuel Oil #1 (Gallons),,3330.0,0.00,Gallons,,
21177,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,10/31/2014,10/31/2014,09/28/2015,,,2014-09-30,2014-10-31,Oil #1,Fuel Oil #1 (Gallons),,1435.0,0.00,Gallons,,
21176,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,09/30/2014,09/30/2014,09/28/2015,,,2014-08-31,2014-09-30,Oil #1,Fuel Oil #1 (Gallons),,0.0,0.00,,,
21175,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,08/31/2014,08/31/2014,09/28/2015,,,2014-07-31,2014-08-31,Oil #1,Fuel Oil #1 (Gallons),,1314.0,0.00,Gallons,,
21174,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,07/31/2014,07/31/2014,09/28/2015,,,2014-06-30,2014-07-31,Oil #1,Fuel Oil #1 (Gallons),,0.0,0.00,,,
21173,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,06/30/2014,06/30/2014,09/28/2015,,,2014-05-31,2014-06-30,Oil #1,Fuel Oil #1 (Gallons),,0.0,0.00,,,
21172,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,05/31/2014,05/31/2014,09/28/2015,,,2014-04-30,2014-05-31,Oil #1,Fuel Oil #1 (Gallons),,1632.0,0.00,Gallons,,
21171,BALHHW,FNSB-Solid Waste Baler/Office/HHW,WO,Waste Oil,1234,04/30/2014,04/30/2014,09/28/2015,,,2014-03-31,2014-04-30,Oil #1,Fuel Oil #1 (Gallons),,568.0,0.00,Gallons,,


In [251]:
# No idea why these costs are negative but it still seems like it should be filtered out
df_raw[(df_raw['Site ID'] == 'BENBG1')].sort_values(by='Cost')

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
82569,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),10/20/2011,10/20/2011,04/09/2013,,,2011-09-20,2011-10-20,Oil #1,Fuel Oil #1 (Gallons),,198.0,-694.40,Gallons,,
82571,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),10/20/2011,10/20/2011,04/09/2013,,,2011-09-20,2011-10-20,Oil #1,Misc. fee,,,-0.40,,,
82570,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),10/20/2011,10/20/2011,04/09/2013,,,2011-09-20,2011-10-20,Oil #1,FED LUS TX,,,-0.20,,,
66899,BENBG1,FNSB-Solid Waste Entrance Scales,VG354933,Golden Valley Electric,28219,05/08/2009,05/08/2009,02/09/2011,,,2009-04-07,2009-05-07,Electricity,Fuel Adjustment,,,0.00,,,
66895,BENBG1,FNSB-Solid Waste Entrance Scales,VG354933,Golden Valley Electric,28219,04/08/2009,04/08/2009,02/09/2011,,,2009-03-09,2009-04-07,Electricity,Fuel Adjustment,,,0.00,,,
66891,BENBG1,FNSB-Solid Waste Entrance Scales,VG354933,Golden Valley Electric,28219,03/11/2009,03/11/2009,02/09/2011,,,2009-02-06,2009-03-09,Electricity,Fuel Adjustment,,,0.00,,,
82558,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),11/07/2009,11/07/2009,02/11/2011,,,2009-02-19,2009-11-07,Oil #1,FED LUS TX,,,0.04,,,
82585,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),02/27/2012,02/27/2012,08/03/2012,,,2012-02-03,2012-02-27,Oil #1,FED LUS TX,,,0.06,,,
82588,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),03/26/2012,03/26/2012,08/03/2012,,,2012-02-27,2012-03-26,Oil #1,FED LUS TX,,,0.07,,,
82550,BENBG1,FNSB-Solid Waste Entrance Scales,VP287678,Sourdough Fuel (Petro Star),39389009 (SCALES),11/26/2007,11/26/2007,02/10/2011,,,2007-08-09,2007-11-26,Oil #1,FED LUS TX,,,0.07,,,


In [252]:
# Filter out zero cost or less records (see analysis above)
nonzero_usage = nonzero_usage.query("cost > 0")

In [253]:
# Get the total fuel cost and usage for all buildings by year and month
grouped_nonzero_usage = nonzero_usage.groupby(['service_type', 'fiscal_year', 'fiscal_mo']).sum()

# Divide the total cost for all building by the total usage for all buildings so that the average is weighted correctly
grouped_nonzero_usage['avg_price_per_mmbtu'] = grouped_nonzero_usage.cost / grouped_nonzero_usage.mmbtu

# Get only the desired outcome, price per million BTU for each fuel type, and the number of calendar months it is based on
# i.e. the number of months of bills for each fuel for all buildings for that particular month.
grouped_nonzero_usage = grouped_nonzero_usage[['avg_price_per_mmbtu', 'cal_mo']]

In [254]:
# Drop electricity from the dataframe.
grouped_nonzero_usage = grouped_nonzero_usage.reset_index()
grouped_nonzero_heatfuel_use = grouped_nonzero_usage.query("service_type != 'Electricity'")

# Create a column for each service type
grouped_nonzero_heatfuel_use = pd.pivot_table(grouped_nonzero_heatfuel_use,
                                              values='avg_price_per_mmbtu',
                                              index=['fiscal_year', 'fiscal_mo'],
                                              columns='service_type'
                                                )
grouped_nonzero_heatfuel_use = grouped_nonzero_heatfuel_use.reset_index()

grouped_nonzero_heatfuel_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas,Oil #1,Steam
0,2006,2,,17.464387,
1,2006,3,,17.464387,
2,2006,4,,17.464387,
3,2006,5,,17.464387,11.374288
4,2006,6,12.480403,17.464387,9.529815


## Monthly Cost Per MMBTU: Data and Graphs - Page 9

In [255]:
raw_oil = df_raw[(df_raw['Service Name'] == 'Oil #1') & (df_raw['Item Description'] != 'Fuel Oil #1 (Gallons)')]

In [256]:
raw_oil['Item Description'].unique()

array(['FED LUS TX', 'FED OS TX', 'Misc. fee', 'Tax: Regulatory',
       'Surcharge', 'Other charges', 'HO#2', 'Service charge'], dtype=object)

In [257]:
raw_oil.query("Units != 'Gallons'")['Cost'].sum()

9845.660000000027

In [258]:
raw_gas_analysis = df_raw[(df_raw['Service Name'] == 'Natural Gas') & (df_raw['Item Description'] != 'Natural gas (CCF)')]

In [259]:
raw_gas_analysis['Item Description'].unique()

array(['Regulatory Cost Charge', 'Customer Charge', 'Late charge',
       'Gas Charge (CCF)', 'Other charges', 'Cost adjustments',
       'Misc. credit', 'Previous balance adj.', 'Tax: Regulatory',
       'Misc. fee', 'Service activation'], dtype=object)

In [260]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Misc. credit']

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
4707,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),07/31/2012,07/31/2012,08/09/2012,,,2012-06-30,2012-07-31,Natural Gas,Misc. credit,,,-4.34,,,
4711,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),08/31/2012,08/31/2012,09/13/2012,,,2012-07-31,2012-08-31,Natural Gas,Misc. credit,,,-4.11,,,
4715,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),09/28/2012,09/28/2012,02/06/2013,,,2012-08-31,2012-09-28,Natural Gas,Misc. credit,,,0.0,,,
4719,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),10/31/2012,10/31/2012,12/18/2012,,,2012-09-28,2012-10-31,Natural Gas,Misc. credit,,,0.0,,,
4723,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),11/30/2012,11/30/2012,12/17/2012,,,2012-10-31,2012-11-30,Natural Gas,Misc. credit,,,0.0,,,
4736,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),03/29/2013,03/29/2013,07/22/2013,,,2013-02-28,2013-03-29,Natural Gas,Misc. credit,,,0.0,,,
4740,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),04/30/2013,04/30/2013,07/22/2013,,,2013-03-29,2013-04-30,Natural Gas,Misc. credit,,,0.0,,,
4744,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),05/31/2013,05/31/2013,07/22/2013,,,2013-04-30,2013-05-31,Natural Gas,Misc. credit,,,0.0,,,
4748,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),06/30/2013,06/30/2013,07/22/2013,,,2013-05-31,2013-06-30,Natural Gas,Misc. credit,,,0.0,,,
4753,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),07/31/2013,07/31/2013,08/08/2013,,,2013-06-30,2013-07-31,Natural Gas,Misc. credit,,,0.0,,,


In [261]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Cost adjustments']

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
4496,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),10/01/2006,10/01/2006,02/24/2011,,,2006-09-01,2006-09-29,Natural Gas,Cost adjustments,,,-653.8,,,


In [262]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Previous balance adj.']

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
4749,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),06/30/2013,06/30/2013,07/22/2013,,,2013-05-31,2013-06-30,Natural Gas,Previous balance adj.,,,-25.75,,,
4754,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),07/31/2013,07/31/2013,08/08/2013,,,2013-06-30,2013-07-31,Natural Gas,Previous balance adj.,,,0.0,,,
4759,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),08/31/2013,08/31/2013,09/12/2013,,,2013-07-31,2013-08-30,Natural Gas,Previous balance adj.,,,0.0,,,
4764,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),09/30/2013,09/30/2013,10/17/2013,,,2013-08-30,2013-09-30,Natural Gas,Previous balance adj.,,,0.0,,,
4769,TRGR,FNSB-Transit Garage,VF314940,Fairbanks Natural Gas,10282 (3175 PEGER-TRG BG1),10/31/2013,10/31/2013,11/20/2013,,,2013-09-30,2013-10-31,Natural Gas,Previous balance adj.,,,0.0,,,


In [263]:
# Heating energy use, in MMBTUs
monthly_heating.head()

Unnamed: 0_level_0,service_type,Natural Gas,Oil #1,Steam,total_heating_energy
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006,12,,,0.0,0.0
2007,1,,,0.0,0.0
2007,2,,,0.0,0.0
2007,3,,,0.0,0.0
2007,4,,,0.0,0.0


In [264]:
# Query the dataframe for natural gas charges with CCF only?  
df.query("service_type == 'Natural Gas'").head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
180,3,Natural Gas,2006,1,Natural gas (CCF),CCF,9412.56,30.0,7394.0,2006,7,754.188
181,3,Natural Gas,2006,1,Other Charge,-,45.54,30.0,,2006,7,
182,3,Natural Gas,2006,2,Natural gas (CCF),CCF,6684.52,27.0,5251.0,2006,8,535.602
183,3,Natural Gas,2006,2,Other Charge,-,37.35,27.0,,2006,8,
184,3,Natural Gas,2006,3,Natural gas (CCF),CCF,3653.51,30.0,2870.0,2006,9,292.74


In [265]:
# Exclude demand charges from the natural gas costs.  This is because the unit costs for natural gas go to infinity
# when there is zero usage but a customer charge
cost_df1 = df.drop(df[(df['service_type'] == 'Natural Gas') & (df['units'] != 'CCF')].index)
cost_df1.query("service_type == 'Natural Gas'").head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
180,3,Natural Gas,2006,1,Natural gas (CCF),CCF,9412.56,30.0,7394.0,2006,7,754.188
182,3,Natural Gas,2006,2,Natural gas (CCF),CCF,6684.52,27.0,5251.0,2006,8,535.602
184,3,Natural Gas,2006,3,Natural gas (CCF),CCF,3653.51,30.0,2870.0,2006,9,292.74
186,3,Natural Gas,2006,4,Natural gas (CCF),CCF,2599.47,29.0,2042.0,2006,10,208.284
188,3,Natural Gas,2006,5,Natural gas (CCF),CCF,1015.85,30.0,798.0,2006,11,81.396


In [266]:
# Create cost dataframe for given site from processed data
cost_df1 = cost_df1.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo', 'cost']]
cost_df1.head()

Unnamed: 0,service_type,fiscal_year,fiscal_mo,cost
30353,Electricity,2006,12,739.28
30354,Electricity,2006,12,0.0
30355,Electricity,2006,12,183.065
30356,Electricity,2007,1,1414.275
30357,Electricity,2007,1,0.0


In [267]:
# Split out by service type
monthly_heating_cost = pd.pivot_table(cost_df1,
                                values='cost',
                                index=['fiscal_year', 'fiscal_mo'],
                                columns=['service_type'],
                                aggfunc=np.sum
                                )
monthly_heating_cost.head()

Unnamed: 0_level_0,service_type,Electricity,Natural Gas,Oil #1,Sewer,Water
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006,12,922.345,,,,
2007,1,1765.355,,,,
2007,2,1870.268065,,,,
2007,3,1459.435507,,,,
2007,4,1632.385461,,,437.556613,362.837258


In [268]:
# Add in columns for the missing energy services
missing_services = bu.missing_energy_services(monthly_heating_cost.columns)
bu.add_columns(monthly_heating_cost, missing_services)

# Drop the non-heating services
monthly_heating_cost = monthly_heating_cost.drop(labels=['Electricity', 'Sewer', 'Water'], axis=1)

# Create a total heating column
monthly_heating_cost['total_heating_cost'] = monthly_heating_cost.sum(axis=1)

monthly_heating_cost.head()

Unnamed: 0_level_0,service_type,Natural Gas,Oil #1,Steam,total_heating_cost
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006,12,,,0.0,0.0
2007,1,,,0.0,0.0
2007,2,,,0.0,0.0
2007,3,,,0.0,0.0
2007,4,,,0.0,0.0


In [269]:
monthly_heating_cost = monthly_heating_cost.rename(columns={'Natural Gas':'Natural Gas Cost',
                                                           'Oil #1':'Oil #1 Cost',
                                                           'Steam': 'Steam Cost'})
monthly_heating_cost.head()

Unnamed: 0_level_0,service_type,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006,12,,,0.0,0.0
2007,1,,,0.0,0.0
2007,2,,,0.0,0.0
2007,3,,,0.0,0.0
2007,4,,,0.0,0.0


In [270]:
monthly_heat_energy_and_use = pd.merge(monthly_heating_cost, monthly_heating, left_index=True, right_index=True, how='outer')
monthly_heat_energy_and_use.head()

Unnamed: 0_level_0,service_type,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2006,12,,,0.0,0.0,,,0.0,0.0
2007,1,,,0.0,0.0,,,0.0,0.0
2007,2,,,0.0,0.0,,,0.0,0.0
2007,3,,,0.0,0.0,,,0.0,0.0
2007,4,,,0.0,0.0,,,0.0,0.0


In [271]:
# Create unit cost columns in $ / MMBTU for each fuel type
monthly_heat_energy_and_use['fuel_oil_unit_cost'] = monthly_heat_energy_and_use['Oil #1 Cost'] / monthly_heat_energy_and_use['Oil #1']
monthly_heat_energy_and_use['natural_gas_unit_cost'] = monthly_heat_energy_and_use['Natural Gas Cost'] / monthly_heat_energy_and_use['Natural Gas']
monthly_heat_energy_and_use['district_heat_unit_cost'] = monthly_heat_energy_and_use['Steam Cost'] / monthly_heat_energy_and_use['Steam']
monthly_heat_energy_and_use['building_unit_cost'] = monthly_heat_energy_and_use.total_heating_cost / monthly_heat_energy_and_use.total_heating_energy
monthly_heat_energy_and_use.head()

Unnamed: 0_level_0,service_type,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_unit_cost
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006,12,,,0.0,0.0,,,0.0,0.0,,,,
2007,1,,,0.0,0.0,,,0.0,0.0,,,,
2007,2,,,0.0,0.0,,,0.0,0.0,,,,
2007,3,,,0.0,0.0,,,0.0,0.0,,,,
2007,4,,,0.0,0.0,,,0.0,0.0,,,,


In [272]:
# Reset the index for easier processing
monthly_heat_energy_and_use = monthly_heat_energy_and_use.reset_index()

In [273]:
# Add in unit costs for fuels that are currently blank

unit_cost_cols = ['fuel_oil_unit_cost', 'natural_gas_unit_cost', 'district_heat_unit_cost']
service_types = ['Oil #1_avg_unit_cost', 'Natural Gas_avg_unit_cost', 'Steam_avg_unit_cost']

unit_cost_dict = dict(zip(unit_cost_cols,service_types))

In [274]:
# Add in average unit costs calculated from all sites for each month
monthly_heat_energy_and_use = pd.merge(monthly_heat_energy_and_use, grouped_nonzero_heatfuel_use, 
                                       left_on=['fiscal_year', 'fiscal_mo'], right_on=['fiscal_year', 'fiscal_mo'],
                                      how='left', suffixes=('', '_avg_unit_cost'))
monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_unit_cost,Natural Gas_avg_unit_cost,Oil #1_avg_unit_cost,Steam_avg_unit_cost
0,2006,12,,,0.0,0.0,,,0.0,0.0,,,,,12.512925,17.000551,6.219821
1,2007,1,,,0.0,0.0,,,0.0,0.0,,,,,13.72549,13.708584,4.803764
2,2007,2,,,0.0,0.0,,,0.0,0.0,,,,,13.689365,16.168198,8.501139
3,2007,3,,,0.0,0.0,,,0.0,0.0,,,,,12.494336,14.966004,9.015808
4,2007,4,,,0.0,0.0,,,0.0,0.0,,,,,13.72549,14.520701,9.843281


In [275]:
# Check each column to see if it is NaN (identified when the value does not equal itself) and if it is, fill with the average
# price per MMBTU taken from all sites
for col, service in unit_cost_dict.items():
    monthly_heat_energy_and_use[col] = np.where(monthly_heat_energy_and_use[col] != monthly_heat_energy_and_use[col],
                                               monthly_heat_energy_and_use[service], 
                                               monthly_heat_energy_and_use[col])

In [276]:
def fiscal_to_calendar(fiscal_year, fiscal_mo):
    """Converts a fiscal year and month into a calendar year and month for graphing purposes.
    Returns (calendar_year, calendar_month) tuple."""
    
    if fiscal_mo > 6:
        calendar_month = fiscal_mo - 6
        calendar_year = fiscal_year
    else:
        calendar_month = fiscal_mo + 6
        calendar_year = fiscal_year - 1
        
    return (calendar_year, calendar_month)

In [277]:
# Add calendar year and month columns
cal_year = []
cal_mo = []
for fiscal_year, fiscal_mo in zip(monthly_heat_energy_and_use.fiscal_year, monthly_heat_energy_and_use.fiscal_mo):
    CalYear, CalMo = fiscal_to_calendar(fiscal_year, fiscal_mo)
    cal_year.append(CalYear)
    cal_mo.append(CalMo)
monthly_heat_energy_and_use['calendar_year'] = cal_year
monthly_heat_energy_and_use['calendar_mo'] = cal_mo
monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_unit_cost,Natural Gas_avg_unit_cost,Oil #1_avg_unit_cost,Steam_avg_unit_cost,calendar_year,calendar_mo
0,2006,12,,,0.0,0.0,,,0.0,0.0,17.000551,12.512925,6.219821,,12.512925,17.000551,6.219821,2006,6
1,2007,1,,,0.0,0.0,,,0.0,0.0,13.708584,13.72549,4.803764,,13.72549,13.708584,4.803764,2006,7
2,2007,2,,,0.0,0.0,,,0.0,0.0,16.168198,13.689365,8.501139,,13.689365,16.168198,8.501139,2006,8
3,2007,3,,,0.0,0.0,,,0.0,0.0,14.966004,12.494336,9.015808,,12.494336,14.966004,9.015808,2006,9
4,2007,4,,,0.0,0.0,,,0.0,0.0,14.520701,13.72549,9.843281,,13.72549,14.520701,9.843281,2006,10


In [278]:
# Create a date column using the calendar year and month to pass to the graphing function

def get_date(row):
    return datetime.date(year=row['calendar_year'], month=row['calendar_mo'], day=1)

monthly_heat_energy_and_use['date'] = monthly_heat_energy_and_use[['calendar_year','calendar_mo']].apply(get_date, axis=1)
monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_unit_cost,Natural Gas_avg_unit_cost,Oil #1_avg_unit_cost,Steam_avg_unit_cost,calendar_year,calendar_mo,date
0,2006,12,,,0.0,0.0,,,0.0,0.0,17.000551,12.512925,6.219821,,12.512925,17.000551,6.219821,2006,6,2006-06-01
1,2007,1,,,0.0,0.0,,,0.0,0.0,13.708584,13.72549,4.803764,,13.72549,13.708584,4.803764,2006,7,2006-07-01
2,2007,2,,,0.0,0.0,,,0.0,0.0,16.168198,13.689365,8.501139,,13.689365,16.168198,8.501139,2006,8,2006-08-01
3,2007,3,,,0.0,0.0,,,0.0,0.0,14.966004,12.494336,9.015808,,12.494336,14.966004,9.015808,2006,9,2006-09-01
4,2007,4,,,0.0,0.0,,,0.0,0.0,14.520701,13.72549,9.843281,,13.72549,14.520701,9.843281,2006,10,2006-10-01


In [289]:
%matplotlib inline

In [290]:
p9g1_filename, p9g1_url = gu.graph_filename_url(site, "energy_cost")
gu.fuel_price_comparison_graph(monthly_heat_energy_and_use, 'date', unit_cost_cols, 'building_unit_cost', p9g1_filename)


## Realized Savings from Fuel Switching: Page 9, Graph 2

In [280]:
monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,fuel_oil_unit_cost,natural_gas_unit_cost,district_heat_unit_cost,building_unit_cost,Natural Gas_avg_unit_cost,Oil #1_avg_unit_cost,Steam_avg_unit_cost,calendar_year,calendar_mo,date
0,2006,12,,,0.0,0.0,,,0.0,0.0,17.000551,12.512925,6.219821,,12.512925,17.000551,6.219821,2006,6,2006-06-01
1,2007,1,,,0.0,0.0,,,0.0,0.0,13.708584,13.72549,4.803764,,13.72549,13.708584,4.803764,2006,7,2006-07-01
2,2007,2,,,0.0,0.0,,,0.0,0.0,16.168198,13.689365,8.501139,,13.689365,16.168198,8.501139,2006,8,2006-08-01
3,2007,3,,,0.0,0.0,,,0.0,0.0,14.966004,12.494336,9.015808,,12.494336,14.966004,9.015808,2006,9,2006-09-01
4,2007,4,,,0.0,0.0,,,0.0,0.0,14.520701,13.72549,9.843281,,13.72549,14.520701,9.843281,2006,10,2006-10-01


In [281]:
old_usage_cols = ['Natural Gas', 'Oil #1', 'Steam']

In [282]:
# Create an indicator for whether a given heating fuel is available for the facility.  This is done by checking the use for all
# months- if it is zero, then that building doesn't have the option to use that type of fuel.



for col in old_usage_cols:
    new_col_name = col + "_available"
    monthly_heat_energy_and_use[new_col_name] = np.where(monthly_heat_energy_and_use[col].sum() == 0, 0, 1)

monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,...,building_unit_cost,Natural Gas_avg_unit_cost,Oil #1_avg_unit_cost,Steam_avg_unit_cost,calendar_year,calendar_mo,date,Natural Gas_available,Oil #1_available,Steam_available
0,2006,12,,,0.0,0.0,,,0.0,0.0,...,,12.512925,17.000551,6.219821,2006,6,2006-06-01,1,1,0
1,2007,1,,,0.0,0.0,,,0.0,0.0,...,,13.72549,13.708584,4.803764,2006,7,2006-07-01,1,1,0
2,2007,2,,,0.0,0.0,,,0.0,0.0,...,,13.689365,16.168198,8.501139,2006,8,2006-08-01,1,1,0
3,2007,3,,,0.0,0.0,,,0.0,0.0,...,,12.494336,14.966004,9.015808,2006,9,2006-09-01,1,1,0
4,2007,4,,,0.0,0.0,,,0.0,0.0,...,,13.72549,14.520701,9.843281,2006,10,2006-10-01,1,1,0


In [283]:
# Calculate what it would have cost if the building used only one fuel type

available_cols = ['Oil #1_available','Natural Gas_available','Steam_available']
available_dict = dict(zip(unit_cost_cols, available_cols))
print (available_dict)
hypothetical_cost_cols = []

for unit_cost, avail_col in available_dict.items():
    new_col_name = unit_cost + "_hypothetical"
    hypothetical_cost_cols.append(new_col_name)
    monthly_heat_energy_and_use[new_col_name] = monthly_heat_energy_and_use[unit_cost] * \
    monthly_heat_energy_and_use.total_heating_energy * monthly_heat_energy_and_use[avail_col]
    
monthly_heat_energy_and_use.head()

{'natural_gas_unit_cost': 'Natural Gas_available', 'district_heat_unit_cost': 'Steam_available', 'fuel_oil_unit_cost': 'Oil #1_available'}


service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,...,Steam_avg_unit_cost,calendar_year,calendar_mo,date,Natural Gas_available,Oil #1_available,Steam_available,natural_gas_unit_cost_hypothetical,district_heat_unit_cost_hypothetical,fuel_oil_unit_cost_hypothetical
0,2006,12,,,0.0,0.0,,,0.0,0.0,...,6.219821,2006,6,2006-06-01,1,1,0,0.0,0.0,0.0
1,2007,1,,,0.0,0.0,,,0.0,0.0,...,4.803764,2006,7,2006-07-01,1,1,0,0.0,0.0,0.0
2,2007,2,,,0.0,0.0,,,0.0,0.0,...,8.501139,2006,8,2006-08-01,1,1,0,0.0,0.0,0.0
3,2007,3,,,0.0,0.0,,,0.0,0.0,...,9.015808,2006,9,2006-09-01,1,1,0,0.0,0.0,0.0
4,2007,4,,,0.0,0.0,,,0.0,0.0,...,9.843281,2006,10,2006-10-01,1,1,0,0.0,0.0,0.0


In [284]:
# Calculate the monthly savings to the building by not using the most expensive available fuel entirely
monthly_heat_energy_and_use['fuel_switching_savings'] = monthly_heat_energy_and_use[hypothetical_cost_cols].max(axis=1) \
                                                        - monthly_heat_energy_and_use.total_heating_cost

In [285]:
# Sort dataframe to calculate cumulative value
monthly_heat_energy_and_use = monthly_heat_energy_and_use.sort_values(by='date', ascending=True)

# Calculate cumulative value
monthly_heat_energy_and_use['cumulative_fuel_switching_savings'] = np.cumsum(monthly_heat_energy_and_use.fuel_switching_savings)

monthly_heat_energy_and_use.head()

service_type,fiscal_year,fiscal_mo,Natural Gas Cost,Oil #1 Cost,Steam Cost,total_heating_cost,Natural Gas,Oil #1,Steam,total_heating_energy,...,calendar_mo,date,Natural Gas_available,Oil #1_available,Steam_available,natural_gas_unit_cost_hypothetical,district_heat_unit_cost_hypothetical,fuel_oil_unit_cost_hypothetical,fuel_switching_savings,cumulative_fuel_switching_savings
0,2006,12,,,0.0,0.0,,,0.0,0.0,...,6,2006-06-01,1,1,0,0.0,0.0,0.0,0.0,0.0
1,2007,1,,,0.0,0.0,,,0.0,0.0,...,7,2006-07-01,1,1,0,0.0,0.0,0.0,0.0,0.0
2,2007,2,,,0.0,0.0,,,0.0,0.0,...,8,2006-08-01,1,1,0,0.0,0.0,0.0,0.0,0.0
3,2007,3,,,0.0,0.0,,,0.0,0.0,...,9,2006-09-01,1,1,0,0.0,0.0,0.0,0.0,0.0
4,2007,4,,,0.0,0.0,,,0.0,0.0,...,10,2006-10-01,1,1,0,0.0,0.0,0.0,0.0,0.0


In [286]:
p9g2_filename, p9g2_url = gu.graph_filename_url(site, "cumulative_fuel_switching_savings")
gu.create_monthly_line_graph(monthly_heat_energy_and_use, 'date', 'cumulative_fuel_switching_savings',
                            'Cumulative Fuel Switching Savings Realized [$]', p9g2_filename)

In [287]:
# Convert df to dictionary
heating_cost_rows = bu.df_to_dictionaries(heating_cost_and_use)

# Add data and graphs to main dictionary
template_data['heating_cost_analysis'] = dict(
    graphs=[p9g1_url, p9g2_url],
    table={'rows': heating_cost_rows},
)

# Water Analysis Table - Page 10

In [None]:
water_use = df.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo','cost', 'usage', 'units']]
water_use.head()

In [None]:
# Create month count field for all months that have water and sewer bills
water_use_only = water_use.query("service_type == 'Water'")
water_months_present = bu.months_present(water_use_only)
water_mo_count = bu.month_count(water_months_present)
water_mo_count

In [None]:
# Create annual water gallon usage dataframe
water_gal_df = pd.pivot_table(water_use, 
                              values='usage',
                              index=['fiscal_year',], 
                              columns=['service_type'],
                              aggfunc=np.sum
)

water_gal_df.head()

In [None]:
# Use only required columns 
water_gal_df = water_gal_df[['Water']]
# Calculate percent change column
water_gal_df['water_use_pct_change'] = water_gal_df.Water.pct_change()

In [None]:
# Create annual water and sewer cost dataframe
water_cost_df = pd.pivot_table(water_use, 
                              values='cost',
                              index=['fiscal_year',], 
                              columns=['service_type'],
                              aggfunc=np.sum
)

water_cost_df.head()

In [None]:
    # Calculate totals, percent change
    water_cost_df = water_cost_df[water_cost_df.columns.difference(['Electricity', 'Natural Gas', 'Oil #1', 'Steam', 'Refuse'])]
    
    # Rename columns only if they exist in the water cost dataframe
    rename_dict = {'Sewer': 'Sewer Cost',
                   'Water': 'Water Cost'}
    water_cost_df = water_cost_df.rename(columns={k: v for k, v in rename_dict.items() if k in water_cost_df})
    
    # First check to make sure sewer data is included; if so, calculate total cost
    water_cost_df['total_water_sewer_cost'] = np.where('Sewer Cost' in list(water_cost_df.columns.values),
                                                       water_cost_df['Sewer Cost'] + water_cost_df['Water Cost'],
                                                       water_cost_df['Water Cost'])

In [None]:
    water_cost_df['water_cost_pct_change'] = water_cost_df['Water Cost'].pct_change()
                                                       
    # First check to make sure sewer data is included; if so, calculate percent change
    water_cost_df['sewer_cost_pct_change'] = np.where('Sewer Cost' in list(water_cost_df.columns.values),
                                                      water_cost_df['Sewer Cost'].pct_change(),
                                                      np.nan)
                                                       
    water_cost_df['total_water_sewer_cost_pct_change'] = water_cost_df.total_water_sewer_cost.pct_change()

In [None]:
# Merge use and cost dataframes
water_use_and_cost = pd.merge(water_cost_df, water_gal_df, left_index=True, right_index=True, how='outer')
water_use_and_cost.head()

In [None]:
water_use_and_cost['water_unit_cost'] = water_use_and_cost.total_water_sewer_cost / water_use_and_cost.Water
water_use_and_cost['water_unit_cost_pct_change'] = water_use_and_cost.water_unit_cost.pct_change()

# Use only complete years 
water_use_and_cost['month_count'] = water_mo_count
water_use_and_cost = water_use_and_cost.query("month_count == 12")
water_use_and_cost = water_use_and_cost.drop('month_count', axis=1)
water_use_and_cost = water_use_and_cost.sort_index(ascending=False)
water_use_and_cost = water_use_and_cost.rename(columns={'Sewer Cost':'sewer_cost',
                                                       'Water Cost':'water_cost',
                                                       'total_water_sewer_cost':'total_cost',
                                                       'total_water_sewer_cost_pct_change':'total_cost_pct_change',
                                                       'Water':'total_usage',
                                                       'water_usage_pct_change':'total_usage_pct_change',
                                                       'water_unit_cost':'total_unit_cost',
                                                       'water_unit_cost_pct_change':'total_unit_cost_pct_change'
                                                       })
water_use_and_cost

## Create Water Cost Stacked Bar Graph - Page 10 Graph 1

In [None]:
p10g1_filename, p10g1_url = gu.graph_filename_url(site, "utility_cost")
gu.create_stacked_bar(water_use_and_cost.reset_index(), 'fiscal_year', ['sewer_cost', 'water_cost'], 
                      'Utility Cost [$]', "test title", p10g1_filename)

## Create Monthly Profile of Water Usage - Page 10 Graph 2

In [None]:
# Create monthly water gallon dataframe
water_gal_df_monthly = pd.pivot_table(water_use, 
                              values='usage',
                              index=['fiscal_year', 'fiscal_mo'], 
                              columns=['service_type'],
                              aggfunc=np.sum
)

water_gal_df_monthly.head()

In [None]:
p10g2_filename, p10g2_url = gu.graph_filename_url(site, "monthly_water_usage_profile")
gu.create_monthly_profile(water_gal_df_monthly, 'Water', 'Monthly Water Usage Profile [gallons]', 'green', p10g2_filename)

In [None]:
# Convert df to dictionary
water_rows = bu.df_to_dictionaries(water_use_and_cost)

# Add data and graphs to main dictionary
template_data['water_analysis'] = dict(
    graphs=[p10g1_url, p10g2_url],
    table={'rows': water_rows},
)

In [None]:
template_data