In [1]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
import sys
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu
import os 

In [2]:
# Unpickle the raw DataFrame
df_raw = pd.read_pickle('../df_raw.pkl')

# Unpickle the processed DataFrame.
dfp = pd.read_pickle('../df_processed.pkl')

# Unpickle the Utility object.  The file is opened in a `with` block so the
# handle is closed as soon as loading finishes rather than being left open
# until garbage collection.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
with open('../util_obj.pkl', 'rb') as util_file:
    ut = pickle.load(util_file)

In [14]:
# Short aliases for the objects loaded above.  These are plain name bindings,
# not copies: `df` refers to the same DataFrame object as `dfp`, and
# `util_obj` refers to the same object as `ut`.
util_obj, df = ut, dfp


In [33]:
# Preview only the first rows instead of dumping the whole DataFrame.
df.head(10)

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
0,12,2009,2607.979752,facility,Energy charge,48.998471,electricity,03,kWh,14360.630515,2010,6
1,1,2010,7849.394090,facility,Energy charge,147.799171,electricity,03,kWh,43317.459347,2010,7
2,2,2010,7344.389601,facility,Energy charge,135.459603,electricity,03,kWh,39700.938710,2010,8
3,3,2010,7404.968981,facility,Energy charge,133.029802,electricity,03,kWh,38988.804762,2010,9
4,4,2010,6472.904545,facility,Energy charge,122.476738,electricity,03,kWh,35895.878788,2010,10
5,5,2010,5753.127085,facility,Energy charge,105.088466,electricity,03,kWh,30799.667666,2010,11
6,6,2010,23.069404,facility,Energy charge,0.000000,electricity,03,-,0.000000,2010,12
7,6,2010,4523.949977,facility,Energy charge,75.133586,electricity,03,kWh,22020.394406,2010,12
8,7,2010,41.068527,facility,Energy charge,0.000000,electricity,03,-,0.000000,2011,1
9,7,2010,4085.840623,facility,Energy charge,67.010921,electricity,03,kWh,19639.777531,2011,1


In [23]:
# Look up the 'group' entry of one building's info record.
# NOTE(review): `site_id` is not assigned in any cell visible in this notebook
# (only `site` is); this presumably relied on a variable left over from an
# earlier session -- confirm before a Restart & Run All.
ut.building_info(site_id)['group']


'School District'

In [42]:
# Display the full info record for one building; the recorded output is a
# dictionary of building attributes.
# NOTE(review): `site_id` is not assigned in any cell visible in this notebook
# (only `site` is) -- confirm where it is expected to come from.
ut.building_info(site_id)

{'site_name': 'SD-Denali Elementary',
 'site_category': 'School District',
 'address': nan,
 'city': nan,
 'group': 'School District',
 'division': nan,
 'campus': nan,
 'primary_func': 'Elementary School',
 'year_built': nan,
 'sq_ft': 49210.0,
 'onsite_gen': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'source_fuel_oil': 'Sourdough Fuel  (Petro Star)',
 'source_natural_gas': 'Fairbanks Natural Gas',
 'source_electricity': 'Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities',
 'source_sewer': 'Golden Heart Utilities',
 'source_refuse': 'Alaska Waste',
 'acct_fuel_oil': '55010001',
 'acct_natural_gas': '10283 - DENALI',
 'acct_electricity': '172230',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1998005610',
 'acct_sewer': '1998005610',
 'acct_refuse': 'AW23-DNL03',
 'facility_list': '',
 'grouping': 'facility',
 'site_id': 

In [27]:
# Site ID of the building to report on.  Kept as a string; the cells below
# pass it to DataFrame queries on `site_id` and to `ut.building_info`.
site = '03'

In [35]:
# -------------------------- Energy Index Report ----------------------------

def energy_index_report(site, df, ut):
    """Placeholder for the Energy Index Report function.

    As written here the function contains only this docstring, so calling it
    does nothing and returns None.  The statements that follow this
    definition are at module level (not indented into the function) so that
    their intermediate results can be inspected cell by cell.

    The intended behavior, per the original description: as well as
    returning template data, the function writes a spreadsheet that
    summarizes values for every building.  The spreadsheet is written to
    'output/extra_data/site_summary_FYYYY.xlsx'.
    """

# Start a dictionary with the main key to hold the template data
template_data = {'energy_index_comparison': {}}

# --------- Table 1, Yearly Table

# Filter down to just this site's bills and only services that
# are energy services.
energy_services = bu.missing_energy_services([])
df1 = df.query('site_id==@site and service_type==@energy_services')

# Only do this table if there are energy services.
if not df1.empty:

    # Sum Energy Costs and Usage by fiscal year.  The string 'sum' is used
    # because passing the NumPy callable np.sum as an aggfunc is deprecated
    # in recent pandas releases.
    df2 = pd.pivot_table(df1, index='fiscal_year', values=['cost', 'mmbtu'], aggfunc='sum')

    # Add a column showing number of months present in each fiscal year.
    # (The return value is not used, so the helper is expected to modify
    # df2 in place.)
    bu.add_month_count_column(df2, df1)

    # Make a column with just the Electricity MMBtu, so that it can be
    # subtracted from the total to get the Heat MMBtu.  The 'mmbtu' column is
    # selected *before* summing so that the text columns are never summed.
    dfe = (
        df1.query("service_type=='electricity'")
        .groupby('fiscal_year')[['mmbtu']]
        .sum()
        .rename(columns={'mmbtu': 'elec_mmbtu'})
    )
    df2 = df2.merge(dfe, how='left', left_index=True, right_index=True)
    # Years with no electricity records get 0 rather than NaN.
    df2['elec_mmbtu'] = df2['elec_mmbtu'].fillna(0.0)
    df2['heat_mmbtu'] = df2.mmbtu - df2.elec_mmbtu

    # Add in degree days to DataFrame
    months_present = bu.months_present(df1)
    deg_days = ut.degree_days_yearly(months_present, site)
    df2['hdd'] = deg_days

    # Get building square footage and calculate EUIs and ECI.
    sq_ft = ut.building_info(site)['sq_ft']
    df2['eui'] = df2.mmbtu * 1e3 / sq_ft
    df2['eci'] = df2.cost / sq_ft
    df2['specific_eui'] = df2.heat_mmbtu * 1e6 / df2.hdd / sq_ft

    # Restrict to full years
    df2 = df2.query("month_count == 12").copy()

    # Reverse the years so the most recent fiscal year comes first.
    df2 = df2.sort_index(ascending=False)

    # get the rows as a list of dictionaries and put into
    # final template data dictionary.
    template_data['energy_index_comparison']['yearly_table'] = {
        'rows': bu.df_to_dictionaries(df2)
    }

In [37]:
#df1

In [44]:
# ---------- Table 2, Details Table

# Use the last complete year for this site as the year for the Details
# table.  If there was no complete year for the site, then use the
# last complete year for the entire dataset.
#
# The site's complete years are taken from the yearly table in `df2`, which
# only exists if the site had energy services.  `df2` is a generic name that
# other code may rebind to a frame that is not indexed by fiscal year, so it
# is only trusted when its index is named 'fiscal_year'.  Without this check
# a stale `df2` yields a row label instead of a year, the filter below then
# matches nothing, and `df1` comes back empty.
yearly_tbl = locals().get('df2')
if (
    isinstance(yearly_tbl, pd.DataFrame)
    and yearly_tbl.index.name == 'fiscal_year'
    and len(yearly_tbl)
):
    last_complete_year = yearly_tbl.index.max()
else:
    # Determine month count by year for Electricity in entire dataset
    # to determine the latest complete year.
    electric_only = df.query("service_type == 'electricity'")
    electric_months_present = bu.months_present(electric_only)
    electric_mo_count = bu.month_count(electric_months_present)
    last_complete_year = max(electric_mo_count[electric_mo_count==12].index)

# Filter down to just the records of the targeted fiscal year and group
site_grp = ut.building_info(site)['grouping']
df1 = df.query('fiscal_year == @last_complete_year and group == @site_grp')

In [49]:
# Inspect the grouping value used to filter the Details table records.
site_grp


'facility'

In [47]:
# Builds the Details table (Table 2): per-building totals for the selected
# fiscal year and group, totals across all buildings, and this site's share
# and rank.  Each intermediate frame gets its own descriptive name instead of
# reusing `df2`/`df3`, so the yearly table that earlier code leaves in `df2`
# is not overwritten by this code.

# Fail early with a clear message if the year/group filter matched nothing;
# otherwise the column renaming just below fails with an opaque
# "Length mismatch" ValueError.
if df1.empty:
    raise ValueError(
        'No records found for fiscal year {} and group {!r}; cannot build '
        'the Details table.'.format(last_complete_year, site_grp)
    )

# Get Total Utility cost by building. This includes non-energy utilities as well.
# (String aggregation names are used throughout; passing NumPy callables
# such as np.sum as aggfunc is deprecated in recent pandas releases.)
df_total_cost = df1.pivot_table(index='site_id', values=['cost'], aggfunc='sum')
df_total_cost.columns = ['total_cost']

# Save this into the Final DataFrame that we will build up as we go.
df_final = df_total_cost.copy()

# Get a list of the Energy Services and restrict the data to
# just these services
energy_svcs = bu.missing_energy_services([])
df_energy = df1.query('service_type == @energy_svcs')

# Summarize Cost by Service Type
df_svc_cost = pd.pivot_table(df_energy, index='site_id', columns='service_type', values='cost', aggfunc='sum')

# Add in any missing columns
bu.add_missing_columns(df_svc_cost, energy_svcs)

# Change column names
df_svc_cost.columns = ['{}_cost'.format(col) for col in df_svc_cost.columns]

# Add a total energy cost column
df_svc_cost['total_energy_cost'] = df_svc_cost.sum(axis=1)

# Add a total Heat Cost Column (everything that is not electricity)
df_svc_cost['total_heat_cost'] = (
    df_svc_cost.total_energy_cost.fillna(0.0) - df_svc_cost.electricity_cost.fillna(0.0)
)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df_svc_cost], axis=1, sort=True)

# Summarize MMBtu by Service Type
df_svc_mmbtu = pd.pivot_table(df_energy, index='site_id', columns='service_type', values='mmbtu', aggfunc='sum')

# Add in any missing columns
bu.add_missing_columns(df_svc_mmbtu, energy_svcs)

# Change column names
df_svc_mmbtu.columns = ['{}_mmbtu'.format(col) for col in df_svc_mmbtu.columns]

# Add a total mmbtu column
df_svc_mmbtu['total_mmbtu'] = df_svc_mmbtu.sum(axis=1)

# Add a total Heat mmbtu Column (everything that is not electricity)
df_svc_mmbtu['total_heat_mmbtu'] = (
    df_svc_mmbtu.total_mmbtu.fillna(0.0) - df_svc_mmbtu.electricity_mmbtu.fillna(0.0)
)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df_svc_mmbtu], axis=1, sort=True)

# Electricity kWh summed by building
df_kwh = pd.pivot_table(df_energy.query('units == "kWh"'), index='site_id', values='usage', aggfunc='sum')
df_kwh.columns = ['electricity_kwh']

# Include in Final DF
df_final = pd.concat([df_final, df_kwh], axis=1, sort=True)

# Electricity kW, both Average and Max by building
# First, sum up kW pieces for each month.  Only the 'usage' column is summed;
# the text columns are left out of the aggregation.
df_kw_monthly = (
    df_energy.query('units == "kW"')
    .groupby(['site_id', 'fiscal_year', 'fiscal_mo'])[['usage']]
    .sum()
)
# Then take the average and the maximum of the monthly values per building.
# The resulting columns come out in the order of the aggfunc list.
df_kw = pd.pivot_table(df_kw_monthly.reset_index(), index='site_id', values='usage', aggfunc=['mean', 'max'])
df_kw.columns = ['electricity_kw_average', 'electricity_kw_max']

# Add into Final Frame
df_final = pd.concat([df_final, df_kw], axis=1, sort=True)

# Add in Square footage info
df_bldg = ut.building_info_df()[['sq_ft']]

# Add into Final Frame.  I do a merge here so as not to bring
# in buildings from the building info spreadsheet that are not in this
# dataset; this dataset has been restricted to one year.
df_final = pd.merge(df_final, df_bldg, how='left', left_index=True, right_index=True)

# Build a DataFrame that has monthly degree days for each site/year/month
# combination.
combos = set(zip(df1.site_id, df1.fiscal_year, df1.fiscal_mo))
df_dd = pd.DataFrame(data=list(combos), columns=['site_id', 'fiscal_year', 'fiscal_mo'])
ut.add_degree_days_col(df_dd)

# Add up the degree days by site (we've already filtered down to one year or less
# of data.)
dd_series = df_dd.groupby('site_id')['degree_days'].sum()

# Put in final DataFrame
df_final = pd.concat([df_final, dd_series], axis=1)

# Add in a column that gives the number of months present for each site
# in this year.  Then filter down to just the sites that have 12 months
# of data.  The month-count helper is given a frame indexed by
# (site_id, fiscal_year), so that index is built first and reduced back
# to site_id afterwards.
df_final = (
    df_final.reset_index()
    .assign(fiscal_year=last_complete_year)
    .set_index(['site_id', 'fiscal_year'])
)
df_final = bu.add_month_count_column_by_site(df_final, df_energy)
df_final = df_final.query('month_count==12').copy()
df_final = df_final.reset_index().set_index('site_id')

# Calculate per square foot values for each building.
df_final['eui'] = df_final.total_mmbtu * 1e3 / df_final.sq_ft
df_final['eci'] = df_final.total_energy_cost / df_final.sq_ft
df_final['specific_eui'] = df_final.total_heat_mmbtu * 1e6 / df_final.sq_ft / df_final.degree_days

# Save this to a spreadsheet, if it has not already been saved
#    fn = 'output/extra_data/site_summary_FY{}.xlsx'.format(last_complete_year)
#    if not os.path.exists(fn):
#        with pd.ExcelWriter(fn) as excel_writer:
#            df_final.to_excel(excel_writer, sheet_name='Sites')

# Get the totals across all buildings.  Total Degree-Days are not relevant,
# so that entry is dropped.
totals_all_bldgs = df_final.sum().drop('degree_days')

# Only use the set of buildings that have some energy use and non-zero
# square footage to determine EUI's and ECI's
energy_bldgs = df_final.query("total_mmbtu > 0 and sq_ft > 0")

# Get total square feet, energy use, and energy cost for these buildings
# and calculate EUI and ECI
sq_ft_energy_bldgs = energy_bldgs.sq_ft.sum()
energy_in_energy_bldgs = energy_bldgs.total_mmbtu.sum()
energy_cost_in_energy_bldgs = energy_bldgs.total_energy_cost.sum()
totals_all_bldgs['eui'] = energy_in_energy_bldgs * 1e3 / sq_ft_energy_bldgs
totals_all_bldgs['eci'] = energy_cost_in_energy_bldgs / sq_ft_energy_bldgs

# For calculating heating specific EUI, further filter the set of
# buildings down to those that have heating fuel use.
# Get separate square footage total and weighted average degree-day for these
# (degree days are weighted by each building's heat MMBtu).
heat_bldgs = energy_bldgs.query("total_heat_mmbtu > 0")
heat_bldgs_sq_ft = heat_bldgs.sq_ft.sum()
heat_bldgs_heat_mmbtu = heat_bldgs.total_heat_mmbtu.sum()
heat_bldgs_degree_days = (
    (heat_bldgs.total_heat_mmbtu * heat_bldgs.degree_days).sum() / heat_bldgs_heat_mmbtu
)
totals_all_bldgs['specific_eui'] = heat_bldgs_heat_mmbtu * 1e6 / heat_bldgs_sq_ft / heat_bldgs_degree_days

# Calculate a rank DataFrame: every column ranked across buildings, with the
# largest value ranked 1.
df_rank = df_final.rank(ascending=False)

if site in df_final.index:
    # The site exists in the DataFrame
    site_info = df_final.loc[site]
    site_pct = site_info / totals_all_bldgs
    site_rank = df_rank.loc[site]
else:
    # Site is not there, probably because not present in this year.
    # Make variables with NaN values for all elements.  Building the Series
    # from the column labels also works when df_final has no rows at all
    # (np.nan is used because the np.NaN alias was removed in NumPy 2.0).
    site_info = pd.Series(np.nan, index=df_final.columns)
    site_pct = site_info.copy()
    site_rank = site_info.copy()

# Make a final dictionary to hold all the results for this table
tbl2_data = {
    'fiscal_year': 'FY {}'.format(last_complete_year),
    'bldg': site_info.to_dict(),
    'all': totals_all_bldgs.to_dict(),
    'pct': site_pct.to_dict(),
    'rank': site_rank.to_dict()
}
template_data['energy_index_comparison']['details_table'] = tbl2_data

In [48]:
# Inspect the template data dictionary assembled so far.
template_data

{'energy_index_comparison': {'yearly_table': {'rows': [{'fiscal_year': 2018,
     'cost': 119018.99586379393,
     'mmbtu': 3355.8255972444153,
     'month_count': 12.0,
     'elec_mmbtu': 1240.7963915757575,
     'heat_mmbtu': 2115.0292056686576,
     'hdd': 12445.951743191383,
     'eui': 68.1939767779804,
     'eci': 2.418593697699531,
     'specific_eui': 3.453304630471062},
    {'fiscal_year': 2017,
     'cost': 132129.2864806073,
     'mmbtu': 5010.09347860715,
     'month_count': 12.0,
     'elec_mmbtu': 1244.6171695210164,
     'heat_mmbtu': 3765.476309086134,
     'hdd': 13803.2,
     'eui': 101.81047507838142,
     'eci': 2.685008869754263,
     'specific_eui': 5.543534744002966},
    {'fiscal_year': 2016,
     'cost': 124043.07555118999,
     'mmbtu': 5795.649293813422,
     'month_count': 12.0,
     'elec_mmbtu': 1262.0559849032256,
     'heat_mmbtu': 4533.593308910196,
     'hdd': 12172.3,
     'eui': 117.77381210756802,
     'eci': 2.520688387547043,
     'specific_eui': 

In [31]:
# -------------- Energy Comparison Graphs ---------------
# Builds a per-site, per-fiscal-year table of cost and energy indices for all
# buildings, then draws comparison graphs for `site` and stores their URLs in
# the template data.

# Filter down to only services that are energy services.
energy_services = bu.missing_energy_services([])
df4 = df.query('service_type==@energy_services').copy()

# Sum Energy Costs and Usage.  The string 'sum' is used because passing the
# NumPy callable np.sum as an aggfunc is deprecated in recent pandas releases.
df5 = pd.pivot_table(df4, index=['site_id', 'fiscal_year'], values=['cost', 'mmbtu'], aggfunc='sum')

# Add a column showing number of months present in each fiscal year.
df5 = bu.add_month_count_column_by_site(df5, df4)

# Create an Electric MMBtu column so it can be subtracted from total to determine
# Heat MMBtu.  The service type is spelled in lowercase ('electricity'), as it
# is in the data and in the other electricity filters in this notebook; a
# capitalized spelling matches no rows and would make heat_mmbtu equal to the
# total mmbtu.  'mmbtu' is selected before summing so text columns are not
# aggregated.
dfe = (
    df4.query("service_type=='electricity'")
    .groupby(['site_id', 'fiscal_year'])[['mmbtu']]
    .sum()
    .rename(columns={'mmbtu': 'elec_mmbtu'})
)
df5 = df5.merge(dfe, how='left', left_index=True, right_index=True)
# Site/years with no electricity records get 0 rather than NaN.
df5['elec_mmbtu'] = df5['elec_mmbtu'].fillna(0.0)
df5['heat_mmbtu'] = df5.mmbtu - df5.elec_mmbtu

# Add in degree-days:
# Create a DataFrame with site, year, month and degree-days, but only one row
# for each site/year/month combo.
dfd = df4[['site_id', 'fiscal_year', 'fiscal_mo']].copy()
dfd.drop_duplicates(inplace=True)
ut.add_degree_days_col(dfd)

# Use the agg function below so that a NaN will be returned for the year
# if any monthly values are NaN
dfd = dfd.groupby(['site_id', 'fiscal_year']).agg({'degree_days': lambda x: np.sum(x.values)})[['degree_days']]
df5 = df5.merge(dfd, how='left', left_index=True, right_index=True)

# Add in some needed building info like square footage, primary function 
# and building category.
df_bldg = ut.building_info_df()

# Shrink to just the needed fields and remove index.
# Also, fill blank values with 'Unknown'.
df_info = df_bldg[['sq_ft', 'site_category', 'primary_func']].copy().reset_index()
df_info['site_category'] = df_info.site_category.fillna('Unknown')
df_info['primary_func'] = df_info.primary_func.fillna('Unknown Type')

# Also Remove the index from df5 and merge in building info
df5 = df5.reset_index()
df5 = df5.merge(df_info, how='left')

# Now calculate per square foot energy measures
df5['eui'] = df5.mmbtu * 1e3 / df5.sq_ft
df5['eci'] = df5.cost / df5.sq_ft
df5['specific_eui'] = df5.heat_mmbtu * 1e6 / df5.degree_days / df5.sq_ft

# Restrict to full years
df5 = df5.query("month_count == 12").copy()

# Make all of the comparison graphs.  For each measure there are two graphs,
# a comparison by building type ('_func') followed by a comparison by
# building owner ('_owner'); the URLs are collected in that order.
graph_measures = [
    ('eci', 'eci'),
    ('eui', 'eui'),
    ('specific_eui', 'speui'),
]
graph_urls = []
for measure_col, file_tag in graph_measures:
    graph_fn, graph_url = gu.graph_filename_url(site, file_tag + '_func')
    gu.building_type_comparison_graph(df5, measure_col, site, graph_fn)
    graph_urls.append(graph_url)

    graph_fn, graph_url = gu.graph_filename_url(site, file_tag + '_owner')
    gu.building_owner_comparison_graph(df5, measure_col, site, graph_fn)
    graph_urls.append(graph_url)

template_data['energy_index_comparison']['graphs'] = graph_urls

#return template_data

ValueError: Length mismatch: Expected axis has 0 elements, new values have 1 elements

In [34]:
# Inspect df1.  The recorded output below shows only a header row, i.e. df1
# held no records when this cell was run.
df1

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
