## Used to Develop New Benchmarking Features
### Ben's Notebook
Assumes that `df_raw.pkl`, `df_processed.pkl`, and `util_obj.pkl` are located in the directory above this one.

In [3]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
import sys
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu
import os 

In [4]:
# Unpickle the raw DataFrame
df_raw = pd.read_pickle('../df_raw.pkl')

# Unpickle the processed DataFrame.
dfp = pd.read_pickle('../df_processed.pkl')

# Unpickle the Utility object
ut = pickle.load(open('../util_obj.pkl', 'rb'))

In [5]:
#dfp.site_id.unique()

In [6]:
# Show the first record of the raw dataframe
#df_raw.iloc[0]


In [7]:
# Show the first record of the processed dataframe
#dfp.iloc[0]

In [8]:
# Show type information and counts of processed dataframe
#dfp.info()

In [9]:
# Show stats for numeric columns
#dfp.describe()

In [10]:
# Show counts of service types
#dfp.service_type.value_counts()

In [11]:
#dfp.item_desc.value_counts()

In [12]:
pd.pivot_table(dfp, values='site_id', index='service_type', columns='units', aggfunc='count')

units,-,CCF,Cgallons,Gallons,Loads,MMBtu,Tons,kGal,kW,kWh,klbs,lbs
service_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
district_heat,877.0,,,,,1247.0,,,,,861.0,471.0
electricity,20170.0,,,,,,,,4617.0,12652.0,,
fuel_oil,291.0,,,8006.0,,,,,,,,
natural_gas,3897.0,2412.0,,,,,,,,,,
refuse,3355.0,,,,2120.0,,108.0,,,,,
sewer,8345.0,,,3465.0,,,,,,,,
water,9781.0,,9.0,8588.0,,,,231.0,,,,


In [13]:
dfp.head()

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
0,12,2009,2607.979752,facility,Energy charge,48.998471,electricity,3,kWh,14360.630515,2010,6
1,1,2010,7849.39409,facility,Energy charge,147.799171,electricity,3,kWh,43317.459347,2010,7
2,2,2010,7344.389601,facility,Energy charge,135.459603,electricity,3,kWh,39700.93871,2010,8
3,3,2010,7404.968981,facility,Energy charge,133.029802,electricity,3,kWh,38988.804762,2010,9
4,4,2010,6472.904545,facility,Energy charge,122.476738,electricity,3,kWh,35895.878788,2010,10


In [14]:
fy = dfp['fiscal_year'].unique()
fy

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2009],
      dtype=int64)

In [15]:
df_fy = dfp.query('fiscal_year==2018')
df_fy.head()

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
312,7,2017,0.0,facility,Energy charge,0.0,electricity,3,-,0.0,2018,1
313,7,2017,2659.536764,facility,Energy charge,54.014323,electricity,3,kWh,15830.692641,2018,1
314,7,2017,0.0,facility,KW Charge,0.0,electricity,3,-,0.0,2018,1
315,7,2017,853.544459,facility,KW Charge,0.0,electricity,3,kW,59.721158,2018,1
316,7,2017,561.042054,facility,Other Charge,0.0,electricity,3,-,0.0,2018,1


In [16]:
df_FYtotal

NameError: name 'df_FYtotal' is not defined

In [None]:
def FY_spreadsheets(dfp, ut):
    """ Iterates through pre-processed billing dataframe and creates spreadsheet for each fiscal year. Saves .xlsx 
        spreadsheet for each fiscal year with a row of data for each sites and grouping.  Returns nothing.
    """
    
    # --- Read the CSV file and convert the billing period dates into 
    #     real Pandas dates

    ## Filter by FY, Pivot Table by Site
    fy = dfp['fiscal_year'].unique()

    for year in fy:

        df_fy = dfp.query('fiscal_year==@year')

        # Summarize FY Cost and usage by Service Type

        # Create pivot table of cost data
        df_FYcost = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='cost', aggfunc=np.sum)
        df_FYcost = bu.add_missing_columns(df_FYcost, bu.missing_services([]))
        try: 
            df_FYcost['electricity_energy'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='cost', aggfunc=np.sum)['kWh']
        except:
            df_FYcost['electricity_energy'] = 0.0
        try:
            df_FYcost['electricity_demand'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='cost', aggfunc=np.sum)['kW']
        except:
            df_FYcost['electricity_demand'] = 0.0
        df_FYcost = df_FYcost.add_suffix('_cost')

        # Calculate additional cost totals
        df_FYcost['total_utility_cost'] = df_FYcost.sum(axis=1)
        df_FYcost['total_water_cost'] = df_FYcost[['water_cost', 'sewer_cost']].sum(axis=1)
        df_FYcost['total_energy_cost'] = df_FYcost.total_utility_cost - df_FYcost.total_water_cost
        df_FYcost['total_heat_cost'] = df_FYcost.total_energy_cost - df_FYcost.electricity_cost

        # Create pivot table of usage data in native units
        df_FYusage = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='usage', aggfunc=np.sum)
        try:
            df_FYusage['electricity_energy'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='usage', aggfunc=np.sum)['kWh']
        except:
            df_FYusage['electricity_energy'] = 0.0
        try:
            df_FYusage['electricity_demand'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='usage', aggfunc=np.sum)['kW']
        except:
            df_FYusage['electricity_demand'] = 0.0
        df_FYusage = bu.add_missing_columns(df_FYusage, bu.missing_services([]))
        df_FYusage = df_FYusage.add_suffix('_usage')

        # Create pivot table of usage data in mmbtu units
        df_FYBTU = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='mmbtu', aggfunc=np.sum)
        df_FYBTU = bu.add_missing_columns(df_FYBTU, bu.missing_services([]))
        df_FYBTU = df_FYBTU.add_suffix('_mmbtu')
        df_FYBTU['total_energy_mmbtu'] = df_FYBTU.sum(axis=1)
        df_FYBTU['total_heat_mmbtu'] = df_FYBTU.total_energy_mmbtu - df_FYBTU.electricity_mmbtu

        #Merge Dataframes
        df_FYtotal = pd.concat([df_FYcost, df_FYusage, df_FYBTU], axis=1)
        df_FYtotal['days'] = df_FYtotal['fiscal_mo'].map(fmo_days)
        # Add in HDD an sqft to df_FYtotal
        # iterate through sites

        sq_ft=[]
        dd=[]

        for site_id, row in df_FYtotal.iterrows():
            df_site = df_fy.query('site_id == @site_id')
            mo_present = bu.months_present(df_site, yr_col='fiscal_year', mo_col='fiscal_mo')
            dd_series = ut.degree_days_yearly(mo_present, site_id)
            dd.append(dd_series.iloc[0])
            try:
                bi = ut.building_info(site_id)
                sq = bi['sq_ft']
            except:
                print(site_id)    
                sq = np.nan
            sq_ft.append(sq)


        df_FYtotal['dd'] = dd
        df_FYtotal['sq_ft'] = sq_ft
        df_FYtotal.head()

        # Caclulate EUI, ECI

        #Use HDD and SQFT to calculate EUIs and ECI.
        df_FYtotal['eci'] = df_FYtotal.total_energy_cost / df_FYtotal.sq_ft
        df_FYtotal['uci'] = df_FYtotal.total_utility_cost / df_FYtotal.sq_ft
        df_FYtotal['eui'] = df_FYtotal.total_energy_mmbtu * 1e3 / df_FYtotal.sq_ft
        df_FYtotal['specific_eui'] = df_FYtotal.total_heat_mmbtu * 1e6 / df_FYtotal.dd / df_FYtotal.sq_ft

        #Select Desired Columns and export to excel  - This is the spreadsheet per site, row per month output

        df_export=df_FYtotal[['dd',
                            'sq_ft',
                            'electricity_energy_cost',
                            'electricity_demand_cost',
                            'electricity_cost', 
                            'fuel_oil_cost',
                            'natural_gas_cost',
                            'district_heat_cost',
                            'total_energy_cost',
                            'water_cost',
                            'sewer_cost',
                            'total_water_cost',
                            'total_utility_cost',
                            'eci',
                            'uci',
                            'electricity_energy_usage',
                            'electricity_demand_usage',
                            'electricity_mmbtu',
                            'fuel_oil_usage',
                            'fuel_oil_mmbtu',
                            'natural_gas_usage',
                            'natural_gas_mmbtu',
                            'district_heat_usage',
                            'total_heat_mmbtu',
                            'eui',
                            'specific_eui',
                            'total_energy_mmbtu',
                            'water_usage',
                            'sewer_usage']]

        df_export.to_excel(f"FY{year}_Site_Summary_Data.xlsx")
 


In [17]:
df_FYcost.head()

NameError: name 'df_FYcost' is not defined

In [18]:
df_FYusage.head()

NameError: name 'df_FYusage' is not defined

In [19]:
df_FYBTU.head()

NameError: name 'df_FYBTU' is not defined

In [20]:
df_FYtotal.index[1]

NameError: name 'df_FYtotal' is not defined

In [21]:
# See how the building_info() method of the Utility object works
ut.building_info('03')

{'site_name': 'SD-Denali Elementary',
 'site_category': 'School District',
 'address': nan,
 'city': nan,
 'group': 'School District',
 'division': nan,
 'campus': nan,
 'primary_func': 'Elementary School',
 'year_built': nan,
 'sq_ft': 49210.0,
 'onsite_gen': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'source_fuel_oil': 'Sourdough Fuel  (Petro Star)',
 'source_natural_gas': 'Fairbanks Natural Gas',
 'source_electricity': 'Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities',
 'source_sewer': 'Golden Heart Utilities',
 'source_refuse': 'Alaska Waste',
 'acct_fuel_oil': '55010001',
 'acct_natural_gas': '10283 - DENALI',
 'acct_electricity': '172230',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1998005610',
 'acct_sewer': '1998005610',
 'acct_refuse': 'AW23-DNL03',
 'facility_list': '',
 'grouping': 'facility',
 'site_id': 

In [22]:
# See how the building_info() method of the Utility object works
ut.building_info('DIPMP1')

{'site_name': 'FNSB-Big Dipper',
 'site_category': 'Other',
 'address': nan,
 'city': nan,
 'group': 'FNSB',
 'division': 'Parks and Rec',
 'campus': nan,
 'primary_func': 'Ice Rink',
 'year_built': nan,
 'sq_ft': 63845.0,
 'onsite_gen': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'source_fuel_oil': 'Sourdough Fuel  (Petro Star)',
 'source_natural_gas': 'Fairbanks Natural Gas',
 'source_electricity': 'Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities',
 'source_sewer': 'Golden Heart Utilities',
 'source_refuse': '',
 'acct_fuel_oil': '39384007',
 'acct_natural_gas': '10282 (1920 LATHROP)',
 'acct_electricity': '112651',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1995008500',
 'acct_sewer': '1995008500',
 'acct_refuse': '',
 'facility_list': '',
 'grouping': 'facility',
 'site_id': 'DIPMP1'}

In [23]:
# Create pivot table of cost data
df_FYcost = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='cost', aggfunc=np.sum)
df_FYcost = bu.add_missing_columns(df_FYcost, bu.missing_services([]))
try: 
    df_FYcost['electricity_energy'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='cost', aggfunc=np.sum)['kWh']
except:
    df_FYcost['electricity_energy'] = 0.0
try:
    df_FYcost['electricity_demand'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='cost', aggfunc=np.sum)['kW']
except:
    df_FYcost['electricity_demand'] = 0.0
df_FYcost = df_FYcost.add_suffix('_cost')

# Calculate additional cost totals
df_FYcost['total_utility_cost'] = df_FYcost.sum(axis=1)
df_FYcost['total_water_cost'] = df_FYcost[['water_cost', 'sewer_cost']].sum(axis=1)
df_FYcost['total_energy_cost'] = df_FYcost.total_utility_cost - df_FYcost.total_water_cost
df_FYcost['total_heat_cost'] = df_FYcost.total_energy_cost - df_FYcost.electricity_cost

# Create pivot table of usage data in native units
df_FYusage = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='usage', aggfunc=np.sum)
try:
    df_FYusage['electricity_energy'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='usage', aggfunc=np.sum)['kWh']
except:
    df_FYusage['electricity_energy'] = 0.0
try:
    df_FYusage['electricity_demand'] = pd.pivot_table(df_fy, index=['site_id'], columns='units', values='usage', aggfunc=np.sum)['kW']
except:
    df_FYusage['electricity_demand'] = 0.0
df_FYusage = bu.add_missing_columns(df_FYusage, bu.missing_services([]))
df_FYusage = df_FYusage.add_suffix('_usage')

# Create pivot table of usage data in mmbtu units
df_FYBTU = pd.pivot_table(df_fy, index=['site_id'], columns='service_type', values='mmbtu', aggfunc=np.sum)
df_FYBTU = bu.add_missing_columns(df_FYBTU, bu.missing_services([]))
df_FYBTU = df_FYBTU.add_suffix('_mmbtu')
df_FYBTU['total_energy_mmbtu'] = df_FYBTU.sum(axis=1)
df_FYBTU['total_heat_mmbtu'] = df_FYBTU.total_energy_mmbtu - df_FYBTU.electricity_mmbtu

#Merge Dataframes
df_FYtotal = pd.concat([df_FYcost, df_FYusage, df_FYBTU], axis=1)
df_FYtotal['days'] = df_FYtotal['fiscal_mo'].map(fmo_days)
# Add in HDD an sqft to df_FYtotal
# iterate through sites

sq_ft=[]
dd=[]

for site_id, row in df_FYtotal.iterrows():
    df_site = df_fy.query('site_id == @site_id')
    mo_present = bu.months_present(df_site, yr_col='fiscal_year', mo_col='fiscal_mo')
    dd_series = ut.degree_days_yearly(mo_present, site_id)
    dd.append(dd_series.iloc[0])
    try:
        bi = ut.building_info(site_id)
        sq = bi['sq_ft']
    except:
        print(site_id)    
        sq = np.nan
    sq_ft.append(sq)


df_FYtotal['dd'] = dd
df_FYtotal['sq_ft'] = sq_ft
df_FYtotal.head()

# Caclulate EUI, ECI

#Use HDD and SQFT to calculate EUIs and ECI.
df_FYtotal['eci'] = df_FYtotal.total_energy_cost / df_FYtotal.sq_ft
df_FYtotal['uci'] = df_FYtotal.total_utility_cost / df_FYtotal.sq_ft
df_FYtotal['eui'] = df_FYtotal.total_energy_mmbtu * 1e3 / df_FYtotal.sq_ft
df_FYtotal['specific_eui'] = df_FYtotal.total_heat_mmbtu * 1e6 / df_FYtotal.dd / df_FYtotal.sq_ft


KeyError: 'fiscal_mo'

In [24]:
# Set up variables (site, df, ut):
"""As well as returning template data, this function writes a spreadsheet
that summarizes values for every building.  The spreadsheet is written to
'output/extra_data/site_summary_FYYYYY.xlsx'.
"""

site="DIPMP1"
df = dfp


In [25]:
# Start a dictionary with the main key to hold the template data
template_data = {'energy_index_comparison': {}}

# --------- Table 1, Yearly Table

# Filter down to just this site's bills and only services that
# are energy services.
energy_services = bu.missing_energy_services([])
df1 = df.query('site_id==@site') 
               # Removed to include water in df1:  and service_type==@energy_services')


In [26]:
# Filter down to just this site's bills and only services that
# are energy services.
energy_services = bu.missing_energy_services([])
df1 = df.query('site_id==@site') 
#if len(df1) == 0:
    #return

# Add in degree days to DataFrame
months_present = bu.months_present(df1)
deg_days = ut.degree_days_monthly(months_present, site)
deg_days.set_index(['fiscal_year', 'fiscal_mo'], inplace=True)


# Get building square footage and calculate EUIs and ECI.
sq_ft = ut.building_info(site)['sq_ft']


# Summarize Monthly Cost and usage by Service Type
df_monthlycost = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='service_type', values='cost', aggfunc=np.sum)
df_monthlycost = bu.add_missing_columns(df_monthlycost, bu.missing_services([]))



# Seperate kWh and kW electricity costs
df_units = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='units', values='cost', aggfunc=np.sum)
bu.add_missing_columns(df_units, ['kWh', 'kW'])
df_monthlycost['electricity_energy'] = df_units['kWh']
df_monthlycost['electricity_demand'] = df_units['kW']

# Add cost suffix
df_monthlycost = df_monthlycost.add_suffix('_cost')

df_monthlycost['total_utility_cost'] = df_monthlycost.sum(axis=1)
df_monthlycost['total_water_cost'] = df_monthlycost[['water_cost', 'sewer_cost']].sum(axis=1)
df_monthlycost['total_energy_cost'] = df_monthlycost.total_utility_cost - df_monthlycost.total_water_cost
df_monthlycost['total_heat_cost'] = df_monthlycost.total_energy_cost - df_monthlycost.electricity_cost
df_monthlycost['eci'] = df_monthlycost.total_energy_cost / sq_ft
df_monthlycost['uci'] = df_monthlycost.total_utility_cost / sq_ft

df_monthlycost_rolling = df_monthlycost.rolling(12, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None).sum().add_suffix('_12mo')



In [27]:
df_monthlycost.head()

Unnamed: 0_level_0,service_type,electricity_cost,fuel_oil_cost,natural_gas_cost,sewer_cost,water_cost,coal_cost,refuse_cost,propane_cost,district_heat_cost,wood_cost,electricity_energy_cost,electricity_demand_cost,total_utility_cost,total_water_cost,total_energy_cost,total_heat_cost,eci,uci
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2010,6,11091.144531,4798.501818,27.600752,1204.614286,1095.785893,0.0,0.0,0.0,0.0,0.0,9420.829687,,27638.476968,2300.400179,25338.076789,14246.932258,0.396869,0.4329
2010,7,19826.218147,22785.539432,550.238473,1939.474464,1750.359107,0.0,0.0,0.0,0.0,0.0,16754.575848,,63606.405472,3689.833571,59916.571901,40090.353753,0.938469,0.996263
2010,8,19655.096429,12429.13375,100.603632,1803.30325,1608.745,0.0,0.0,0.0,0.0,0.0,16686.902321,,52283.784382,3412.04825,48871.736132,29216.639703,0.765475,0.818917
2010,9,21729.247256,13890.845,282.449729,2036.133714,1825.419643,0.0,0.0,0.0,0.0,0.0,18610.463506,,58374.558849,3861.553357,54513.005492,32783.758236,0.853834,0.914317
2010,10,20131.567136,8703.386379,2597.105985,1471.497321,1348.191964,0.0,0.0,0.0,0.0,0.0,17195.500136,,51447.248923,2819.689286,48627.559637,28495.992501,0.76165,0.805815


In [28]:
df_monthlycost.index

MultiIndex(levels=[[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]],
           labels=[[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9], [5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]],
           names=['fiscal_year', 'fiscal_mo'])

In [29]:
#fmo_days = [31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30]
fmo_days = {1:31, 2:31, 3:30, 4:31, 5:30, 6:31, 7:31, 8:28, 9:31, 10:30, 11:31, 12:30}
fmo_days

df_fmo = pd.DataFrame( {'fiscal_mo' : [1,2,3,4,5,6,7,8,9,10,11,12],
                        'days' : [31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30]})
df_fmo

Unnamed: 0,fiscal_mo,days
0,1,31
1,2,31
2,3,30
3,4,31
4,5,30
5,6,31
6,7,31
7,8,28
8,9,31
9,10,30


In [30]:
df_monthlycost['days'] = df_monthlycost.index['fiscal_mo'].map(fmo_days)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [31]:
df_temp = df_monthlycost
df_temp = pd.merge(df_temp, df_fmo, how = 'left', on = 'fiscal_mo')['days']
df_temp = pd.DataFrame(df_temp)
df_temp

Unnamed: 0,days
0,31
1,31
2,28
3,31
4,30
5,31
6,30
7,31
8,31
9,30


In [32]:
df_monthlycost['days'] = df_temp['days']

df_monthlycost

Unnamed: 0_level_0,service_type,electricity_cost,fuel_oil_cost,natural_gas_cost,sewer_cost,water_cost,coal_cost,refuse_cost,propane_cost,district_heat_cost,wood_cost,electricity_energy_cost,electricity_demand_cost,total_utility_cost,total_water_cost,total_energy_cost,total_heat_cost,eci,uci,days
fiscal_year,fiscal_mo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2010,6,11091.144531,4798.501818,27.600752,1204.614286,1095.785893,0.0,0.0,0.0,0.0,0.0,9420.829687,,27638.476968,2300.400179,25338.076789,14246.932258,0.396869,0.432900,
2010,7,19826.218147,22785.539432,550.238473,1939.474464,1750.359107,0.0,0.0,0.0,0.0,0.0,16754.575848,,63606.405472,3689.833571,59916.571901,40090.353753,0.938469,0.996263,
2010,8,19655.096429,12429.133750,100.603632,1803.303250,1608.745000,0.0,0.0,0.0,0.0,0.0,16686.902321,,52283.784382,3412.048250,48871.736132,29216.639703,0.765475,0.818917,
2010,9,21729.247256,13890.845000,282.449729,2036.133714,1825.419643,0.0,0.0,0.0,0.0,0.0,18610.463506,,58374.558849,3861.553357,54513.005492,32783.758236,0.853834,0.914317,
2010,10,20131.567136,8703.386379,2597.105985,1471.497321,1348.191964,0.0,0.0,0.0,0.0,0.0,17195.500136,,51447.248923,2819.689286,48627.559637,28495.992501,0.761650,0.805815,
2010,11,20290.876500,3827.963621,5365.149610,2453.879393,2171.553107,0.0,0.0,0.0,0.0,0.0,17125.908661,,51235.330892,4625.432500,46609.898392,26319.021892,0.730048,0.802496,
2010,12,19277.316290,728.040000,3880.056818,2133.607214,1939.308679,0.0,0.0,0.0,0.0,0.0,16193.656452,,44151.985453,4072.915893,40079.069560,20801.753270,0.627756,0.691550,
2011,1,20423.527675,2241.939186,5067.511875,1740.370893,1621.868571,0.0,0.0,0.0,0.0,0.0,17307.613215,,48402.831415,3362.239464,45040.591951,24617.064276,0.705468,0.758130,
2011,2,20156.230580,3245.475444,901.445867,2149.180321,1961.562179,0.0,0.0,0.0,0.0,0.0,17123.208657,,45537.103048,4110.742500,41426.360548,21270.129968,0.648858,0.713245,
2011,3,19323.036834,9952.006909,419.713508,1325.490214,1249.274786,0.0,0.0,0.0,0.0,0.0,16148.896170,,48418.418421,2574.765000,45843.653421,26520.616587,0.718046,0.758374,


In [33]:
def Site_spreadsheets(site, df, ut):
    """ Uses pre-processed billing dataframe and creates spreadsheet for each site. Saves .xlsx 
        spreadsheet for given site with a row of data for each month.  Returns nothing.
        """
    # Filter down to just this site's bills and only services that
    # are energy services.
    energy_services = bu.missing_energy_services([])
    df1 = df.query('site_id==@site') 
    if len(df1) == 0:
        return

    # Add in degree days to DataFrame
    months_present = bu.months_present(df1)
    deg_days = ut.degree_days_monthly(months_present, site)
    deg_days.set_index(['fiscal_year', 'fiscal_mo'], inplace=True)


    # Get building square footage and calculate EUIs and ECI.
    sq_ft = ut.building_info(site)['sq_ft']


    # Summarize Monthly Cost and usage by Service Type
    df_monthlycost = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='service_type', values='cost', aggfunc=np.sum)
    df_monthlycost = bu.add_missing_columns(df_monthlycost, bu.missing_services([]))

    # Seperate kWh and kW electricity costs
    df_units = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='units', values='cost', aggfunc=np.sum)
    bu.add_missing_columns(df_units, ['kWh', 'kW'])
    df_monthlycost['electricity_energy'] = df_units['kWh']
    df_monthlycost['electricity_demand'] = df_units['kW']

    # Add cost suffix
    df_monthlycost = df_monthlycost.add_suffix('_cost')

    df_monthlycost['total_utility_cost'] = df_monthlycost.sum(axis=1)
    df_monthlycost['total_water_cost'] = df_monthlycost[['water_cost', 'sewer_cost']].sum(axis=1)
    df_monthlycost['total_energy_cost'] = df_monthlycost.total_utility_cost - df_monthlycost.total_water_cost
    df_monthlycost['total_heat_cost'] = df_monthlycost.total_energy_cost - df_monthlycost.electricity_cost
    df_monthlycost['eci'] = df_monthlycost.total_energy_cost / sq_ft
    df_monthlycost['uci'] = df_monthlycost.total_utility_cost / sq_ft

    df_monthlycost_rolling = df_monthlycost.rolling(12, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None).sum().add_suffix('_12mo')


    df_monthlyusage = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='service_type', values='usage', aggfunc=np.sum)

    # Seperate kWh and kW electricity costs
    df_units = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='units', values='usage', aggfunc=np.sum)
    bu.add_missing_columns(df_units, ['kWh', 'kW'])
    df_monthlyusage['electricity_energy'] = df_units['kWh']
    df_monthlyusage['electricity_demand'] = df_units['kW']

    df_monthlyusage = bu.add_missing_columns(df_monthlyusage, bu.missing_services([]))

    # Add usage suffix
    df_monthlyusage = df_monthlyusage.add_suffix('_usage')

    df_monthlyusage_rolling = df_monthlyusage.rolling(12, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None).sum().add_suffix('_12mo')
    df_monthlyusage_rolling['electricity_demand_usage_12mo'] = df_monthlyusage_rolling['electricity_demand_usage_12mo'] / 12


    df_monthlyBTU = pd.pivot_table(df1, index=['fiscal_year', 'fiscal_mo'], columns='service_type', values='mmbtu', aggfunc=np.sum)
    df_monthlyBTU = bu.add_missing_columns(df_monthlyBTU, bu.missing_services([]))
    df_monthlyBTU = df_monthlyBTU.add_suffix('_mmbtu')
    df_monthlyBTU['total_energy_mmbtu'] = df_monthlyBTU.sum(axis=1)
    df_monthlyBTU['total_heat_mmbtu'] = df_monthlyBTU.total_energy_mmbtu - df_monthlyBTU.electricity_mmbtu
    df_monthlyBTU['eui'] = df_monthlyBTU.total_energy_mmbtu * 1e3 / sq_ft

    df_monthlyBTU = pd.merge(df_monthlyBTU, deg_days, how='left', left_index=True, right_index=True)  #right_on=['fiscal_year', 'fiscal_mo'])

    df_monthlyBTU['specific eui'] = df_monthlyBTU.total_heat_mmbtu * 1e6 / df_monthlyBTU.dd / sq_ft

    df_monthlyBTU_rolling = df_monthlyBTU.rolling(12, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None).sum().add_suffix('_12mo')

    #Merge Dataframes

    df_total = pd.concat([df_monthlycost, df_monthlyusage, df_monthlyBTU, df_monthlycost_rolling, df_monthlyusage_rolling, df_monthlyBTU_rolling], axis=1)

    #Select Desired Columns and export to excel  - This is the spreadsheet per site, row per month output

    df_export=df_total[['dd', 
                        'electricity_energy_cost',
                        'electricity_demand_cost',
                        'electricity_cost', 
                        'fuel_oil_cost',
                        'natural_gas_cost',
                        'district_heat_cost',
                        'total_energy_cost',
                        'water_cost',
                        'sewer_cost',
                        'total_water_cost',
                        'total_utility_cost',
                        'eci',
                        'uci',
                        'electricity_energy_usage',
                        'electricity_demand_usage',
                        'electricity_mmbtu',
                        'fuel_oil_usage',
                        'fuel_oil_mmbtu',
                        'natural_gas_usage',
                        'natural_gas_mmbtu',
                        'district_heat_usage',
                        'total_heat_mmbtu',
                        'eui',
                        'specific eui',
                        'total_energy_mmbtu',
                        'water_usage',
                        'sewer_usage',
                        'dd_12mo',
                        'electricity_energy_cost_12mo',
                        'electricity_demand_cost_12mo',
                        'electricity_cost_12mo',
                        'fuel_oil_cost_12mo',
                        'natural_gas_cost_12mo',
                        'district_heat_cost_12mo',
                        'total_heat_cost_12mo',
                        'total_energy_cost_12mo',
                        'water_cost_12mo',
                        'sewer_cost_12mo',
                        'total_water_cost_12mo',
                        'total_utility_cost_12mo',
                        'eci_12mo',
                        'uci_12mo',
                        'electricity_energy_usage_12mo',
                        'electricity_demand_usage_12mo',
                        'electricity_mmbtu_12mo',
                        'fuel_oil_usage_12mo',
                        'fuel_oil_mmbtu_12mo',
                        'natural_gas_usage_12mo',
                        'natural_gas_mmbtu_12mo',
                        'district_heat_usage_12mo',
                        'total_heat_mmbtu_12mo',
                        'eui_12mo',
                        'specific eui_12mo',
                        'total_energy_mmbtu_12mo',
                        'water_usage_12mo',
                        'sewer_usage_12mo']]

    df_export.to_excel(f"Site_{site}_Monthly_Summary_Data.xlsx")

In [34]:
df1

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
43192,12,2009,9420.829687,facility,Energy charge,223.567861,electricity,DIPMP1,kWh,65523.992188,2010,6
43193,12,2009,1564.417969,facility,KW Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,6
43194,12,2009,105.896875,facility,Other Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,6
43195,1,2010,16754.575848,facility,Energy charge,397.639820,electricity,DIPMP1,kWh,116541.564955,2010,7
43196,1,2010,2885.665246,facility,KW Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,7
43197,1,2010,185.977054,facility,Other Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,7
43198,2,2010,16686.902321,facility,Energy charge,395.935085,electricity,DIPMP1,kWh,116041.935714,2010,8
43199,2,2010,2760.214643,facility,KW Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,8
43200,2,2010,207.979464,facility,Other Charge,0.000000,electricity,DIPMP1,-,0.000000,2010,8
43201,3,2010,18610.463506,facility,Energy charge,441.467615,electricity,DIPMP1,kWh,129386.757143,2010,9


In [35]:
## Following cells create export spreadheet by site with row for every month

In [36]:
deg_days

Unnamed: 0_level_0,Unnamed: 1_level_0,dd
fiscal_year,fiscal_mo,Unnamed: 2_level_1
2010,6,2095.700000
2010,7,2447.400000
2010,8,1740.200000
2010,9,1641.100000
2010,10,735.400000
2010,11,362.600000
2010,12,190.000000
2011,1,142.000000
2011,2,215.900000
2011,3,561.800000


In [37]:
# Consolidate data to monthly rows




In [38]:
#deg_days
#df_monthlyBTU_rolling


In [39]:
## End of Ben edits for spreadsheet export

In [40]:
# Add in any missing columns
bu.add_missing_columns(df3, energy_svcs)

# Change column names
cols = ['{}_cost'.format(col) for col in df3.columns]
df3.columns = cols

# Add a total energy cost column
df3['total_energy_cost'] = df3.sum(axis=1)

# Add a total Heat Cost Column
df3['total_heat_cost'] = df3.total_energy_cost.fillna(0.0) - df3.electricity_cost.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)
#for service_type
df_sitemonthly = df1.groupby(['fiscal_year', 'fiscal_mo'], as_index=False).sum()

#df1.query("service_type=='electricity'").groupby('fiscal_year').sum()[['mmbtu']]
df_sitemonthly
# Add columns to monthly site data

NameError: name 'df3' is not defined

In [41]:
# Only do this table if there are energy services.
if not df1.empty:

    # Sum Energy Costs and Usage
    df2 = pd.pivot_table(df1, index='fiscal_year', values=['cost', 'mmbtu'], aggfunc=np.sum)

    # Add a column showing number of months present in each fiscal year.
    bu.add_month_count_column(df2, df1)

    # Make a column with just the Heat MMBtu
    dfe = df1.query("service_type=='electricity'").groupby('fiscal_year').sum()[['mmbtu']]
    dfe.rename(columns={'mmbtu': 'elec_mmbtu'}, inplace = True)
    df2 = df2.merge(dfe, how='left', left_index=True, right_index=True)
    df2['elec_mmbtu'] = df2['elec_mmbtu'].fillna(0.0)
    df2['heat_mmbtu'] = df2.mmbtu - df2.elec_mmbtu

    # Add in degree days to DataFrame
    months_present = bu.months_present(df1)
    deg_days = ut.degree_days_yearly(months_present, site)
    df2['hdd'] = deg_days

    # Get building square footage and calculate EUIs and ECI.
    sq_ft = ut.building_info(site)['sq_ft']
    df2['eui'] = df2.mmbtu * 1e3 / sq_ft
    df2['eci'] = df2.cost / sq_ft
    df2['specific_eui'] = df2.heat_mmbtu * 1e6 / df2.hdd / sq_ft

    # Restrict to full years
    df2 = df2.query("month_count == 12").copy()

    # Reverse the years
    df2.sort_index(ascending=False, inplace=True)

    # get the rows as a list of dictionaries and put into
    # final template data dictionary.
    template_data['energy_index_comparison']['yearly_table'] = {
        'rows': bu.df_to_dictionaries(df2)
    }

    

In [42]:
df2

Unnamed: 0_level_0,cost,mmbtu,month_count,elec_mmbtu,heat_mmbtu,hdd,eui,eci,specific_eui
fiscal_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018,471863.705597,12274.104831,12,4706.19719,7567.907641,12445.951743,192.24849,7.39077,9.524031
2017,438092.216725,12086.902497,12,4810.999246,7275.90325,13803.2,189.316352,6.861809,8.256201
2016,394288.319556,12216.927356,12,5454.849356,6762.078,12172.3,191.352923,6.175712,8.70123
2015,431940.869985,12085.714159,12,4339.763318,7745.950841,12457.2,189.297739,6.765461,9.739292
2014,470329.176647,12618.20305,12,4665.265022,7952.938028,12573.3,197.638077,7.366735,9.907211
2013,507937.756804,13309.189278,12,4762.531691,8546.657587,14901.4,208.460949,7.955795,8.983433
2012,485917.490852,12565.609568,12,4526.866374,8038.743194,13970.8,196.814309,7.610893,9.01239
2011,437117.494581,12058.523181,12,4667.597322,7390.925859,13761.4,188.871849,6.846542,8.412195


In [43]:
template_data

{'energy_index_comparison': {'yearly_table': {'rows': [{'fiscal_year': 2018,
     'cost': 471863.70559671655,
     'mmbtu': 12274.104830829494,
     'month_count': 12.0,
     'elec_mmbtu': 4706.197190000001,
     'heat_mmbtu': 7567.907640829493,
     'hdd': 12445.951743191383,
     'eui': 192.2484897929281,
     'eci': 7.390769920850756,
     'specific_eui': 9.524031414049311},
    {'fiscal_year': 2017,
     'cost': 438092.21672471205,
     'mmbtu': 12086.902496751152,
     'month_count': 12.0,
     'elec_mmbtu': 4810.999246451613,
     'heat_mmbtu': 7275.90325029954,
     'hdd': 13803.2,
     'eui': 189.31635205186237,
     'eci': 6.861809330796649,
     'specific_eui': 8.256200734642265},
    {'fiscal_year': 2016,
     'cost': 394288.31955645187,
     'mmbtu': 12216.927356048383,
     'month_count': 12.0,
     'elec_mmbtu': 5454.849356048388,
     'heat_mmbtu': 6762.077999999996,
     'hdd': 12172.3,
     'eui': 191.35292279815778,
     'eci': 6.175711795073253,
     'specific_eui': 

In [44]:
# ---------- Table 2, Details Table

# Use the last complete year for this site as the year for the Details
# table.  If there was no complete year for the site, then use the
# last complete year for the entire dataset.
if 'df2' in locals() and len(df2):
    last_complete_year = df2.index.max()
else:
    # Determine month count by year for Electricity in entire dataset
    # to determine the latest complete year.
    electric_only = df.query("service_type == 'electricity'")
    electric_months_present = bu.months_present(electric_only)
    electric_mo_count = bu.month_count(electric_months_present)
    last_complete_year = max(electric_mo_count[electric_mo_count==12].index)

# Filter down to just the records of the targeted fiscal year
df1 = df.query('fiscal_year == @last_complete_year')

# Get Total Utility cost by building. This includes non-energy utilities as well.
df2 = df1.pivot_table(index='site_id', values=['cost'], aggfunc=np.sum)
df2.columns = ['total_cost']

# Save this into the Final DataFrame that we will build up as we go.
df_final = df2.copy()

In [45]:
df_final.head()

Unnamed: 0_level_0,total_cost
site_id,Unnamed: 1_level_1
3,124232.926771
4,153755.217435
5,592660.781339
6,156281.534148
7,143356.705633


In [46]:
# Get a list of the Energy Services and restrict the data to
# just these services
energy_svcs = bu.missing_energy_services([])
df2 = df1.query('service_type == @energy_svcs')

In [47]:
energy_svcs

['wood',
 'propane',
 'district_heat',
 'electricity',
 'fuel_oil',
 'coal',
 'natural_gas']

In [48]:
df2

Unnamed: 0,cal_mo,cal_year,cost,group,item_desc,mmbtu,service_type,site_id,units,usage,fiscal_year,fiscal_mo
312,7,2017,0.000000,facility,Energy charge,0.000000,electricity,03,-,0.000000,2018,1
313,7,2017,2659.536764,facility,Energy charge,54.014323,electricity,03,kWh,15830.692641,2018,1
314,7,2017,0.000000,facility,KW Charge,0.000000,electricity,03,-,0.000000,2018,1
315,7,2017,853.544459,facility,KW Charge,0.000000,electricity,03,kW,59.721158,2018,1
316,7,2017,561.042054,facility,Other Charge,0.000000,electricity,03,-,0.000000,2018,1
317,8,2017,0.000000,facility,Energy charge,0.000000,electricity,03,-,0.000000,2018,2
318,8,2017,3841.919420,facility,Energy charge,80.306053,electricity,03,kWh,23536.357799,2018,2
319,8,2017,0.000000,facility,KW Charge,0.000000,electricity,03,-,0.000000,2018,2
320,8,2017,1150.984040,facility,KW Charge,0.000000,electricity,03,kW,80.545982,2018,2
321,8,2017,408.265809,facility,Other Charge,0.000000,electricity,03,-,0.000000,2018,2


In [49]:
# Summarize Cost by Service Type
df3 = pd.pivot_table(df2, index='site_id', columns='service_type', values='cost', aggfunc=np.sum)

# Add in any missing columns
bu.add_missing_columns(df3, energy_svcs)

# Change column names
cols = ['{}_cost'.format(col) for col in df3.columns]
df3.columns = cols

# Add a total energy cost column
df3['total_energy_cost'] = df3.sum(axis=1)

# Add a total Heat Cost Column
df3['total_heat_cost'] = df3.total_energy_cost.fillna(0.0) - df3.electricity_cost.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

# Summarize MMBtu by Service Type
df3 = pd.pivot_table(df2, index='site_id', columns='service_type', values='mmbtu', aggfunc=np.sum)

# Add in any missing columns
bu.add_missing_columns(df3, energy_svcs)

# Change column names
cols = ['{}_mmbtu'.format(col) for col in df3.columns]
df3.columns = cols

# Add a total mmbtu column
df3['total_mmbtu'] = df3.sum(axis=1)

# Add a total Heat mmbtu Column
df3['total_heat_mmbtu'] = df3.total_mmbtu.fillna(0.0) - df3.electricity_mmbtu.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

# Electricity kWh summed by building
df3 = pd.pivot_table(df2.query('units == "kWh"'), index='site_id', values='usage', aggfunc=np.sum)
df3.columns = ['electricity_kwh']

# Include in Final DF
df_final = pd.concat([df_final, df3], axis=1, sort=True)

In [50]:
df_final.head()

Unnamed: 0,total_cost,district_heat_cost,electricity_cost,fuel_oil_cost,natural_gas_cost,wood_cost,propane_cost,coal_cost,total_energy_cost,total_heat_cost,district_heat_mmbtu,electricity_mmbtu,fuel_oil_mmbtu,natural_gas_mmbtu,wood_mmbtu,propane_mmbtu,coal_mmbtu,total_mmbtu,total_heat_mmbtu,electricity_kwh
3,124232.926771,,81547.822942,36281.177861,1189.99506,0.0,0.0,0.0,119018.995864,37471.172922,,1240.796392,2114.404044,0.625161,0.0,0.0,0.0,3355.825597,2115.029206,363656.6
4,153755.217435,,87483.312661,53918.794028,1447.666996,0.0,0.0,0.0,142849.773685,55366.461024,,1351.295084,3181.560145,15.912,0.0,0.0,0.0,4548.767229,3197.472145,396041.9
5,592660.781339,135280.658036,407977.401875,,,0.0,0.0,0.0,543258.059911,135280.658036,8258.571429,6334.545561,,,0.0,0.0,0.0,14593.11699,8258.571429,1856549.0
6,156281.534148,,81973.891094,58261.452499,1180.846996,0.0,0.0,0.0,141416.190589,59442.299495,,1274.48436,3491.156767,0.102,0.0,0.0,0.0,4765.743127,3491.258767,373530.0
7,143356.705633,,92184.879044,35495.459745,1187.856996,0.0,0.0,0.0,128868.195785,36683.316741,,1404.691949,2116.545745,0.51,0.0,0.0,0.0,3521.747694,2117.055745,411691.7


In [51]:
# Electricity kW, both Average and Max by building
# First, sum up kW pieces for each month.
df3 = df2.query('units == "kW"').groupby(['site_id', 'fiscal_year', 'fiscal_mo']).sum()
df3 = pd.pivot_table(df3.reset_index(), index='site_id', values='usage', aggfunc=[np.mean, np.max])
df3.columns = ['electricity_kw_average', 'electricity_kw_max']

# Add into Final Frame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

In [52]:
# Add in Square footage info
df_bldg = ut.building_info_df()[['sq_ft']]

# Add into Final Frame.  I do a merge here so as not to bring
# in buildings from the building info spreadsheet that are not in this
# dataset; this dataset has been restricted to one year.
df_final = pd.merge(df_final, df_bldg, how='left', left_index=True, right_index=True)

In [None]:
df_bldg

In [None]:
df_final.head()

In [None]:
# Build a DataFrame that has monthly degree days for each site/year/month
# combination.
combos = set(zip(df1.site_id, df1.fiscal_year, df1.fiscal_mo))
df_dd = pd.DataFrame(data=list(combos), columns=['site_id', 'fiscal_year', 'fiscal_mo'])
ut.add_degree_days_col(df_dd)

# Add up the degree days by site (we've already filtered down to one year or less
# of data.)
dd_series = df_dd.groupby('site_id').sum()['degree_days']

# Put in final DataFrame
df_final = pd.concat([df_final, dd_series], axis=1)

# Add in a column that gives the number of months present for each site
# in this year.  Then filter down to just the sites that have 12 months
# of data.
df_final.reset_index(inplace=True)
df_final['fiscal_year'] = last_complete_year
df_final.set_index(['site_id', 'fiscal_year'], inplace=True)
df_final = bu.add_month_count_column_by_site(df_final, df2)
df_final = df_final.query('month_count==12').copy()
df_final.reset_index(inplace=True)
df_final.set_index('site_id', inplace=True)

# Calculate per square foot values for each building.
df_final['eui'] = df_final.total_mmbtu * 1e3 / df_final.sq_ft
df_final['eci'] = df_final.total_energy_cost / df_final.sq_ft
df_final['specific_eui'] = df_final.total_heat_mmbtu * 1e6 / df_final.sq_ft / df_final.degree_days

In [None]:
df_final.head()

In [None]:
# Save this to a spreadsheet, if it has not already been saved
fn = 'output/extra_data/site_summary_FY{}.xlsx'.format(last_complete_year)
if not os.path.exists(fn):
    with pd.ExcelWriter(fn) as excel_writer:
        df_final.to_excel(excel_writer, sheet_name='Sites')

# Get the totals across all buildings
totals_all_bldgs = df_final.sum()

# Total Degree-Days are not relevant
totals_all_bldgs.drop(['degree_days'], inplace=True)

# Only use the set of buildings that have some energy use and non-zero
# square footage to determine EUI's and ECI's
energy_bldgs = df_final.query("total_mmbtu > 0 and sq_ft > 0")

# Get total square feet, energy use, and energy cost for these buildings
# and calculate EUI and ECI
sq_ft_energy_bldgs = energy_bldgs.sq_ft.sum()
energy_in_energy_bldgs = energy_bldgs.total_mmbtu.sum()
energy_cost_in_energy_bldgs = energy_bldgs.total_energy_cost.sum()
totals_all_bldgs['eui'] = energy_in_energy_bldgs * 1e3 / sq_ft_energy_bldgs
totals_all_bldgs['eci'] = energy_cost_in_energy_bldgs / sq_ft_energy_bldgs

# For calculating heating specific EUI, further filter the set of
# buildings down to those that have heating fuel use.
# Get separate square footage total and weighted average degree-day for these.
heat_bldgs = energy_bldgs.query("total_heat_mmbtu > 0")
heat_bldgs_sq_ft = heat_bldgs.sq_ft.sum()
heat_bldgs_heat_mmbtu = heat_bldgs.total_heat_mmbtu.sum()
heat_bldgs_degree_days = (heat_bldgs.total_heat_mmbtu * heat_bldgs.degree_days).sum() / heat_bldgs.total_heat_mmbtu.sum()
totals_all_bldgs['specific_eui'] = heat_bldgs_heat_mmbtu * 1e6 / heat_bldgs_sq_ft / heat_bldgs_degree_days

# calculate a rank DataFrame
df_rank = pd.DataFrame()
for col in df_final.columns:
    df_rank[col] = df_final[col].rank(ascending=False)

if site in df_final.index:
    # The site exists in the DataFrame
    site_info = df_final.loc[site]
    site_pct = site_info / totals_all_bldgs
    site_rank = df_rank.loc[site]
else:
    # Site is not there, probabaly because not present in this year.
    # Make variables with NaN values for all elements.
    site_info = df_final.iloc[0].copy()   # Just grab the first row to start with
    site_info[:] = np.NaN                 # Put
    site_pct = site_info.copy()
    site_rank = site_info.copy()

# Make a final dictioary to hold all the results for this table
tbl2_data = {
    'fiscal_year': 'FY {}'.format(last_complete_year),
    'bldg': site_info.to_dict(),
    'all': totals_all_bldgs.to_dict(),
    'pct': site_pct.to_dict(),
    'rank': site_rank.to_dict()
}
template_data['energy_index_comparison']['details_table'] = tbl2_data

# -------------- Energy Comparison Graphs ---------------

# Filter down to only services that are energy services.
energy_services = bu.missing_energy_services([])
df4 = df.query('service_type==@energy_services').copy()

# Sum Energy Costs and Usage
df5 = pd.pivot_table(df4, index=['site_id', 'fiscal_year'], values=['cost', 'mmbtu'], aggfunc=np.sum)

# Add a column showing number of months present in each fiscal year.
df5 = bu.add_month_count_column_by_site(df5, df4)

# Create an Electric MMBtu column so it can be subtracted from total to determine
# Heat MMBtu.
dfe = df4.query("service_type=='Electricity'").groupby(['site_id', 'fiscal_year']).sum()[['mmbtu']]
dfe.rename(columns={'mmbtu': 'elec_mmbtu'}, inplace = True)
df5 = df5.merge(dfe, how='left', left_index=True, right_index=True)
df5['elec_mmbtu'] = df5['elec_mmbtu'].fillna(0.0)
df5['heat_mmbtu'] = df5.mmbtu - df5.elec_mmbtu

# Add in degree-days:
# Create a DataFrame with site, year, month and degree-days, but only one row
# for each site/year/month combo.
dfd = df4[['site_id', 'fiscal_year', 'fiscal_mo']].copy()
dfd.drop_duplicates(inplace=True)
ut.add_degree_days_col(dfd)

# Use the agg function below so that a NaN will be returned for the year
# if any monthly values are NaN
dfd = dfd.groupby(['site_id', 'fiscal_year']).agg({'degree_days': lambda x: np.sum(x.values)})[['degree_days']]
df5 = df5.merge(dfd, how='left', left_index=True, right_index=True)

# Add in some needed building info like square footage, primary function 
# and building category.
df_bldg = ut.building_info_df()

# Shrink to just the needed fields and remove index.
# Also, fill blank values with 'Unknown'.
df_info = df_bldg[['sq_ft', 'site_category', 'primary_func']].copy().reset_index()
df_info['site_category'] = df_info.site_category.fillna('Unknown')
df_info['primary_func'] = df_info.primary_func.fillna('Unknown Type')

# Also Remove the index from df5 and merge in building info
df5.reset_index(inplace=True)
df5 = df5.merge(df_info, how='left')

# Now calculate per square foot energy measures
df5['eui'] = df5.mmbtu * 1e3 / df5.sq_ft
df5['eci'] = df5.cost / df5.sq_ft
df5['specific_eui'] = df5.heat_mmbtu * 1e6 / df5.degree_days / df5.sq_ft

# Restrict to full years
df5 = df5.query("month_count == 12").copy()

# Make all of the comparison graphs
g1_fn, g1_url = gu.graph_filename_url(site, 'eci_func')
gu.building_type_comparison_graph(df5, 'eci', site, g1_fn)

g2_fn, g2_url = gu.graph_filename_url(site, 'eci_owner')
gu.building_owner_comparison_graph(df5, 'eci', site, g2_fn)

g3_fn, g3_url = gu.graph_filename_url(site, 'eui_func')
gu.building_type_comparison_graph(df5, 'eui', site, g3_fn)

g4_fn, g4_url = gu.graph_filename_url(site, 'eui_owner')
gu.building_owner_comparison_graph(df5, 'eui', site, g4_fn)

g5_fn, g5_url = gu.graph_filename_url(site, 'speui_func')
gu.building_type_comparison_graph(df5, 'specific_eui', site, g5_fn)

g6_fn, g6_url = gu.graph_filename_url(site, 'speui_owner')
gu.building_owner_comparison_graph(df5, 'specific_eui', site, g6_fn)

template_data['energy_index_comparison']['graphs'] = [
    g1_url, g2_url, g3_url, g4_url, g5_url, g6_url
]

# return template_data