## Used to Develop New Benchmarking Features
### Ben's Notebook
Assumes that `df_raw.pkl`, `df_processed.pkl`, and `util_obj.pkl` are located in the directory above this one.

In [1]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
import sys
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu
import os 

In [2]:
# Unpickle the raw DataFrame
df_raw = pd.read_pickle('../df_raw.pkl')

# Unpickle the processed DataFrame.
dfp = pd.read_pickle('../df_processed.pkl')

# Unpickle the Utility object.  The context manager closes the file handle as
# soon as loading finishes instead of leaving it open until garbage collection.
# NOTE(review): unpickling executes code embedded in the file; only load
# pickle files that come from a trusted source.
with open('../util_obj.pkl', 'rb') as util_file:
    ut = pickle.load(util_file)

In [4]:
#dfp.site_id.unique()

In [5]:
# Show the first record of the raw dataframe
#df_raw.iloc[0]


In [6]:
# Show the first record of the processed dataframe
#dfp.iloc[0]

In [7]:
# Show type information and counts of processed dataframe
#dfp.info()

In [8]:
# Show stats for numeric columns
#dfp.describe()

In [9]:
# Show counts of service types
#dfp.service_type.value_counts()

In [10]:
#dfp.item_desc.value_counts()

In [11]:
#pd.pivot_table(dfp, values='site_id', index='service_type', columns='units', aggfunc='count')

In [None]:
dfp.head()

In [21]:
fy = dfp['fiscal_year'].unique()
fy

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2009],
      dtype=int64)

In [24]:
def FY_spreadsheets(dfp, ut):
    """Write one site-summary Excel workbook per fiscal year.

    For each distinct value in ``dfp['fiscal_year']`` the billing records are
    summarized to one row per ``site_id`` (cost, native-unit usage and MMBtu
    by service type, plus degree days, square footage and the derived indices
    ``eci``, ``uci``, ``eui`` and ``specific_eui``).  Each summary is saved as
    ``FY{year}_Site_Summary_Data.xlsx`` in the current working directory.

    Parameters
    ----------
    dfp : pandas.DataFrame
        Pre-processed billing records.  Columns read here: ``fiscal_year``,
        ``site_id``, ``service_type``, ``units``, ``cost``, ``usage`` and
        ``mmbtu``.  ``bu.months_present`` is told to use the ``fiscal_year``
        and ``fiscal_mo`` columns.
    ut : object
        Utility object providing ``degree_days_yearly(months, site_id)`` and
        ``building_info(site_id)``.

    Returns
    -------
    None
    """

    def electricity_breakdown(df_year, value_col):
        """Return per-site sums of `value_col` for the kWh and kW bill rows.

        The values are keyed by the column stem they feed.  A unit that does
        not occur in `df_year` yields the scalar 0.0 so the column still
        exists in the output.
        """
        by_units = pd.pivot_table(df_year, index=['site_id'], columns='units',
                                  values=value_col, aggfunc='sum')
        return {
            stem: (by_units[unit] if unit in by_units.columns else 0.0)
            for stem, unit in (('electricity_energy', 'kWh'),
                               ('electricity_demand', 'kW'))
        }

    export_columns = [
        'dd',
        'sq_ft',
        'electricity_energy_cost',
        'electricity_demand_cost',
        'electricity_cost',
        'fuel_oil_cost',
        'natural_gas_cost',
        'district_heat_cost',
        'total_energy_cost',
        'water_cost',
        'sewer_cost',
        'total_water_cost',
        'total_utility_cost',
        'eci',
        'uci',
        'electricity_energy_usage',
        'electricity_demand_usage',
        'electricity_mmbtu',
        'fuel_oil_usage',
        'fuel_oil_mmbtu',
        'natural_gas_usage',
        'natural_gas_mmbtu',
        'district_heat_usage',
        'total_heat_mmbtu',
        'eui',
        'specific_eui',
        'total_energy_mmbtu',
        'water_usage',
        'sewer_usage',
    ]

    # One workbook per fiscal year present in the data.
    for year in dfp['fiscal_year'].unique():

        df_fy = dfp.query('fiscal_year==@year')

        # ---- Cost by service type, one row per site
        cost = pd.pivot_table(df_fy, index=['site_id'], columns='service_type',
                              values='cost', aggfunc='sum')
        cost = bu.add_missing_columns(cost, bu.missing_services([]))
        cost = cost.add_suffix('_cost')

        # Totals are taken from the service-type columns only.  The kWh / kW
        # breakdown columns are added afterwards: their rows are already
        # counted under their service type, so including them in the row sum
        # would count that cost twice.
        cost['total_utility_cost'] = cost.sum(axis=1)
        cost['total_water_cost'] = cost[['water_cost', 'sewer_cost']].sum(axis=1)
        cost['total_energy_cost'] = cost.total_utility_cost - cost.total_water_cost
        cost['total_heat_cost'] = cost.total_energy_cost - cost.electricity_cost
        for stem, values in electricity_breakdown(df_fy, 'cost').items():
            cost[f'{stem}_cost'] = values

        # ---- Usage in native units by service type
        usage = pd.pivot_table(df_fy, index=['site_id'], columns='service_type',
                               values='usage', aggfunc='sum')
        for stem, values in electricity_breakdown(df_fy, 'usage').items():
            usage[stem] = values
        usage = bu.add_missing_columns(usage, bu.missing_services([]))
        usage = usage.add_suffix('_usage')

        # ---- Usage in MMBtu by service type
        btu = pd.pivot_table(df_fy, index=['site_id'], columns='service_type',
                             values='mmbtu', aggfunc='sum')
        btu = bu.add_missing_columns(btu, bu.missing_services([]))
        btu = btu.add_suffix('_mmbtu')
        btu['total_energy_mmbtu'] = btu.sum(axis=1)
        btu['total_heat_mmbtu'] = btu.total_energy_mmbtu - btu.electricity_mmbtu

        # Merge the three summaries side by side on site_id.
        df_FYtotal = pd.concat([cost, usage, btu], axis=1)

        # ---- Degree days and square footage for every site in the summary
        dd = []
        sq_ft = []
        for site_id in df_FYtotal.index:
            df_site = df_fy.query('site_id == @site_id')
            mo_present = bu.months_present(df_site, yr_col='fiscal_year', mo_col='fiscal_mo')
            # Only one fiscal year is present in df_site, so the first entry
            # of the yearly result is the value for this year.
            dd.append(ut.degree_days_yearly(mo_present, site_id).iloc[0])
            try:
                sq = ut.building_info(site_id)['sq_ft']
            except Exception:
                # Best effort: report the site whose building info could not
                # be read and leave its square footage undefined.
                # NOTE(review): the exception types building_info() raises
                # are not visible here; narrow this once confirmed.
                print(site_id)
                sq = np.nan
            sq_ft.append(sq)

        df_FYtotal['dd'] = dd
        df_FYtotal['sq_ft'] = sq_ft

        # ---- Per-square-foot indices (EUI, ECI, ...)
        df_FYtotal['eci'] = df_FYtotal.total_energy_cost / df_FYtotal.sq_ft
        df_FYtotal['uci'] = df_FYtotal.total_utility_cost / df_FYtotal.sq_ft
        df_FYtotal['eui'] = df_FYtotal.total_energy_mmbtu * 1e3 / df_FYtotal.sq_ft
        df_FYtotal['specific_eui'] = df_FYtotal.total_heat_mmbtu * 1e6 / df_FYtotal.dd / df_FYtotal.sq_ft

        # Select the desired columns and export: one workbook per fiscal
        # year, one row per site.
        df_FYtotal[export_columns].to_excel(f"FY{year}_Site_Summary_Data.xlsx")
 


In [None]:
df_FYcost.head()

In [None]:
df_FYusage.head()

In [None]:
df_FYBTU.head()

In [None]:
df_FYtotal.index[1]

In [None]:
# See how the building_info() method of the Utility object works
ut.building_info('03')

In [None]:
# See how the building_info() method of the Utility object works
ut.building_info('DIPMP1')

In [25]:
# Set up variables (site, df, ut):
# NOTE(review): the triple-quoted string below is a bare expression with no
# run-time effect.  It reads like a docstring carried over from the function
# these cells prototype (see the commented-out `return template_data` at the
# end of the notebook) -- confirm before removing.
"""As well as returning template data, this function writes a spreadsheet
that summarizes values for every building.  The spreadsheet is written to
'output/extra_data/site_summary_FYYYYY.xlsx'.
"""

# Site ID used by the exploratory cells that follow.
site="DIPMP1"
# Alias for the processed billing DataFrame loaded earlier.
df = dfp

In [None]:
# Start a dictionary with the main key to hold the template data
template_data = {'energy_index_comparison': {}}

# --------- Table 1, Yearly Table

# Filter down to just this site's bills.  The restriction to energy services
# was removed from the query (see the trailing comment) so that water is
# included in df1; `energy_services` is therefore not referenced by the
# query below.
energy_services = bu.missing_energy_services([])
df1 = df.query('site_id==@site') 
               # Removed to include water in df1:  and service_type==@energy_services')

In [26]:
def Site_spreadsheets(site, df, ut):
    """Write a month-by-month summary workbook for one site.

    The billing records of `site` are summarized to one row per
    (``fiscal_year``, ``fiscal_mo``) with cost, native-unit usage and MMBtu
    by service type, degree days and per-square-foot indices.  Trailing
    12-row rolling totals of the same quantities are added with a ``_12mo``
    suffix.  The result is saved as ``Site_{site}_Monthly_Summary_Data.xlsx``
    in the current working directory.

    Parameters
    ----------
    site : str
        Site ID to report on.
    df : pandas.DataFrame
        Pre-processed billing records.  Columns read here: ``site_id``,
        ``fiscal_year``, ``fiscal_mo``, ``service_type``, ``units``,
        ``cost``, ``usage`` and ``mmbtu``.
    ut : object
        Utility object providing ``degree_days_monthly(months, site)`` and
        ``building_info(site)``.

    Returns
    -------
    None
    """
    month_index = ['fiscal_year', 'fiscal_mo']

    # Filter down to just this site's bills (all services, including water).
    site_bills = df.query('site_id==@site')

    def monthly_pivot(column_key, value_col):
        """Sum `value_col` per month, one column per value of `column_key`."""
        return pd.pivot_table(site_bills, index=month_index, columns=column_key,
                              values=value_col, aggfunc='sum')

    def electricity_breakdown(value_col):
        """Return monthly sums of `value_col` for the kWh and kW bill rows.

        A unit the site never uses yields the scalar 0.0, matching the
        fallback used by FY_spreadsheets, instead of raising KeyError.
        """
        by_units = monthly_pivot('units', value_col)
        return {
            stem: (by_units[unit] if unit in by_units.columns else 0.0)
            for stem, unit in (('electricity_energy', 'kWh'),
                               ('electricity_demand', 'kW'))
        }

    # Monthly degree days, indexed like the pivot tables so they can be
    # merged on the index.  set_index() returns a new frame, leaving the
    # object handed back by `ut` unmodified.
    months_present = bu.months_present(site_bills)
    deg_days = ut.degree_days_monthly(months_present, site).set_index(month_index)

    # Building square footage, used for the per-square-foot indices.
    sq_ft = ut.building_info(site)['sq_ft']

    # ---- Monthly cost by service type
    cost = bu.add_missing_columns(monthly_pivot('service_type', 'cost'),
                                  bu.missing_services([]))
    cost = cost.add_suffix('_cost')

    # Totals are taken from the service-type columns only.  The kWh / kW
    # breakdown columns are added afterwards: their rows are already counted
    # under their service type, so including them in the row sum would count
    # that cost twice.
    cost['total_utility_cost'] = cost.sum(axis=1)
    cost['total_water_cost'] = cost[['water_cost', 'sewer_cost']].sum(axis=1)
    cost['total_energy_cost'] = cost.total_utility_cost - cost.total_water_cost
    cost['total_heat_cost'] = cost.total_energy_cost - cost.electricity_cost
    cost['eci'] = cost.total_energy_cost / sq_ft
    cost['uci'] = cost.total_utility_cost / sq_ft
    for stem, values in electricity_breakdown('cost').items():
        cost[f'{stem}_cost'] = values

    # rolling(12) uses the defaults the original spelled out explicitly; the
    # `axis` keyword is deprecated in recent pandas.
    cost_12mo = cost.rolling(12).sum().add_suffix('_12mo')

    # ---- Monthly usage in native units
    usage = monthly_pivot('service_type', 'usage')
    for stem, values in electricity_breakdown('usage').items():
        usage[stem] = values
    usage = bu.add_missing_columns(usage, bu.missing_services([]))
    usage = usage.add_suffix('_usage')

    usage_12mo = usage.rolling(12).sum().add_suffix('_12mo')
    # Demand is reported as the average over the window rather than the sum.
    usage_12mo['electricity_demand_usage_12mo'] = usage_12mo['electricity_demand_usage_12mo'] / 12

    # ---- Monthly usage in MMBtu
    btu = bu.add_missing_columns(monthly_pivot('service_type', 'mmbtu'),
                                 bu.missing_services([]))
    btu = btu.add_suffix('_mmbtu')
    btu['total_energy_mmbtu'] = btu.sum(axis=1)
    btu['total_heat_mmbtu'] = btu.total_energy_mmbtu - btu.electricity_mmbtu
    btu['eui'] = btu.total_energy_mmbtu * 1e3 / sq_ft

    btu = pd.merge(btu, deg_days, how='left', left_index=True, right_index=True)
    btu['specific eui'] = btu.total_heat_mmbtu * 1e6 / btu.dd / sq_ft

    btu_12mo = btu.rolling(12).sum().add_suffix('_12mo')
    # A sum of twelve monthly ratios is not the 12-month ratio, and months
    # with zero degree days make the monthly ratio infinite.  Recompute the
    # 12-month specific EUI from the 12-month heat and degree-day totals.
    btu_12mo['specific eui_12mo'] = (
        btu_12mo['total_heat_mmbtu_12mo'] * 1e6 / btu_12mo['dd_12mo'] / sq_ft
    )

    # Merge all monthly and rolling frames side by side.
    df_total = pd.concat([cost, usage, btu, cost_12mo, usage_12mo, btu_12mo], axis=1)

    # Columns to export: one row per month for this site.
    monthly_columns = [
        'dd',
        'electricity_energy_cost',
        'electricity_demand_cost',
        'electricity_cost',
        'fuel_oil_cost',
        'natural_gas_cost',
        'district_heat_cost',
        'total_energy_cost',
        'water_cost',
        'sewer_cost',
        'total_water_cost',
        'total_utility_cost',
        'eci',
        'uci',
        'electricity_energy_usage',
        'electricity_demand_usage',
        'electricity_mmbtu',
        'fuel_oil_usage',
        'fuel_oil_mmbtu',
        'natural_gas_usage',
        'natural_gas_mmbtu',
        'district_heat_usage',
        'total_heat_mmbtu',
        'eui',
        'specific eui',
        'total_energy_mmbtu',
        'water_usage',
        'sewer_usage',
    ]
    # The rolling columns mirror the monthly ones, except that the rolling
    # set also reports total_heat_cost right after district_heat_cost.
    rolling_columns = [f'{col}_12mo' for col in monthly_columns]
    rolling_columns.insert(rolling_columns.index('district_heat_cost_12mo') + 1,
                           'total_heat_cost_12mo')

    df_export = df_total[monthly_columns + rolling_columns]
    df_export.to_excel(f"Site_{site}_Monthly_Summary_Data.xlsx")

In [None]:
## The following cells create the export spreadsheet by site, with a row for every month

In [None]:
# Consolidate data to monthly rows




In [None]:
#deg_days
#df_monthlyBTU_rolling


In [None]:
## End of Ben edits for spreadsheet export

In [None]:
# NOTE(review): scratch cell.  It uses df3, energy_svcs and df_final, which
# are only assigned in cells further down the notebook, so it cannot run in
# top-to-bottom order.  Its first part duplicates the cost summary in the
# later "Summarize Cost by Service Type" cell.
# NOTE(review): the return value of add_missing_columns() is discarded here,
# whereas FY_spreadsheets / Site_spreadsheets assign it back -- confirm
# whether the helper modifies df3 in place.

# Add in any missing columns
bu.add_missing_columns(df3, energy_svcs)

# Change column names
cols = ['{}_cost'.format(col) for col in df3.columns]
df3.columns = cols

# Add a total energy cost column
df3['total_energy_cost'] = df3.sum(axis=1)

# Add a total Heat Cost Column
df3['total_heat_cost'] = df3.total_energy_cost.fillna(0.0) - df3.electricity_cost.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)
#for service_type
# Sum this site's bill columns for each fiscal year / month.
df_sitemonthly = df1.groupby(['fiscal_year', 'fiscal_mo'], as_index=False).sum()

#df1.query("service_type=='electricity'").groupby('fiscal_year').sum()[['mmbtu']]
df_sitemonthly
# Add columns to monthly site data

In [None]:
# Only do this table if the site has any billing records.
if not df1.empty:

    # Sum costs and MMBtu usage by fiscal year
    df2 = pd.pivot_table(df1, index='fiscal_year', values=['cost', 'mmbtu'], aggfunc='sum')

    # Add a column showing number of months present in each fiscal year.
    # NOTE(review): the return value is not used, so this relies on the
    # helper adding `month_count` to df2 in place -- confirm.
    bu.add_month_count_column(df2, df1)

    # Make a column with just the Heat MMBtu (total minus electricity).
    # The mmbtu column is selected BEFORE summing so that non-numeric columns
    # of df1 are never aggregated.
    dfe = df1.query("service_type=='electricity'").groupby('fiscal_year')[['mmbtu']].sum()
    dfe = dfe.rename(columns={'mmbtu': 'elec_mmbtu'})
    df2 = df2.merge(dfe, how='left', left_index=True, right_index=True)
    df2['elec_mmbtu'] = df2['elec_mmbtu'].fillna(0.0)
    df2['heat_mmbtu'] = df2.mmbtu - df2.elec_mmbtu

    # Add in degree days to DataFrame
    months_present = bu.months_present(df1)
    deg_days = ut.degree_days_yearly(months_present, site)
    df2['hdd'] = deg_days

    # Get building square footage and calculate EUIs and ECI.
    sq_ft = ut.building_info(site)['sq_ft']
    df2['eui'] = df2.mmbtu * 1e3 / sq_ft
    df2['eci'] = df2.cost / sq_ft
    df2['specific_eui'] = df2.heat_mmbtu * 1e6 / df2.hdd / sq_ft

    # Restrict to full years
    df2 = df2.query("month_count == 12").copy()

    # Reverse the years so the most recent comes first
    df2 = df2.sort_index(ascending=False)

    # get the rows as a list of dictionaries and put into
    # final template data dictionary.
    template_data['energy_index_comparison']['yearly_table'] = {
        'rows': bu.df_to_dictionaries(df2)
    }

    

In [None]:
df2

In [None]:
template_data

In [None]:
# ---------- Table 2, Details Table

# Use the last complete year for this site as the year for the Details
# table.  If there was no complete year for the site, then use the
# last complete year for the entire dataset.
# At notebook top level, `'df2' in locals()` tests whether the yearly-table
# cell ran and defined df2 (it only does so when the site has bills).
if 'df2' in locals() and len(df2):
    last_complete_year = df2.index.max()
else:
    # Determine month count by year for Electricity in entire dataset
    # to determine the latest complete year.
    electric_only = df.query("service_type == 'electricity'")
    electric_months_present = bu.months_present(electric_only)
    electric_mo_count = bu.month_count(electric_months_present)
    last_complete_year = max(electric_mo_count[electric_mo_count==12].index)

# Filter down to just the records of the targeted fiscal year.
# From here on df1 holds ALL sites for that year, no longer a single site.
df1 = df.query('fiscal_year == @last_complete_year')

# Get Total Utility cost by building. This includes non-energy utilities as well.
# df2 is re-bound here and no longer holds the yearly table.
df2 = df1.pivot_table(index='site_id', values=['cost'], aggfunc=np.sum)
df2.columns = ['total_cost']

# Save this into the Final DataFrame that we will build up as we go.
df_final = df2.copy()

In [None]:
df_final.head()

In [None]:
# Get a list of the Energy Services and restrict the data to
# just these services.  In DataFrame.query(), comparing a column to a list
# with `==` is a membership test, so this keeps the rows whose service_type
# is any of the energy services.
energy_svcs = bu.missing_energy_services([])
df2 = df1.query('service_type == @energy_svcs')

In [None]:
energy_svcs

In [None]:
df2

In [None]:
# Summarize Cost by Service Type
df3 = pd.pivot_table(df2, index='site_id', columns='service_type', values='cost', aggfunc='sum')

# Add in any missing columns.  The result is assigned back, as in the other
# calls to this helper in this notebook, so the added columns are kept
# whether or not the helper works in place.
df3 = bu.add_missing_columns(df3, energy_svcs)

# Change column names
df3 = df3.add_suffix('_cost')

# Add a total energy cost column
df3['total_energy_cost'] = df3.sum(axis=1)

# Add a total Heat Cost Column
df3['total_heat_cost'] = df3.total_energy_cost.fillna(0.0) - df3.electricity_cost.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

# Summarize MMBtu by Service Type
df3 = pd.pivot_table(df2, index='site_id', columns='service_type', values='mmbtu', aggfunc='sum')

# Add in any missing columns (result assigned back, see above)
df3 = bu.add_missing_columns(df3, energy_svcs)

# Change column names
df3 = df3.add_suffix('_mmbtu')

# Add a total mmbtu column
df3['total_mmbtu'] = df3.sum(axis=1)

# Add a total Heat mmbtu Column
df3['total_heat_mmbtu'] = df3.total_mmbtu.fillna(0.0) - df3.electricity_mmbtu.fillna(0.0)

# Add this to the final DataFrame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

# Electricity kWh summed by building
df3 = pd.pivot_table(df2.query('units == "kWh"'), index='site_id', values='usage', aggfunc='sum')
df3.columns = ['electricity_kwh']

# Include in Final DF
df_final = pd.concat([df_final, df3], axis=1, sort=True)

In [None]:
df_final.head()

In [None]:
# Electricity kW, both Average and Max by building
# First, sum up kW pieces for each month.  Only the usage column is summed;
# summing every column would also aggregate non-numeric columns, which
# recent pandas no longer drops silently.
df3 = (
    df2.query('units == "kW"')
       .groupby(['site_id', 'fiscal_year', 'fiscal_mo'])['usage']
       .sum()
       .reset_index()
)
# Then take the mean and the max of the monthly totals for each site.  The
# column names below rely on the aggfunc order: mean first, max second.
df3 = pd.pivot_table(df3, index='site_id', values='usage', aggfunc=['mean', 'max'])
df3.columns = ['electricity_kw_average', 'electricity_kw_max']

# Add into Final Frame
df_final = pd.concat([df_final, df3], axis=1, sort=True)

In [None]:
# Add in Square footage info.  The double brackets keep the result a
# one-column DataFrame rather than a Series.
df_bldg = ut.building_info_df()[['sq_ft']]

# Add into Final Frame.  I do a merge here so as not to bring
# in buildings from the building info spreadsheet that are not in this
# dataset; this dataset has been restricted to one year.
# The left join on the index keeps exactly the rows already in df_final.
df_final = pd.merge(df_final, df_bldg, how='left', left_index=True, right_index=True)

In [None]:
df_bldg

In [None]:
df_final.head()

In [None]:
# Build a DataFrame that has monthly degree days for each site/year/month
# combination.
combos = set(zip(df1.site_id, df1.fiscal_year, df1.fiscal_mo))
df_dd = pd.DataFrame(data=list(combos), columns=['site_id', 'fiscal_year', 'fiscal_mo'])
# NOTE(review): the return value is not used; the `degree_days` column read
# below is expected to be added to df_dd in place -- confirm.
ut.add_degree_days_col(df_dd)

# Add up the degree days by site (we've already filtered down to one year or less
# of data.)  Only the degree_days column is summed, not fiscal_year / fiscal_mo.
dd_series = df_dd.groupby('site_id')['degree_days'].sum()

# Put in final DataFrame
df_final = pd.concat([df_final, dd_series], axis=1)

# Add in a column that gives the number of months present for each site
# in this year.  Then filter down to just the sites that have 12 months
# of data.  The helper is given a (site_id, fiscal_year) index; the index is
# switched back to site_id alone afterwards.
df_final.reset_index(inplace=True)
df_final['fiscal_year'] = last_complete_year
df_final.set_index(['site_id', 'fiscal_year'], inplace=True)
df_final = bu.add_month_count_column_by_site(df_final, df2)
df_final = df_final.query('month_count==12').copy()
df_final.reset_index(inplace=True)
df_final.set_index('site_id', inplace=True)

# Calculate per square foot values for each building.
df_final['eui'] = df_final.total_mmbtu * 1e3 / df_final.sq_ft
df_final['eci'] = df_final.total_energy_cost / df_final.sq_ft
df_final['specific_eui'] = df_final.total_heat_mmbtu * 1e6 / df_final.sq_ft / df_final.degree_days

In [None]:
df_final.head()

In [None]:
# Save this to a spreadsheet, if it has not already been saved
fn = 'output/extra_data/site_summary_FY{}.xlsx'.format(last_complete_year)
if not os.path.exists(fn):
    # Make sure the output folder exists so the write does not fail on a
    # fresh checkout.
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    with pd.ExcelWriter(fn) as excel_writer:
        df_final.to_excel(excel_writer, sheet_name='Sites')

# Get the totals across all buildings.
# Total Degree-Days are not relevant, so that entry is dropped.
totals_all_bldgs = df_final.sum().drop(['degree_days'])

# Only use the set of buildings that have some energy use and non-zero
# square footage to determine EUI's and ECI's
energy_bldgs = df_final.query("total_mmbtu > 0 and sq_ft > 0")

# Get total square feet, energy use, and energy cost for these buildings
# and calculate EUI and ECI
sq_ft_energy_bldgs = energy_bldgs.sq_ft.sum()
energy_in_energy_bldgs = energy_bldgs.total_mmbtu.sum()
energy_cost_in_energy_bldgs = energy_bldgs.total_energy_cost.sum()
totals_all_bldgs['eui'] = energy_in_energy_bldgs * 1e3 / sq_ft_energy_bldgs
totals_all_bldgs['eci'] = energy_cost_in_energy_bldgs / sq_ft_energy_bldgs

# For calculating heating specific EUI, further filter the set of
# buildings down to those that have heating fuel use.
# Get separate square footage total and weighted average degree-day for these
# (degree days are weighted by each building's heat MMBtu).
heat_bldgs = energy_bldgs.query("total_heat_mmbtu > 0")
heat_bldgs_sq_ft = heat_bldgs.sq_ft.sum()
heat_bldgs_heat_mmbtu = heat_bldgs.total_heat_mmbtu.sum()
heat_bldgs_degree_days = (heat_bldgs.total_heat_mmbtu * heat_bldgs.degree_days).sum() / heat_bldgs.total_heat_mmbtu.sum()
totals_all_bldgs['specific_eui'] = heat_bldgs_heat_mmbtu * 1e6 / heat_bldgs_sq_ft / heat_bldgs_degree_days

# calculate a rank DataFrame: within every column, the largest value gets rank 1
df_rank = df_final.rank(ascending=False)

if site in df_final.index:
    # The site exists in the DataFrame
    site_info = df_final.loc[site]
    site_pct = site_info / totals_all_bldgs
    site_rank = df_rank.loc[site]
else:
    # Site is not there, probably because it is not present in this year.
    # Make variables with NaN values for all elements.  They are built from
    # the column labels rather than from the first row, so this also works
    # when df_final has no rows.  np.nan is used because the np.NaN alias was
    # removed in NumPy 2.0.
    site_info = pd.Series(np.nan, index=df_final.columns)
    site_pct = site_info.copy()
    site_rank = site_info.copy()

# Make a final dictionary to hold all the results for this table
tbl2_data = {
    'fiscal_year': 'FY {}'.format(last_complete_year),
    'bldg': site_info.to_dict(),
    'all': totals_all_bldgs.to_dict(),
    'pct': site_pct.to_dict(),
    'rank': site_rank.to_dict()
}
template_data['energy_index_comparison']['details_table'] = tbl2_data

# -------------- Energy Comparison Graphs ---------------

# Filter down to only services that are energy services.
energy_services = bu.missing_energy_services([])
df4 = df.query('service_type==@energy_services').copy()

# Sum Energy Costs and Usage
df5 = pd.pivot_table(df4, index=['site_id', 'fiscal_year'], values=['cost', 'mmbtu'], aggfunc='sum')

# Add a column showing number of months present in each fiscal year.
df5 = bu.add_month_count_column_by_site(df5, df4)

# Create an Electric MMBtu column so it can be subtracted from total to determine
# Heat MMBtu.  The service type is spelled 'electricity' (lower case) as in
# the other queries in this notebook; the former 'Electricity' spelling
# matched no rows, so elec_mmbtu was always 0 and heat_mmbtu equalled the
# total.  The mmbtu column is selected before summing so that non-numeric
# columns are never aggregated.
dfe = df4.query("service_type=='electricity'").groupby(['site_id', 'fiscal_year'])[['mmbtu']].sum()
dfe = dfe.rename(columns={'mmbtu': 'elec_mmbtu'})
df5 = df5.merge(dfe, how='left', left_index=True, right_index=True)
df5['elec_mmbtu'] = df5['elec_mmbtu'].fillna(0.0)
df5['heat_mmbtu'] = df5.mmbtu - df5.elec_mmbtu

# Add in degree-days:
# Create a DataFrame with site, year, month and degree-days, but only one row
# for each site/year/month combo.
dfd = df4[['site_id', 'fiscal_year', 'fiscal_mo']].drop_duplicates()
# NOTE(review): the return value is not used; the `degree_days` column read
# below is expected to be added to dfd in place -- confirm.
ut.add_degree_days_col(dfd)

# Use the agg function below so that a NaN will be returned for the year
# if any monthly values are NaN
dfd = dfd.groupby(['site_id', 'fiscal_year']).agg({'degree_days': lambda x: np.sum(x.values)})[['degree_days']]
df5 = df5.merge(dfd, how='left', left_index=True, right_index=True)

# Add in some needed building info like square footage, primary function 
# and building category.
df_bldg = ut.building_info_df()

# Shrink to just the needed fields and remove index.
# Also, fill blank values with 'Unknown'.
df_info = df_bldg[['sq_ft', 'site_category', 'primary_func']].copy().reset_index()
df_info['site_category'] = df_info.site_category.fillna('Unknown')
df_info['primary_func'] = df_info.primary_func.fillna('Unknown Type')

# Also Remove the index from df5 and merge in building info.  No join keys
# are given, so the merge uses the columns the two frames have in common.
df5 = df5.reset_index()
df5 = df5.merge(df_info, how='left')

# Now calculate per square foot energy measures
df5['eui'] = df5.mmbtu * 1e3 / df5.sq_ft
df5['eci'] = df5.cost / df5.sq_ft
df5['specific_eui'] = df5.heat_mmbtu * 1e6 / df5.degree_days / df5.sq_ft

# Restrict to full years
df5 = df5.query("month_count == 12").copy()

# Make all of the comparison graphs: for each index, one graph by building
# type and one by building owner.
g1_fn, g1_url = gu.graph_filename_url(site, 'eci_func')
gu.building_type_comparison_graph(df5, 'eci', site, g1_fn)

g2_fn, g2_url = gu.graph_filename_url(site, 'eci_owner')
gu.building_owner_comparison_graph(df5, 'eci', site, g2_fn)

g3_fn, g3_url = gu.graph_filename_url(site, 'eui_func')
gu.building_type_comparison_graph(df5, 'eui', site, g3_fn)

g4_fn, g4_url = gu.graph_filename_url(site, 'eui_owner')
gu.building_owner_comparison_graph(df5, 'eui', site, g4_fn)

g5_fn, g5_url = gu.graph_filename_url(site, 'speui_func')
gu.building_type_comparison_graph(df5, 'specific_eui', site, g5_fn)

g6_fn, g6_url = gu.graph_filename_url(site, 'speui_owner')
gu.building_owner_comparison_graph(df5, 'specific_eui', site, g6_fn)

template_data['energy_index_comparison']['graphs'] = [
    g1_url, g2_url, g3_url, g4_url, g5_url, g6_url
]

# return template_data