In [1]:
import time
import pickle
import glob
import os
import pprint
import datetime
import warnings
import pandas as pd
import numpy as np
import bench_util as bu
import graph_util as gu
import template_util
import shutil
import settings       # the file holding settings for this script
from importlib import reload

In [2]:
# Load the preprocessed billing DataFrame and the utility helper object from
# pickle files in the working directory. Context managers are used so the file
# handles are closed deterministically instead of being left to the garbage
# collector.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles written by a trusted step of this pipeline.
with open('df_processed.pkl', 'rb') as fin:
    df = pickle.load(fin)
with open('util_obj.pkl', 'rb') as fin:
    ut = pickle.load(fin)
df.head()

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2005,12,Electricity charge,kWh,1904.65788,49.5,14790.748577,2006,6,50.466034
1,3,Electricity,2006,1,Electricity charge,kWh,5430.493797,93.0,42665.790911,2006,7,145.575679
2,3,Electricity,2006,2,Electricity charge,kWh,5764.40673,84.0,45010.439348,2006,8,153.575619
3,3,Electricity,2006,3,Electricity charge,kWh,6349.255299,93.0,46311.547557,2006,9,158.015
4,3,Electricity,2006,4,Electricity charge,kWh,5529.385224,90.0,40392.812893,2006,10,137.820278


In [3]:
# Re-import bench_util so edits made to the module since the first import
# take effect in this kernel.
reload(bu)

# Filter down to only services that are energy services.
# NOTE(review): passing an empty list presumably makes missing_energy_services
# return every energy service type -- confirm against bench_util.
energy_services = bu.missing_energy_services([])
df4 = df.query('service_type==@energy_services').copy()

# Sum energy cost and MMBtu usage for each site / fiscal year. The string
# 'sum' is passed rather than np.sum: the result is the same, but recent pandas
# releases emit a FutureWarning when a NumPy callable is given as aggfunc.
df5 = pd.pivot_table(df4, index=['site_id', 'fiscal_year'], values=['cost', 'mmbtu'], aggfunc='sum')
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1
3,2006,57349.859443,2736.046098
3,2007,104483.553127,4365.666281
3,2008,129628.41241,4202.356387
3,2009,108600.953368,4003.379143
3,2010,115833.893536,3686.695168


In [4]:
# Add a column showing number of months present in each fiscal year.
# The helper receives the site/fiscal-year summary (df5) and the bill-level
# energy records (df4); its result replaces df5. The 'month_count' column it
# provides is used further down to keep only complete 12-month years.
# NOTE(review): whether the helper also mutates its df5 argument in place is
# not visible from this notebook -- confirm in bench_util.
df5 = bu.add_month_count_column_by_site(df5, df4)
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu,month_count
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,2006,57349.859443,2736.046098,7
3,2007,104483.553127,4365.666281,12
3,2008,129628.41241,4202.356387,12
3,2009,108600.953368,4003.379143,12
3,2010,115833.893536,3686.695168,12


In [5]:
# Electricity MMBtu per site / fiscal year. The 'mmbtu' column is selected
# *before* aggregating so that only that column is summed; summing the whole
# frame first would also try to aggregate the text columns (item_desc, units,
# ...), which is wasteful and can fail on recent pandas versions.
dfe = (
    df4.query("service_type=='Electricity'")
    .groupby(['site_id', 'fiscal_year'])[['mmbtu']]
    .sum()
    .rename(columns={'mmbtu': 'elec_mmbtu'})
)

# Left-join on the (site_id, fiscal_year) index so site/years without any
# electricity records are kept; those get 0.0 electric MMBtu.
df5 = df5.merge(dfe, how='left', left_index=True, right_index=True)
df5['elec_mmbtu'] = df5['elec_mmbtu'].fillna(0.0)

# Everything that is not electricity is treated as heating energy.
df5['heat_mmbtu'] = df5.mmbtu - df5.elec_mmbtu
df5.head()
                  

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu,month_count,elec_mmbtu,heat_mmbtu
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,2006,57349.859443,2736.046098,7,863.836098,1872.21
3,2007,104483.553127,4365.666281,12,1446.018281,2919.648
3,2008,129628.41241,4202.356387,12,1415.147115,2787.209273
3,2009,108600.953368,4003.379143,12,1487.590416,2515.788727
3,2010,115833.893536,3686.695168,12,1453.957168,2232.738


In [6]:
# Build one row per site / fiscal year / fiscal month so each month's
# degree-days are counted exactly once, however many bill rows that month has.
dfd = df4[['site_id', 'fiscal_year', 'fiscal_mo']].drop_duplicates().copy()

# Adds the 'degree_days' column to dfd in place (the return value is unused).
ut.add_degree_days_col(dfd)

# Total the monthly values into annual degree-days. Summing the raw NumPy
# array (instead of using the built-in groupby sum, which skips NaN) makes the
# annual figure NaN whenever any month in that year is NaN.
dfd = (
    dfd
    .groupby(['site_id', 'fiscal_year'])
    .agg({'degree_days': lambda monthly: np.sum(monthly.values)})
    [['degree_days']]
)
dfd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,degree_days
site_id,fiscal_year,Unnamed: 2_level_1
3,2006,
3,2007,
3,2008,
3,2009,14388.3
3,2010,13186.1


In [7]:
# Attach annual degree-days to the site / fiscal-year summary. Both frames are
# indexed by (site_id, fiscal_year); the left join keeps every summary row and
# leaves degree_days as NaN where no annual value is available.
df5 = df5.merge(
    dfd,
    left_index=True,
    right_index=True,
    how='left',
)
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu,month_count,elec_mmbtu,heat_mmbtu,degree_days
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3,2006,57349.859443,2736.046098,7,863.836098,1872.21,
3,2007,104483.553127,4365.666281,12,1446.018281,2919.648,
3,2008,129628.41241,4202.356387,12,1415.147115,2787.209273,
3,2009,108600.953368,4003.379143,12,1487.590416,2515.788727,14388.3
3,2010,115833.893536,3686.695168,12,1453.957168,2232.738,13186.1


In [8]:
# Add in a square feet column
def get_ft2(site_yr):
    """Return the building square footage for one summary index key.

    Parameters
    ----------
    site_yr : tuple
        Index key whose first element is the site ID; any remaining elements
        (the fiscal year) are ignored.

    Returns
    -------
    The 'sq_ft' entry of ``ut.building_info(site_id)``, or ``np.nan`` when the
    lookup fails for any reason (e.g. unknown site or missing 'sq_ft' key).
    """
    try:
        return ut.building_info(site_yr[0])['sq_ft']
    except Exception:
        # Best-effort lookup: a site without usable building info simply gets
        # a missing value. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate. np.nan is used because the
        # np.NaN alias no longer exists in NumPy 2.0.
        return np.nan
# Look up square footage for every (site_id, fiscal_year) index key and append
# it as the 'sq_ft' column; the tail is shown to spot-check the last site.
df5 = df5.assign(sq_ft=df5.index.map(get_ft2))
df5.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu,month_count,elec_mmbtu,heat_mmbtu,degree_days,sq_ft
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
WSPSWP,2014,253228.886807,7445.955719,12,1994.479737,5451.475982,12573.3,28270.0
WSPSWP,2015,201691.953429,6668.185875,12,1921.952304,4746.233571,12457.2,28270.0
WSPSWP,2016,170604.14769,7244.765695,12,2019.561766,5225.203929,12172.3,28270.0
WSPSWP,2017,213829.628103,7385.673697,12,2414.959835,4970.713862,13803.2,28270.0
WSPSWP,2018,22344.175587,941.033927,3,222.028145,719.005781,857.9,28270.0


In [9]:
# Derive the per-square-foot intensity metrics:
#   eui          - total MMBtu scaled by 1e3, per square foot
#   eci          - total cost per square foot
#   specific_eui - heating MMBtu scaled by 1e6, per degree-day per square foot
# NOTE(review): the 1e3 / 1e6 factors presumably convert MMBtu to kBtu and Btu
# respectively -- confirm the intended reporting units.
df5 = df5.assign(
    eui=df5.mmbtu * 1e3 / df5.sq_ft,
    eci=df5.cost / df5.sq_ft,
    specific_eui=df5.heat_mmbtu * 1e6 / df5.degree_days / df5.sq_ft,
)

# Keep only fiscal years that have all 12 months of data.
df5 = df5.loc[df5.month_count == 12].copy()
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,mmbtu,month_count,elec_mmbtu,heat_mmbtu,degree_days,sq_ft,eui,eci,specific_eui
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,2007,104483.553127,4365.666281,12,1446.018281,2919.648,,49210.0,88.715023,2.123218,
3,2008,129628.41241,4202.356387,12,1415.147115,2787.209273,,49210.0,85.396391,2.634188,
3,2009,108600.953368,4003.379143,12,1487.590416,2515.788727,14388.3,49210.0,81.35296,2.206888,3.553132
3,2010,115833.893536,3686.695168,12,1453.957168,2232.738,13186.1,49210.0,74.917601,2.353869,3.440868
3,2011,133904.976207,3915.629866,12,1497.110866,2418.519,13761.4,49210.0,79.5698,2.721093,3.571359


In [10]:
# Reduce the summary to the three intensity metrics that get saved.
df5 = df5.loc[:, ['eui', 'eci', 'specific_eui']]
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eui,eci,specific_eui
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,2007,88.715023,2.123218,
3,2008,85.396391,2.634188,
3,2009,81.35296,2.206888,3.553132
3,2010,74.917601,2.353869,3.440868
3,2011,79.5698,2.721093,3.571359


In [12]:
# Persist the site / fiscal-year intensity metrics (eui, eci, specific_eui)
# to 'df5.pkl' in the current working directory.
df5.to_pickle('df5.pkl')

In [13]:
# Read the pickle back and show the first rows as a round-trip sanity check.
pd.read_pickle('df5.pkl').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eui,eci,specific_eui
site_id,fiscal_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,2007,88.715023,2.123218,
3,2008,85.396391,2.634188,
3,2009,81.35296,2.206888,3.553132
3,2010,74.917601,2.353869,3.440868
3,2011,79.5698,2.721093,3.571359
