# Example of Using the Utility Bill data in a Notebook

In [1]:
import pickle
import pandas as pd
import benchmark         # the main benchmarking script file
import bench_util        # some utility functions related to benchmarking

# Just in case you want to plot something:
import matplotlib.pyplot as plt
 
# Show Plots in the Notebook
%matplotlib inline

# Make charts larger and their text easier to read
plt.rcParams.update({
    'figure.figsize': (10, 8),   # chart size
    'font.size': 14,             # font size used in charts
})

# Style the plots using the 'bmh' style sheet
plt.style.use('bmh')

## Using Data from the Last Run of the Script
Loading the data saved by the last run of the script saves about 4 minutes, because the raw data does not have to be read and pre-processed again.

In [2]:
# Read the raw Utility Bill data into a DataFrame
df_raw = pd.read_pickle('df_raw.pkl')

# Read the pre-processed data
dfp = pd.read_pickle('df_processed.pkl')

# Read in the bench_util.Util object, which provides some other data that may
# be useful.  The 'with' block closes the file handle as soon as the object
# has been loaded, instead of leaving it open until garbage collection.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that were produced by your own run of the benchmark script.
with open('util_obj.pkl', 'rb') as util_file:
    util_obj = pickle.load(util_file)

### Take a look at what you got

In [3]:
# Preview the first three rows of the raw utility bill records
df_raw.head(n=3)

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
0,TRGR,FNSB-Transit Garage,VP287678,Sourdough Fuel (Petro Star),00013297 (closed),09/28/2010,09/28/2010,01/26/2011,,,2008-11-19,2010-09-28,Oil #1,FED LUS TX,,,3.0,,,
1,TRGR,FNSB-Transit Garage,VP287678,Sourdough Fuel (Petro Star),00013297 (closed),09/28/2010,09/28/2010,01/26/2011,,,2008-11-19,2010-09-28,Oil #1,Fuel Oil #1 (Gallons),,3000.0,7950.0,Gallons,,
2,TRGR,FNSB-Transit Garage,VP287678,Sourdough Fuel (Petro Star),00013297 (closed),09/30/2010,09/30/2010,01/26/2011,,,2010-09-28,2010-09-30,Oil #1,FED LUS TX,,,1.31,,,


In [4]:
# Preview the first three rows of the pre-processed data
dfp.head(n=3)

Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,days_served,usage,fiscal_year,fiscal_mo,mmbtu
0,3,Electricity,2005,12,Electricity charge,kWh,1904.65788,49.5,14790.748577,2006,6,50.466034
1,3,Electricity,2006,1,Electricity charge,kWh,5430.493797,93.0,42665.790911,2006,7,145.575679
2,3,Electricity,2006,2,Electricity charge,kWh,5764.40673,84.0,45010.439348,2006,8,153.575619


In [5]:
# Look up the additional building info that the Util object holds for the
# Big Dipper building; the result comes back as a dictionary.
big_dipper_site_id = 'DIPMP1'
util_obj.building_info(big_dipper_site_id)

{'acct_elec': '112651',
 'acct_nat_gas': '10282  (1920 LATHROP)',
 'acct_oil': '39384007  (BIG DIPPER)',
 'acct_refuse': '',
 'acct_sewer': '1995008500',
 'acct_steam': '',
 'acct_water': '1995008500',
 'address': nan,
 'city': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'onsite_gen': nan,
 'primary_func': nan,
 'site_category': 'Other',
 'site_id': 'DIPMP1',
 'site_name': 'FNSB-Big Dipper',
 'source_elec': 'Golden Valley Electric',
 'source_nat_gas': 'Fairbanks Natural Gas',
 'source_oil': 'Sourdough Fuel  (Petro Star)',
 'source_refuse': '',
 'source_sewer': 'Golden Heart Utilities',
 'source_steam': '',
 'source_water': 'Golden Heart Utilities',
 'sq_ft': 63845.0,
 'year_built': nan}

## Instead, Rerun the Preprocessing routine to get the DataFrames and Util object

In [6]:
# This calls the preprocessing function in the main benchmark script file.
# Values in the 'settings.py' file will determine which files are read.
# NOTE:  This will take about 4 minutes to run, and the Jupyter Notebook does
# not always show the messages that are printed along the way, so it may look
# like your computer is frozen. Be patient.

# Uncomment the line below to run
# df_raw, dfp, util_obj = benchmark.preprocess_data()

## Look at Some Data to Illustrate the Duplicate kW Demand Charge Problem

In [7]:
# Pull out the Big Dipper ('DIPMP1') bills with a boolean mask on "Site ID".
# The column name contains a space, so it cannot be written as a bare name
# in a Pandas "query" expression (pandas 0.25+ accepts backtick-quoted names
# such as `Site ID`; the mask used below works on any version).
# NOTE: a quick rename of the columns to eliminate spaces is another approach:
#  df_raw.columns = [c.replace(' ', '_') for c in df_raw.columns]
is_big_dipper = df_raw['Site ID'] == 'DIPMP1'
df_dip = df_raw.loc[is_big_dipper]

# Show the kW line items whose "From" date falls strictly between
# 2017-01-01 and 2017-06-01.
df_dip.query("From < '2017-06-01' and From > '2017-01-01' and Units=='kW'")

Unnamed: 0,Site ID,Site Name,Vendor Code,Vendor Name,Account Number,Bill Date,Due Date,Entry Date,Invoice #,Voucher #,From,Thru,Service Name,Item Description,Meter Number,Usage,Cost,Units,Account Financial Code,Site Financial Code
17620,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,02/13/2017,02/13/2017,10/09/2017,,,2017-01-16,2017-02-13,Electricity,Actual demand,,290.5,4151.53,kW,61831.0,
17624,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,02/14/2017,02/14/2017,03/13/2017,,,2017-01-16,2017-02-13,Electricity,Demand Charge,,290.5,4151.53,kW,61831.0,
17629,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,03/14/2017,03/14/2017,10/09/2017,,,2017-02-13,2017-03-14,Electricity,Actual demand,,264.9,3785.71,kW,61831.0,
17633,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,03/15/2017,03/15/2017,03/22/2017,,,2017-02-13,2017-03-14,Electricity,Demand Charge,,264.9,3785.71,kW,61831.0,
17638,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,04/14/2017,04/14/2017,10/09/2017,,,2017-03-14,2017-04-14,Electricity,Actual demand,,286.3,4090.94,kW,61831.0,
17642,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,04/17/2017,04/17/2017,04/18/2017,,,2017-03-14,2017-04-14,Electricity,Demand Charge,,286.3,4090.94,kW,61831.0,
17649,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,05/15/2017,05/15/2017,07/03/2017,,,2017-04-14,2017-05-12,Electricity,Demand Charge,,278.5,3979.48,kW,61831.0,
17656,DIPMP1,FNSB-Big Dipper,VG354933,Golden Valley Electric,112651,06/15/2017,06/15/2017,08/02/2017,,,2017-05-12,2017-06-14,Electricity,Demand Charge,,270.1,3860.01,kW,61831.0,
