In [None]:
%reload_ext autoreload
%autoreload 2

# import packages
import pandas as pd
import numpy as np
#from pathlib import Path
import plotly.express as px
from os import path

# Load Data

In [None]:
# Region (balancing authority code) examined in this notebook. `ba_list` carries
# the same code in list form for the places that need a list of region columns.
ba = 'CISO'
ba_list = [ba]

# Data year we want to examine
ef_year = 2019

## Load the building demand data

In [None]:
# Cluster summary csv: the metadata describing each of the load-profile files
metadata = pd.read_csv(
    'A:/Research/lbnl-load-enduse-shapes/lbnl-load-enduse-shapes/anonymized_1in2_actual_actual_2014/anonymized_1in2_actual_actual_2014_cluster_summary.csv'
)

# The values of the 'cluster' column are the names of the files to load
filename_list = [cluster for cluster in metadata['cluster']]

print(f'Number of building clusters: {len(filename_list)}')

# Per sector: number of clusters and summed customer count
sector_summary = metadata.groupby(['sector'])[['customer_count']].agg(['count', 'sum'])
display(sector_summary)

In [None]:
# Build the hourly demand table (rows: 2019 local timestamps, columns:
# (location, cluster)), or reload it from the csv cache written by an earlier run.
demand_cache = '../data/processed/california_demand_data.csv'

if path.exists(demand_cache):
    # load the data from csv if it has already been saved previously; the two
    # header rows hold the (location, cluster) column levels
    demand = pd.read_csv(demand_cache, header=[0,1], index_col=0, parse_dates=True)
else:
    # we will use 1-in-2 profiles, representing a typical weather year, rather than the 1-in-10 profiles, which represent a "hot" year
    lbnl_dir = 'A:/Research/lbnl-load-enduse-shapes/lbnl-load-enduse-shapes/anonymized_1in2_actual_actual_2014/'

    # Collect one column per cluster and combine them once at the end. Joining
    # onto the growing frame inside the loop re-copies every column loaded so
    # far on each iteration.
    cluster_profiles = []
    for filename in filename_list:
        # get the total electricity data for each file
        total_load = pd.read_csv(lbnl_dir + f'{filename}.csv', usecols=['total'])['total']

        # need to shift the data to align the day of week with the correct date in the current year
        # The original data was from 2014, where the first day of the year is a Wednesday
        # Jan 1, 2019 is a Tuesday, so we need to roll the original data forward by 24 hours
        shifted_load = np.roll(total_load.to_numpy(), 24)

        cluster_profiles.append(pd.Series(shifted_load, name=filename))

    # pd.concat rejects an empty list, so fall back to an empty frame in that case
    demand = pd.concat(cluster_profiles, axis=1) if cluster_profiles else pd.DataFrame()

    # set the location as CAISO
    demand['location'] = ba

    # create a datetime column with hourly timestamps for 2019
    demand['datetime_local'] = pd.date_range(start='2019-01-01 00:00:00', end='2019-12-31 23:00:00', freq='H')

    # melt the data into long format
    demand = demand.melt(id_vars=['location','datetime_local'], var_name='cluster', value_name='demand_kw')

    # then re-pivot the data so the columns become a (location, cluster) MultiIndex
    demand = demand.pivot(index='datetime_local', columns=['location','cluster'], values='demand_kw')

    # export the loaded demand data to a csv
    demand.to_csv(demand_cache)

display(demand.head(5))

## Load the EF Data

In [None]:
# Load the hourly emission factors for the selected regions
###########################################################
hourly_ef = pd.read_csv(
    '../data/processed/emission_factors/emission_factors_monthhour.csv',
    index_col='datetime_local',
    parse_dates=True,
    usecols=ba_list+['datetime_local'],
)

# Coarser-resolution averages of the hourly factors
###################################################

# monthly average: one row per calendar month
mo_average = (
    hourly_ef
    .assign(month=hourly_ef.index.month)
    .groupby('month')
    .mean()
    .reset_index()
)

# annual average: same shape as the hourly table, every row holding the column mean
yr_average = hourly_ef.copy()
for region in yr_average.columns:
    yr_average[region] = yr_average[region].mean()

# month-hour average: one row per (month, hour of day) pair
mh_average = (
    hourly_ef
    .assign(month=hourly_ef.index.month, hour=hourly_ef.index.hour)
    .groupby(['month','hour'])
    .mean()
    .reset_index()
)

# Combine all resolutions into one table on the hourly timestamps
#################################################################

# month-hourly: merging discards the datetime index, so it is restored afterwards
ef = hourly_ef.assign(month=hourly_ef.index.month, hour=hourly_ef.index.hour)
ef = ef.merge(mh_average, how='left', on=['month','hour'], suffixes=('_hourly','_monthhourly'))
ef = ef.set_index(hourly_ef.index)
ef = ef.drop(columns=['month','hour'])

# monthly: same pattern, keyed on the calendar month
ef['month'] = ef.index.month
ef = ef.merge(mo_average.add_suffix('_monthly'), how='left', left_on='month', right_on='month_monthly')
ef = ef.set_index(hourly_ef.index)
ef = ef.drop(columns=['month', 'month_monthly'])

# annual: already indexed by timestamp, so merge directly on the index
ef = ef.merge(yr_average.add_suffix('_annual'), how='left', left_index=True, right_index=True)

# Turn the '<region>_<resolution>' labels into a two-level column index, then
# reindex to the full region x resolution product
flat_labels = ef.columns.str.split('_', expand=True)
ef.columns = flat_labels
split_columns = pd.MultiIndex.from_product([flat_labels.levels[0], flat_labels.levels[1]])
ef = ef.reindex(columns=split_columns)


ef.head(3)

# Calculate GHG inventory for each building at each CI resolution

In [None]:
def _inventory_at(resolution):
    """Return `demand` multiplied by the emission factors of one resolution.

    Selects the `resolution` columns of `ef`, drops the resolution level so the
    remaining labels are the region codes, broadcasts each region's factor
    across the `demand` columns sharing that 'location', and multiplies
    element-wise.
    """
    regional_ef = ef.loc[:, (slice(None), resolution)].droplevel(1, axis=1)
    return demand * regional_ef.reindex(columns=demand.columns, level='location')


# one inventory table per emission-factor resolution
hourly_inventory = _inventory_at('hourly')
mh_inventory = _inventory_at('monthhourly')
monthly_inventory = _inventory_at('monthly')
annual_inventory = _inventory_at('annual')

# column totals of each inventory, side by side (one row per demand column)
combined_inventory = pd.DataFrame({
    'hourly': hourly_inventory.sum(),
    'monthhourly': mh_inventory.sum(),
    'monthly': monthly_inventory.sum(),
    'annual': annual_inventory.sum(),
})

combined_inventory.head(3)

# Examine Results

In [None]:
# Error metrics: relative difference of each coarser-resolution inventory total
# from the hourly inventory total, per demand column

hourly_total = hourly_inventory.sum()

# fractional error, rounded to 3 decimals (i.e. 0.1 once expressed in percent)
percent_error = pd.DataFrame({
    'monthly': (monthly_inventory.sum() - hourly_total) / hourly_total,
    'monthhourly': (mh_inventory.sum() - hourly_total) / hourly_total,
    'annual': (annual_inventory.sum() - hourly_total) / hourly_total,
}).round(3)

# building category = fourth '-'-separated token of the cluster name
cluster_name_parts = percent_error.reset_index()['cluster'].str.split('-', expand=True)
percent_error['building_type'] = cluster_name_parts[3].values

# long format: one row per (cluster, resolution)
percent_error = (
    percent_error
    .reset_index()
    .drop(columns='location')
    .melt(id_vars=['cluster', 'building_type'], var_name='resolution', value_name='error')
)

# express the error as a percentage out of 100
percent_error = percent_error.assign(error=percent_error['error'] * 100)

# Human-readable building type names
# (from the LBNL DR potential study, Phase 2, Appendix C-5)
building_names = {
    'com_other': 'Other Commercial',
    'office': 'Office',
    'retail': 'Retail',
    'chemical': 'Chemical Manufacturing',
    'comp_elec': 'Computer/Electronics Manufacturing',
    'crop': 'Agricultural Irrigation',
    'data_center': 'Data Center',
    'food_bev': 'Food/Beverage Processing',
    'ind_other': 'Other Industrial',
    'metals': 'Primary Metal Manufacturing',
    'plas_rub': 'Plastic/Rubber Manufacturing',
    'water': 'Water Utility',
    'wwater': 'Wastewater Treatment Plant',
    'res_misc': 'Residential',
    'other': 'Other',
    'petrol': 'Petroleum Refinery',
    'ref_wh': 'Refrigerated Warehouse',
}

percent_error = percent_error.replace({'building_type': building_names})
percent_error

## Examine Annual Biases

In [None]:
# Left-to-right order of the building types on the x axis. The sector dividers
# and labels added below are positioned by index into this order.
building_order = [
    'Residential',
    'Office',
    'Retail',
    'Refrigerated Warehouse',
    'Data Center',
    'Computer/Electronics Manufacturing',
    'Primary Metal Manufacturing',
    'Petroleum Refinery',
    'Chemical Manufacturing',
    'Plastic/Rubber Manufacturing',
    'Food/Beverage Processing',
    'Agricultural Irrigation',
    'Water Utility',
    'Wastewater Treatment Plant',
    'Other Commercial',
    'Other Industrial',
    'Other',
]

# Box plot of the annual-resolution error, one box per building type
annual_errors = percent_error[percent_error['resolution'] == 'annual']

caiso_bias = px.box(
    annual_errors,
    x='building_type',
    y='error',
    category_orders={'building_type':building_order},
    template='plotly_white',
    labels={'error':'% by which annual accounting...','resolution':'Inventory Resolution','building_type':'Building Type'},
    width=1200,
    height=600,
    hover_data=['cluster'],
    color_discrete_sequence=['purple'],
)

# heavy zero line so the sign of the bias is easy to read
caiso_bias.update_yaxes(zeroline=True, zerolinewidth=2, zerolinecolor='black', dtick=5)
caiso_bias.update_xaxes(tickangle=30, showgrid=True, tickson="boundaries")

# rotated captions to the left of the y axis explaining the sign of the error
for caption_y, caption_text in ((15, "overestimates GHG"), (-20, "underestimates GHG")):
    caiso_bias.add_annotation(
        x=0,
        y=caption_y,
        text=caption_text,
        showarrow=False,
        textangle=-90,
        xref='paper',
        xshift=-45,
    )

# dashed divider in front of each sector group, with the sector name above it
for divider_x, label_x, sector_label in (
    (0.5, 2.5, "<b>Commercial</b>"),
    (4.5, 7.5, "<b>Industrial</b>"),
    (10.5, 12, "<b>Agriculture/Water</b>"),
    (13.5, 15, "<b>Other</b>"),
):
    caiso_bias.add_vline(x=divider_x, line_width=1, line_dash="dash")
    caiso_bias.add_annotation(x=label_x, y=35, text=sector_label, showarrow=False)

caiso_bias.update_layout(font_family="Helvetica", font_size=14)

caiso_bias.show()

In [None]:
# Export the box plot twice: a scaled svg and a jpeg with the default export options
for image_path, export_options in (
    ("../results/figures/figure_3/caiso_error_by_sector.svg", {"scale": 1.77165}),
    ("../results/figures/figure_3/caiso_error_by_sector.jpeg", {}),
):
    caiso_bias.write_image(image_path, **export_options)

In [None]:
# Summary table of the annual-resolution error per building type:
# median absolute bias, interquartile range and full range.
annual_percent_error = percent_error[percent_error['resolution'] == 'annual'].copy()

# Restrict the statistics to the numeric 'error' column. Aggregating the whole
# frame also feeds the string columns ('cluster', 'resolution') to abs(), which
# only worked while pandas silently dropped such columns and raises TypeError
# on pandas >= 2.0.
annual_error = annual_percent_error['error']
error_by_type = annual_error.groupby(annual_percent_error['building_type'])

# describe() on a single column yields flat stat columns (count, mean, ..., max)
error_table = error_by_type.describe().drop(columns=['count','mean','std'])
error_table['Bias IQR'] = error_table[['25%','75%']].values.round(2).tolist()
error_table['Bias Range'] = error_table[['min','max']].values.round(2).tolist()

# median of the absolute error within each building type (aligned on building_type)
error_table['Median Absolute Bias'] = (
    annual_error.abs().groupby(annual_percent_error['building_type']).median()
)

error_table = error_table.drop(columns=['min','25%','50%','75%','max']).sort_values(by='Median Absolute Bias')
error_table[['Median Absolute Bias','Bias IQR','Bias Range']]

In [None]:
# Reshape the long-format errors into wide format: one row per
# (building_type, cluster) and one error column per resolution
percent_error_wide = (
    percent_error
    .pivot(index=['building_type','cluster'], columns='resolution', values='error')
    .reset_index()
)

In [None]:
# Finer sector grouping (separate Commercial / Industrial / Other groups).
# Not applied below; it previously shared the name `building_sector` and was
# overwritten immediately, so it is kept under its own name for reference.
building_sector_detailed = {'Residential':'Residential', 
                            'Office':'Commercial', 
                            'Retail':'Commercial', 
                            'Refrigerated Warehouse':'Commercial', 
                            'Data Center':'Commercial', 
                            'Computer/Electronics Manufacturing':'Industrial',  
                            'Primary Metal Manufacturing':'Industrial', 
                            'Petroleum Refinery':'Industrial', 
                            'Chemical Manufacturing':'Industrial', 
                            'Plastic/Rubber Manufacturing':'Industrial',  
                            'Food/Beverage Processing':'Industrial', 
                            'Agricultural Irrigation':'Agriculture/Water', 
                            'Water Utility':'Agriculture/Water', 
                            'Wastewater Treatment Plant':'Agriculture/Water', 
                            'Other Commercial':'Other', 
                            'Other Industrial':'Other', 
                            'Other':'Other'}

# Grouping actually applied: Residential, combined commercial & industrial
# ('C&I'), and Agriculture/Water
building_sector = {'Residential':'Residential', 
                   'Office':'C&I', 
                   'Retail':'C&I', 
                   'Refrigerated Warehouse':'C&I', 
                   'Data Center':'C&I', 
                   'Computer/Electronics Manufacturing':'C&I',  
                   'Primary Metal Manufacturing':'C&I', 
                   'Petroleum Refinery':'C&I', 
                   'Chemical Manufacturing':'C&I', 
                   'Plastic/Rubber Manufacturing':'C&I',  
                   'Food/Beverage Processing':'C&I', 
                   'Agricultural Irrigation':'Agriculture/Water', 
                   'Water Utility':'Agriculture/Water', 
                   'Wastewater Treatment Plant':'Agriculture/Water', 
                   'Other Commercial':'C&I', 
                   'Other Industrial':'C&I', 
                   'Other':'C&I'}

# building types missing from the mapping become NaN
percent_error_wide['building_sector'] = percent_error_wide['building_type'].map(building_sector)

percent_error_wide

In [None]:
# Scatter of absolute error: annual accounting (x) against monthly accounting (y),
# one point per cluster, coloured by building sector with an OLS trendline per sector
caiso_annual_vs_monthly = px.scatter(
    percent_error_wide,
    x=percent_error_wide['annual'].abs(),
    y=percent_error_wide['monthly'].abs(),
    labels={'x':'Absolute % Error (Annual)', 'y':'Absolute % Error (Monthly)', 'building_sector':'Building Sector'},
    title='(b) California, Monthly Average Accounting',
    width=600,
    height=600,
    color='building_sector',
    hover_data=['building_type','cluster'],
    color_discrete_sequence=['green', 'blue','red'],
    template='plotly_white',
    trendline='ols',
)

# square plot: both axes share the 0-35 range and a 1:1 scale
caiso_annual_vs_monthly.update_yaxes(
    scaleanchor = "x",
    scaleratio = 1,
    range=[0,35],
    constrain='domain',
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='black',
)
caiso_annual_vs_monthly.update_xaxes(
    range=[0,35],
    dtick=5,
    constrain='domain',
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='black',
)
caiso_annual_vs_monthly.update_traces(marker=dict(opacity=0.5))

# 1:1 reference line: points on it have the same absolute error at both resolutions
caiso_annual_vs_monthly.add_shape(type="line", x0=0, y0=0, x1=40, y1=40, line=dict(color="Black", width=1))

# captions for the diagonal and for the regions above and below it
for caption in (
    dict(x=25, y=25, text="no change in bias", textangle=-45, yshift=15),
    dict(x=10, y=30, text="Monthly CI<br>increases bias", textangle=0, yshift=0),
    dict(x=30, y=10, text="Monthly CI<br>decreases bias", textangle=0, yshift=0),
):
    caiso_annual_vs_monthly.add_annotation(showarrow=False, **caption)

# horizontal legend centred above the plot, title centred, shared font
caiso_annual_vs_monthly.update_layout(
    legend=dict(yanchor="bottom", x=0.5, y=0.98, xanchor='center', orientation='h'),
    title=dict(yanchor="bottom", xanchor='center', x=0.5, y=0.9),
    font_family="Helvetica",
    font_size=14,
)

caiso_annual_vs_monthly.show()

In [None]:
# Export the annual-vs-monthly scatter as a scaled svg
caiso_annual_vs_monthly.write_image(
    "../results/figures/figure_4/caiso_annual_vs_monthly.svg",
    scale=1.77165,
)

In [None]:
# Scatter of absolute error: annual accounting (x) against month-hour ("monthly
# TOD") accounting (y), one point per cluster, coloured by building sector with
# an OLS trendline per sector
caiso_annual_vs_monthTOD = px.scatter(
    percent_error_wide,
    x=percent_error_wide['annual'].abs(),
    y=percent_error_wide['monthhourly'].abs(),
    labels={'x':'Absolute % Error (Annual)', 'y':'Absolute % Error (Monthly TOD)', 'building_sector':'Building Sector'},
    title='(d) California, Monthly TOD Average Accounting',
    width=600,
    height=600,
    trendline='ols',
    color='building_sector',
    hover_data=['building_type','cluster'],
    color_discrete_sequence=['green', 'blue','red'],
    template='plotly_white',
)

# square plot: both axes share the 0-35 range and a 1:1 scale
caiso_annual_vs_monthTOD.update_yaxes(
    scaleanchor = "x",
    scaleratio = 1,
    range=[0,35],
    constrain='domain',
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='black',
)
caiso_annual_vs_monthTOD.update_xaxes(
    range=[0,35],
    dtick=5,
    constrain='domain',
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='black',
)
caiso_annual_vs_monthTOD.update_traces(marker=dict(opacity=0.5))

# 1:1 reference line: points on it have the same absolute error at both resolutions
caiso_annual_vs_monthTOD.add_shape(type="line", x0=0, y0=0, x1=40, y1=40, line=dict(color="Black", width=1))

# captions for the diagonal and for the regions above and below it
# NOTE(review): these captions pin Arial while the layout font set below is
# Helvetica - confirm whether the override is intended
for caption in (
    dict(x=25, y=25, text="no change in bias", textangle=-45, yshift=15),
    dict(x=10, y=30, text="Monthly TOD CI<br>increases bias", textangle=0, yshift=0),
    dict(x=30, y=10, text="Monthly TOD CI<br>decreases bias", textangle=0, yshift=0),
):
    caiso_annual_vs_monthTOD.add_annotation(
        showarrow=False,
        font=dict(family="Arial",size=14),
        **caption,
    )

# horizontal legend centred above the plot, title centred, shared font
caiso_annual_vs_monthTOD.update_layout(
    legend=dict(yanchor="bottom", x=0.5, y=0.98, xanchor='center', orientation='h'),
    title=dict(yanchor="bottom", xanchor='center', x=0.5, y=0.9),
    font_family="Helvetica",
    font_size=14,
)


caiso_annual_vs_monthTOD.show()

In [None]:
# Export the annual-vs-monthly-TOD scatter as a scaled svg
caiso_annual_vs_monthTOD.write_image(
    "../results/figures/figure_4/caiso_annual_vs_monthTOD.svg",
    scale=1.77165,
)

In [None]:
# Overlaid, semi-transparent histograms of the percent error, one per resolution
(
    px.histogram(
        percent_error,
        x="error",
        color='resolution',
        category_orders={'resolution':['annual','monthly','monthhourly']},
        template='plotly_white',
        title='Histogram of errors at each resolution',
    )
    .update_layout(barmode='overlay')
    .update_traces(opacity=0.75)
)