In [None]:
%reload_ext autoreload
%autoreload 2

# import packages
import pandas as pd
import missingno
import numpy as np

#from pathlib import Path
import plotly.express as px

from os import path

import load_data

# Load Data

In [None]:
# Data year to examine.
ef_year = 2019

# Reporting units for the emission factors: either 'kgCO2/kWh' or 'lbCO2/MWh'.
units = 'lbCO2/MWh'

# Multiplier that converts the base unit (kgCO2/kWh) into each supported unit.
# 1 lb = 0.453592 kg and 1 MWh = 1000 kWh.
_UNIT_CONVERSIONS = {
    'kgCO2/kWh': 1,
    'lbCO2/MWh': 1 / 0.453592 * 1000,
}

# Fail loudly on a misspelled unit instead of silently reporting base-unit
# values under the wrong label.
if units not in _UNIT_CONVERSIONS:
    raise ValueError(f"Unsupported units {units!r}; choose one of {sorted(_UNIT_CONVERSIONS)}")
unit_conversion = _UNIT_CONVERSIONS[units]


## Load the grid emission factor data

In [None]:
# Read the hourly emission factor table
#######################################
# The CSV is indexed by its parsed `datetime_local` column; every remaining
# column is treated as one region.
ef_csv = '../data/processed/emission_factors/emission_factors_monthhour.csv'
hourly_ef = pd.read_csv(
    ef_csv,
    index_col='datetime_local',
    parse_dates=True,
)

# keep the region codes (the column names) for the loops further down
ba_list = hourly_ef.columns.tolist()

# show where each region has missing hours
missingno.matrix(hourly_ef, labels=True)

In [None]:
# Build monthly, month-hour and annual average EFs next to the hourly values
############################################################################

# monthly average: one row per calendar month
mo_average = hourly_ef.copy()
mo_average['month'] = mo_average.index.month
mo_average = mo_average.groupby('month').mean().reset_index()

# annual average: every row of a region's column holds that region's overall mean
yr_average = pd.DataFrame(
    {col: hourly_ef[col].mean() for col in hourly_ef.columns},
    index=hourly_ef.index,
)

# month-hour average: one row per (calendar month, hour of day)
mh_average = hourly_ef.copy()
mh_average['month'] = mh_average.index.month
mh_average['hour'] = mh_average.index.hour
mh_average = mh_average.groupby(['month','hour']).mean().reset_index()

# start from the hourly values and attach each average as extra columns
ef = hourly_ef.copy()
ef['month'] = ef.index.month
ef['hour'] = ef.index.hour

# month-hour averages; the suffixes turn each region into `<ba>_hourly` / `<ba>_monthhourly`.
# merge() discards the index, so the original datetime index is put back afterwards.
ef = ef.merge(mh_average, how='left', on=['month','hour'], suffixes=('_hourly','_monthhourly')).set_index(hourly_ef.index).drop(columns=['month','hour'])

# monthly averages
ef['month'] = ef.index.month
ef = ef.merge(mo_average.add_suffix('_monthly'), how='left', left_on='month', right_on='month_monthly').set_index(hourly_ef.index).drop(columns=['month', 'month_monthly'])

# annual averages
ef = ef.merge(yr_average.add_suffix('_annual'), how='left', left_index=True, right_index=True)

# Blank out every averaged value in hours where the hourly value is missing, so
# all resolutions cover exactly the same hours.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
for ba in ba_list:
    hourly_missing = ef[f'{ba}_hourly'].isna()
    for resolution in ['annual','monthly','monthhourly']:
        ef.loc[hourly_missing, f'{ba}_{resolution}'] = np.nan

# split the `<ba>_<resolution>` column names into a (region, resolution) MultiIndex
split_columns = ef.columns.str.split('_', expand=True)
ef.columns = split_columns
split_columns = pd.MultiIndex.from_product([split_columns.levels[0], split_columns.levels[1]])
ef = ef.reindex(columns=split_columns)

# set the dtype to float32 to conserve memory
ef = ef.astype('float32')

ef.head(3)

## Load the building demand data

In [None]:
# Read one demand file per region and join them side by side.
demand_frames = []

for ba in ba_list:
    demand_path = f'../data/processed/nrel_demand/{ba}.csv.zip'
    try:
        ba_data = pd.read_csv(demand_path, compression='zip', dtype='float16')
    except FileNotFoundError:
        # regions without a demand file are reported and left out
        print(f'No demand data for {ba}')
        continue
    # label every column as (region, building type)
    ba_data.columns = pd.MultiIndex.from_product([[ba], ba_data.columns], names=['location','building_type'])
    demand_frames.append(ba_data)

demand = pd.concat(demand_frames, axis='columns')

demand

# Calculate inventories

In [None]:
# Pull each resolution out of the (region, resolution) column MultiIndex as a
# frame with one column per region. The datetime index is replaced by 0..n-1
# row numbers (demand is read without an index column, so it is numbered the same way).
# NOTE: this rebinds `hourly_ef`; the raw, datetime-indexed frame loaded earlier
# is no longer reachable under that name after this cell runs.
hourly_ef = ef.xs('hourly', axis=1, level=1).reset_index(drop=True)
mh_ef = ef.xs('monthhourly', axis=1, level=1).reset_index(drop=True)
monthly_ef = ef.xs('monthly', axis=1, level=1).reset_index(drop=True)
annual_ef = ef.xs('annual', axis=1, level=1).reset_index(drop=True)

In [None]:
# Emissions inventory per (location, building_type) under each EF resolution:
# demand is multiplied by the EF of its region (matched on column level 0)
# and summed over all rows.
hourly_inventory = demand.multiply(hourly_ef, axis=1, level=0).sum()
mh_inventory = demand.multiply(mh_ef, axis=1, level=0).sum()
monthly_inventory = demand.multiply(monthly_ef, axis=1, level=0).sum()
annual_inventory = demand.multiply(annual_ef, axis=1, level=0).sum()

# one column per resolution for a side-by-side comparison
combined_inventory = pd.DataFrame({
    'hourly': hourly_inventory,
    'monthhourly': mh_inventory,
    'monthly': monthly_inventory,
    'annual': annual_inventory,
})

combined_inventory

## Calculate Percentage Error

In [None]:
# Error of each averaged-EF inventory relative to the hourly inventory
# (as a fraction, rounded to three decimals).
percent_error = pd.DataFrame({
    'monthly': (monthly_inventory - hourly_inventory) / hourly_inventory,
    'monthhourly': (mh_inventory - hourly_inventory) / hourly_inventory,
    'annual': (annual_inventory - hourly_inventory) / hourly_inventory,
}).round(3)

# break the underscore-separated building_type into its four leading fields
percent_error = percent_error.reset_index()
name_parts = percent_error['building_type'].str.split('_', expand=True)
for position, field in enumerate(['climate_zone', 'building_category', 'building_name', 'building_id']):
    percent_error[field] = name_parts[position]

# long format: one row per building and resolution
percent_error = percent_error.drop(columns='building_type').melt(
    id_vars=['location','climate_zone', 'building_category','building_name','building_id'],
    var_name='resolution',
    value_name='error',
)
# express the error as a percentage out of 100
percent_error['error'] = percent_error['error'] * 100

# building categories in this list are residential; everything else is commercial
residential_buildings = ['MobileHome', 'SingleFamily', 'MediumMultifamily',
       'SmallMultifamily', 'LargeMultifamily']
percent_error['building_sector'] = (
    percent_error['building_category']
    .isin(residential_buildings)
    .map({True: 'Residential', False: 'Commercial'})
)

# discard rows that have a missing value in any column
percent_error = percent_error.dropna(axis=0, how='any')

# keep the DOPD and CHPD results in their own dataframe
is_outlier = percent_error['location'].isin(['DOPD','CHPD'])
percent_error_outliers = percent_error[is_outlier].copy()
percent_error = percent_error[~is_outlier]

percent_error.head(5)

In [None]:
# keep only the annual-resolution errors for plotting
percent_error_for_graph = percent_error[percent_error['resolution'] == 'annual'].drop(columns=['resolution'])


def _locations_by_median_error(errors):
    """Return the locations in `errors` ordered from highest to lowest median error.

    Only the numeric `error` column is aggregated: calling median() on the whole
    frame raises a TypeError on pandas >= 2.0 because the other columns hold strings.
    """
    return errors.groupby('location')['error'].median().sort_values(ascending=False).index.tolist()


# location orderings used for graph display: all buildings, residential only, commercial only
descending_median_order = _locations_by_median_error(percent_error_for_graph)
descending_median_order_res = _locations_by_median_error(percent_error_for_graph[percent_error_for_graph['building_sector'] == 'Residential'])
descending_median_order_com = _locations_by_median_error(percent_error_for_graph[percent_error_for_graph['building_sector'] == 'Commercial'])

# lookup from BA code to display name
ba_name_dict = pd.read_csv('../data/manual/ba_names.csv', index_col='ba_code').to_dict()['ba_name']


percent_error_for_graph.head(5)

## National Summary by BA and Building Sector

In [None]:
# Reverse the BA order (lowest median error first).
# The list is rebuilt from the data instead of calling .reverse() in place, so
# re-running this cell always gives the same order rather than flipping it back.
# Note that the variable keeps its `descending_` name although it now ascends.
descending_median_order = list(
    percent_error_for_graph.groupby('location')['error'].median().sort_values(ascending=False).index
)[::-1]

In [None]:
# Horizontal box plot of the annual-accounting error for each balancing
# authority, coloured by building sector. BA codes are swapped for display names.
error_by_region_sector = px.box(
    percent_error_for_graph.replace({'location':ba_name_dict}),
    y='location',
    x='error',
    category_orders={'location':[ba_name_dict[i] for i in descending_median_order]},
    template='plotly_white',
    labels={'error':'%  by which annual accounting...','location':'Balancing Authority','building_sector':'Building Sector'},
    hover_data=['building_name','climate_zone'],
    color='building_sector',
    color_discrete_sequence=['blue', 'red'],
    width=1000,
    height=1200,
)

# axes: bold zero line, x ticks every 5 and drawn on top
error_by_region_sector.update_xaxes(zeroline=True, zerolinewidth=2, zerolinecolor='black', dtick=5, mirror='allticks', side='top')
error_by_region_sector.update_yaxes(showgrid=True, ticks='outside', tickson='boundaries')

# box spacing, margins, and a horizontal legend centred under the plot
error_by_region_sector.update_layout(boxgap=0.25, boxgroupgap=0, margin=dict(b=0, r=0), )
error_by_region_sector.update_layout(
    legend=dict(
        yanchor="top",
        xanchor='center',
        y=-0.01,
        x=0.5,
        orientation='h'),
    font_family="Helvetica",
    font_size=14,
)

# captions above each half of the x axis, centred between zero and the extreme value
error_by_region_sector.add_annotation(
    x=(percent_error_for_graph['error'].max()/2), y=1.035,
    text="overestimates GHG",
    showarrow=False,
    xref='x',
    yref='paper',
)
error_by_region_sector.add_annotation(
    x=(percent_error_for_graph['error'].min()/2), y=1.035,
    text="underestimates GHG",
    showarrow=False,
    xref='x',
    yref='paper',
)
error_by_region_sector.show()

In [None]:
# export the national box plot as a JPEG
error_by_region_sector.write_image("../results/figures/figure_2/national_relative_bias_by_region_and_sector.jpeg")

In [None]:
# Use thicker box outlines for the SVG export.
# update_traces changes the figure object itself, so the thicker lines remain
# on `error_by_region_sector` after this cell.
error_by_region_sector.update_traces(line=dict(width=2))
error_by_region_sector.write_image("../results/figures/figure_2/national_relative_bias_by_region_and_sector.svg", scale=1.77165)

## Breakdown by BA and Building Type

In [None]:
# For each BA: take the median absolute error (as a fraction) within every
# building category, then average those medians across categories.
error_by_ba = percent_error[percent_error['resolution'] == 'annual'].copy()
error_by_ba['error'] = error_by_ba['error'].abs() / 100
# Only the numeric `error` column is aggregated; reducing the whole frame raises
# a TypeError on pandas >= 2.0 because the remaining columns hold strings.
error_by_ba = (
    error_by_ba
    .groupby(['location','building_category'])['error'].median()
    .groupby(level='location').mean()
    .reset_index()
)

# get a list of all BAs in order of error
descending_error_order = error_by_ba.sort_values(by='error', ascending=False)['location'].to_list()

In [None]:
# One facet per location (five per row), one box per building category.
national_bias_region_building = px.box(
    percent_error_for_graph,
    x='building_category',
    y='error',
    facet_col='location',
    facet_col_wrap=5,
    facet_row_spacing=0.02,
    color='building_category',
    category_orders={'location':sorted(ba_list)},
    template='plotly_white',
    labels={'error':'Bias (%)','building_category':'Building Type'},
    width=1200,
    height=1800,
    boxmode='overlay',
)
national_bias_region_building.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='black', dtick=10)
national_bias_region_building.update_xaxes(tickangle=45, showgrid=True, ticklabelposition='outside top')
# facet titles read "location=XYZ"; keep only the part after the "="
national_bias_region_building.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
national_bias_region_building.update_layout(boxgap=0.01, font_family="Arial", font_size=14)

national_bias_region_building.write_image("../results/figures/SI/national_relative_bias_region_building.jpeg")
national_bias_region_building.show()

# Compare accounting resolutions

In [None]:
# Overlaid histograms of the error distribution, one per accounting resolution.
resolution_histogram = px.histogram(
    percent_error,
    x="error",
    color='resolution',
    histnorm='percent',
    category_orders={'resolution':['annual','monthly','monthhourly']},
    template='plotly_white',
    title='Bias introduced through annual accounting<br>for all national GHG inventories',
    nbins=100,
    labels={'error':'% error'},
)
resolution_histogram.update_layout(barmode='overlay')
# explicit bins: 0.5-wide from -10.5 to 10.5
resolution_histogram.update_traces(xbins=dict(start=-10.5, end=10.5,size=0.5), opacity=0.75)
resolution_histogram.add_vline(x=0)
resolution_histogram

In [None]:
# reshape the percent error data into a wide format: one row per building,
# one column per resolution, ordered by building sector (descending)
percent_error_wide = (
    percent_error
    .pivot(
        index=['location','building_sector','building_category','building_name','climate_zone','building_id'],
        columns='resolution',
        values='error',
    )
    .reset_index()
    .sort_values(by='building_sector', ascending=False)
)

In [None]:
# change in absolute error when moving from annual to monthly accounting
# (a negative value means monthly accounting has the smaller absolute error)
percent_error_wide['change'] = percent_error_wide['monthly'].abs() - percent_error_wide['annual'].abs()
percent_error_wide

In [None]:
# buildings whose absolute error did not shrink under monthly accounting
percent_error_wide.loc[percent_error_wide['change'].ge(0)]

In [None]:
# Absolute error under annual accounting (x) against monthly accounting (y),
# one point per building, with an OLS trendline per building sector.
annual_vs_monthly = px.scatter(
    percent_error_wide,
    x=percent_error_wide['annual'].abs(),
    y=percent_error_wide['monthly'].abs(),
    labels={'x':'Absolute % Error (Annual)', 'y':'Absolute % Error (Monthly)','building_sector':'Building Sector'},
    title='(a) National, Monthly Average Accounting',
    color='building_sector',
    width=600,
    height=600,
    trendline='ols',
    hover_data=['location','building_category','building_name','climate_zone'],
    color_discrete_sequence=['red', 'blue'],
    template='plotly_white',
)

# square 0-20 axes with equal scaling
annual_vs_monthly.update_xaxes(range=[0,20],  constrain='domain', zeroline=True, zerolinewidth=2, zerolinecolor='black')
annual_vs_monthly.update_yaxes(scaleanchor = "x", scaleratio = 1, range=[0,20], constrain='domain', zeroline=True, zerolinewidth=2, zerolinecolor='black')

# 1:1 reference line: points on it have the same absolute error at both resolutions
annual_vs_monthly.add_shape(type="line", x0=0, y0=0, x1=20, y1=20, line=dict(color="Black", width=1))
annual_vs_monthly.update_traces(marker=dict(opacity=0.35, size=4))

# captions for the reference line and for the regions above and below it
annual_vs_monthly.add_annotation(
    x=15, y=15,
    text="no change in bias",
    showarrow=False,
    textangle=-45,
    yshift=15,
)
annual_vs_monthly.add_annotation(
    x=7.5, y=15,
    text="Monthly CI<br>increases bias",
    showarrow=False,
    textangle=0,
    yshift=0,
)
annual_vs_monthly.add_annotation(
    x=15, y=7.5,
    text="Monthly CI<br>decreases bias",
    showarrow=False,
    textangle=0,
    yshift=0,
)

annual_vs_monthly.update_layout(
    font_family="Helvetica",
    font_size=14,
    title=dict(yanchor="bottom",xanchor='center',x=0.5, y=0.9),
    legend=dict(
        yanchor="bottom",
        x=0.5,
        y=0.98,
        xanchor='center',
        orientation='h'),
)

annual_vs_monthly.show()

In [None]:
# export the annual-vs-monthly scatter as SVG (scaled by 1.77165) and as JPEG
annual_vs_monthly.write_image("../results/figures/figure_4/national_annual_vs_monthly.svg", scale=1.77165)
annual_vs_monthly.write_image("../results/figures/figure_4/national_annual_vs_monthly.jpeg")

In [None]:
# Absolute error under annual accounting (x) against month-hour ("monthly TOD")
# accounting (y), one point per building, with an OLS trendline per building sector.
annual_vs_monthlytod = px.scatter(
    percent_error_wide,
    x=percent_error_wide['annual'].abs(),
    y=percent_error_wide['monthhourly'].abs(),
    labels={'x':'Absolute % Error (Annual)', 'y':'Absolute % Error (Monthly TOD)','building_sector':'Building Sector'},
    title='(c) National, Monthly TOD Average Accounting',
    color='building_sector',
    width=600,
    height=600,
    trendline='ols',
    hover_data=['location','building_category','building_name','climate_zone'],
    color_discrete_sequence=['red', 'blue'],
    template='plotly_white',
)

# square 0-20 axes with equal scaling
annual_vs_monthlytod.update_xaxes(range=[0,20],  constrain='domain', zeroline=True, zerolinewidth=2, zerolinecolor='black')
annual_vs_monthlytod.update_yaxes(scaleanchor = "x", scaleratio = 1, range=[0,20], constrain='domain', zeroline=True, zerolinewidth=2, zerolinecolor='black')

# 1:1 reference line: points on it have the same absolute error at both resolutions
annual_vs_monthlytod.add_shape(type="line", x0=0, y0=0, x1=20, y1=20, line=dict(color="Black", width=1))
annual_vs_monthlytod.update_traces(marker=dict(opacity=0.35, size=4))

# captions for the reference line and for the regions above and below it
annual_vs_monthlytod.add_annotation(
    x=15, y=15,
    text="no change in bias",
    showarrow=False,
    textangle=-45,
    yshift=15,
)
annual_vs_monthlytod.add_annotation(
    x=7.5, y=15,
    text="Monthly TOD CI<br>increases bias",
    showarrow=False,
    textangle=0,
    yshift=0,
)
annual_vs_monthlytod.add_annotation(
    x=15, y=7.5,
    text="Monthly TOD CI<br>decreases bias",
    showarrow=False,
    textangle=0,
    yshift=0,
)

annual_vs_monthlytod.update_layout(
    font_family="Helvetica",
    font_size=14,
    title=dict(yanchor="bottom",xanchor='center',x=0.5, y=0.9),
    legend=dict(
        yanchor="bottom",
        x=0.5,
        y=0.98,
        xanchor='center',
        orientation='h'),
)

annual_vs_monthlytod.show()

In [None]:
# export the annual-vs-monthly-TOD scatter as SVG (scaled by 1.77165) and as JPEG
annual_vs_monthlytod.write_image("../results/figures/figure_4/national_annual_vs_monthlytod.svg", scale=1.77165)
annual_vs_monthlytod.write_image("../results/figures/figure_4/national_annual_vs_monthlytod.jpeg")

# Generate Results for DOPD and CHPD

In [None]:
# annual-resolution errors for the two BAs that were set aside (DOPD and CHPD)
outlier_percent_error = percent_error_outliers[percent_error_outliers['resolution'] == 'annual'].drop(columns=['resolution'])

# Label the sector with the same rule used for the national results: a building
# is residential when its category is in `residential_buildings`.
# Comparing the category with the literal 'Residential' never matches those
# category values, which left every outlier building marked as Commercial.
outlier_percent_error['building_sector'] = 'Commercial'
outlier_percent_error.loc[outlier_percent_error['building_category'].isin(residential_buildings), 'building_sector'] = 'Residential'

In [None]:
# One facet per outlier BA, one box per building category.
# NOTE(review): `location` holds display names after the replace, while the
# category order below lists BA codes; confirm whether names were intended.
outlier_bias_region_building = px.box(
    outlier_percent_error.replace({'location':ba_name_dict}),
    x='building_category',
    y='error',
    facet_col='location',
    facet_col_wrap=5,
    facet_row_spacing=0.02,
    category_orders={'location':sorted(ba_list)},
    template='plotly_white',
    labels={'error':'Bias (%)','building_category':'Building Type'},
    width=1200,
    boxmode='overlay',
)
outlier_bias_region_building.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='black', dtick=20, rangemode='tozero', range=[-60,200])
outlier_bias_region_building.update_xaxes(tickangle=45, showgrid=True, ticklabelposition='outside top')
# facet titles read "location=XYZ"; keep only the part after the "="
outlier_bias_region_building.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
outlier_bias_region_building.update_layout(boxgap=0.01)
outlier_bias_region_building.update_layout(font_family="Helvetica", font_size=14)

outlier_bias_region_building.write_image("../results/figures/SI/outlier_bias_region_building.jpeg")
outlier_bias_region_building.show()

In [None]:
# Vertical box plot of the annual-accounting error for the outlier BAs,
# coloured by building sector.
outlier_error_by_region_sector = px.box(
    outlier_percent_error.replace({'location':ba_name_dict}),
    x='location',
    y='error',
    category_orders={'location':[ba_name_dict[i] for i in descending_median_order]},
    template='plotly_white',
    labels={'error':'%  by which annual EF...','location':'Balancing Authority','building_sector':'Building Sector'},
    hover_data=['building_name','climate_zone'],
    color='building_sector',
    width=300,
    height=800,
    boxmode='overlay',
)
outlier_error_by_region_sector.update_yaxes(zeroline=True, zerolinewidth=2, zerolinecolor='black')
outlier_error_by_region_sector.update_xaxes(tickangle=45, showgrid=True, ticks='outside')
outlier_error_by_region_sector.update_layout(boxgap=0.01)

# rotated captions beside the y axis, above and below zero
outlier_error_by_region_sector.add_annotation(
    x=0, y=(outlier_percent_error['error'].max()/2),
    text="...Overestimates GHG",
    showarrow=False,
    textangle=-90,
    xref='paper',
    xshift=-45,
)
outlier_error_by_region_sector.add_annotation(
    x=0, y=-6,
    text="...Underestimates GHG",
    showarrow=False,
    textangle=-90,
    xref='paper',
    xshift=-45,
)

# horizontal legend centred above the plot
outlier_error_by_region_sector.update_layout(legend=dict(
    yanchor="top",
    x=0.5,
    y=1.07,
    xanchor='center',
    orientation='h'))
outlier_error_by_region_sector.update_layout(font_family="Helvetica", font_size=14)

outlier_error_by_region_sector.write_image("../results/figures/SI/outlier_error_by_region_sector.jpeg")
outlier_error_by_region_sector.show()

In [None]:
# hourly EFs of the two outlier BAs, converted to the configured units and
# reshaped to long format (one row per timestamp and region)
outlier_efs = ef[['DOPD','CHPD']].xs('hourly', axis=1, level=1, drop_level=True) * unit_conversion
outlier_efs = outlier_efs.melt(var_name='region', ignore_index=False).reset_index()
outlier_efs.head(3)

In [None]:
# Hourly emission factor time series for the outlier BAs, one facet per region.
# The y-axis label comes from `units` so it always matches the unit conversion
# applied to the plotted values (it was previously fixed to lbCO2/MWh).
outlier_ef = px.line(
    outlier_efs.replace({'region':ba_name_dict}),
    x='datetime_local',
    y='value',
    facet_col='region',
    facet_col_wrap=1,
    template='plotly_white',
    labels={'value':units, 'datetime_local':'Datetime'},
    width=1200,
)
# one tick per month, labelled with the abbreviated month name
outlier_ef.update_xaxes(dtick='M1', tickformat='%b')
# facet titles read "region=XYZ"; keep only the part after the "="
outlier_ef.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
outlier_ef.update_layout(font_family="Helvetica", font_size=14)

outlier_ef.write_image("../results/figures/SI/outlier_ef.jpeg")
outlier_ef.show()

# Visualize Building Demand

In [None]:
demand_region = 'ISNE'
demand_viz = demand.loc[:, (demand_region, slice(None))].droplevel(axis=1, level=0)
# normalize each demand profile by its own maximum
demand_viz = demand_viz / demand_viz.max(axis=0)

# Hourly timestamps used only to derive the month and hour of each row.
# They are kept out of the frame: as a column, the timestamps survive .mean()
# on pandas >= 2.0 and would then be melted as if they were a building.
profile_hours = pd.date_range(start='2018-01-01 00:00:00', end='2018-12-31 23:00:00', freq='h')
if len(profile_hours) != len(demand_viz):
    raise ValueError(f'Expected {len(profile_hours)} hourly demand rows, found {len(demand_viz)}')

# average profile for every (month, hour of day)
demand_viz = demand_viz.groupby([profile_hours.month.to_numpy(), profile_hours.hour.to_numpy()]).mean()
demand_viz.index = demand_viz.index.set_names(['month','hour'])
demand_viz = demand_viz.reset_index().melt(id_vars=['month','hour'],var_name='building', value_name='demand')

# break the underscore-separated building name into its four leading fields
building_parts = demand_viz['building'].str.split('_', expand=True)
demand_viz['climate_zone'] = building_parts[0]
demand_viz['building_category'] = building_parts[1]
demand_viz['building_name'] = building_parts[2]
demand_viz['building_id'] = building_parts[3]

demand_viz['building_sector'] = 'Commercial'
residential_buildings = ['MobileHome', 'SingleFamily', 'MediumMultifamily',
       'SmallMultifamily', 'LargeMultifamily']
demand_viz.loc[demand_viz['building_category'].isin(residential_buildings), 'building_sector'] = 'Residential'

# Average demand per sector, category, month and hour. Only `demand` is
# averaged: the remaining columns hold strings, which mean() rejects on pandas >= 2.0.
demand_viz = demand_viz.groupby(['building_sector','building_category','month','hour'])['demand'].mean().reset_index()
demand_viz

In [None]:
# average normalized demand by hour of day for commercial buildings, one facet per month
com_demand = px.line(
    demand_viz[demand_viz['building_sector'] == 'Commercial'],
    x='hour',
    y='demand',
    facet_col='month',
    facet_col_wrap=3,
    color='building_category',
    height=1000,
    width=800,
    template='plotly_white',
    title=f'Average normalized demand profiles for commercial buildings in {demand_region}',
)
com_demand.update_yaxes(range=[0,1])
com_demand.update_xaxes(dtick=3)
com_demand.write_image(f"../results/figures/SI/com_demand_profiles_{demand_region}.jpeg")
com_demand.show()

In [None]:
# average normalized demand by hour of day for residential buildings, one facet per month
res_demand = px.line(
    demand_viz[demand_viz['building_sector'] == 'Residential'],
    x='hour',
    y='demand',
    facet_col='month',
    facet_col_wrap=3,
    color='building_category',
    height=1000,
    width=800,
    template='plotly_white',
    title=f'Average normalized demand profiles for residential buildings in {demand_region}',
)
res_demand.update_yaxes(range=[0,1])
res_demand.update_xaxes(dtick=3)
res_demand.write_image(f"../results/figures/SI/res_demand_profiles_{demand_region}.jpeg")
res_demand.show()

# Explore Bias Mathematically

In [None]:
ba_code_list = ['IID']

# Residual between each averaged EF and the actual hourly EF (the 'hourly'
# residual is therefore zero wherever the hourly EF is present).
# Columns are written through their full (region, resolution) key: the chained
# form `residual[ba][res] = ...` assigns into a temporary sub-frame and is not
# guaranteed to change `residual` at all.
residual = ef.copy()
for ba in ba_code_list:
    hourly_actual = ef[(ba, 'hourly')]
    for res in ['annual','monthly','monthhourly','hourly']:
        residual[(ba, res)] = ef[(ba, res)] - hourly_actual

# Index the wide percent-error table by (location, building_type).
# building_type is rebuilt in the field order it was originally split in
# (climate zone, category, name, id) so that it lines up with the demand
# column names it is later matched against.
# NOTE(review): assumes the original names have exactly these four
# underscore-separated fields; confirm against the demand files.
pe_formatted = percent_error_wide.copy()
pe_formatted['building_type'] = pe_formatted[['climate_zone','building_category','building_name','building_id']].agg('_'.join, axis=1)
pe_formatted = pe_formatted.set_index(['location','building_type']).drop(columns=['building_category','building_name','climate_zone'])


In [None]:
# Decompose the inventory bias for each BA and EF resolution using
# E[D*u] = E[D]*E[u] + std(D)*std(u)*corr(D,u), where D is building demand and
# u is the residual between the averaged EF and the hourly EF.
# One frame is collected per (BA, resolution) and they are concatenated once at
# the end; DataFrame.append was removed in pandas 2.0.
bias_frames = []

for ba in ba_code_list:
    ba_demand = demand[ba]
    # Demand rows are numbered 0..n-1 while the EF frames carry a datetime index.
    # The EF series are renumbered so the two are matched row by row; combining
    # them on mismatched indexes yields nothing but NaN.
    hourly_ci = ef[ba]['hourly'].reset_index(drop=True)
    hourly_emissions = ba_demand.mul(hourly_ci, axis=0)

    for frequency in ['annual', 'monthly','monthhourly']:
        u = residual[ba][frequency].reset_index(drop=True)

        # one row per building type of this BA
        bias_i = pd.DataFrame(index=ba_demand.columns)
        bias_i['E[D]'] = ba_demand.mean()
        bias_i['E[u]'] = u.mean()
        bias_i['stdev_D'] = ba_demand.std(ddof=0)
        bias_i['stdev_u'] = u.std(ddof=0)
        bias_i['corr(D,u)'] = ba_demand.corrwith(u)
        bias_i['E[D]*E[u]'] = bias_i['E[D]'] * bias_i['E[u]']
        # the column name says std_D twice, but the product is stdev_D * stdev_u * corr;
        # the name is kept unchanged so existing references keep working
        bias_i['std_D*std_D*corr(D,u)'] = (bias_i['stdev_D'] * bias_i['stdev_u'] * bias_i['corr(D,u)'])
        bias_i['E[D*u]'] = bias_i['E[D]*E[u]'] + bias_i['std_D*std_D*corr(D,u)']
        bias_i['E[C]'] = hourly_emissions.mean()
        bias_i['stdev_C'] = hourly_emissions.std(ddof=0)
        bias_i['E[D*u] / E[C]'] = (bias_i['E[D*u]'] / bias_i['E[C]'])
        # previously computed percent error for this BA, matched on building_type
        bias_i['percent_error'] = pe_formatted[frequency].xs(ba, level='location')
        bias_i['ef_resolution'] = frequency
        bias_i['BA'] = ba

        bias_frames.append(bias_i.reset_index())

bias = pd.concat(bias_frames, ignore_index=True)
# keep ef_resolution as the first column, as before
bias = bias[['ef_resolution'] + [col for col in bias.columns if col != 'ef_resolution']]

bias = bias.round(3)
bias

In [None]:
# Annual and monthly bias rows for ISNE buildings whose type contains 'Residential'.
# `bias` only holds the BAs listed in `ba_code_list`, so this is empty unless
# 'ISNE' is included there.
isne_residential = (
    (bias['BA'] == 'ISNE')
    & bias['building_type'].str.contains('Residential')
    & bias['ef_resolution'].isin(['annual','monthly'])
)
bias[isne_residential].sort_values(by=['building_type','ef_resolution'])

In [None]:
# Bias components for one AZPS building type at annual and monthly resolution.
# `bias` only holds the BAs listed in `ba_code_list`, so this is empty unless
# 'AZPS' is included there.
azps_building = (
    (bias['BA'] == 'AZPS')
    & bias['building_type'].isin(['Residential_SmallSingleFamily_2B'])
    & bias['ef_resolution'].isin(['annual','monthly'])
)
bias[azps_building].sort_values(by=['building_type','ef_resolution'])[['ef_resolution','stdev_D','stdev_u','corr(D,u)','E[D*u]','percent_error']]

In [None]:
# percent error against the coefficient of variation (stdev / mean) of demand
demand_cov_scatter = px.scatter(
    bias,
    x=(bias['stdev_D'] / bias['E[D]']),
    y='percent_error',
    labels={'x':'COV of demand', 'percent_error':'Bias (% error)'},
    title='Effect of building demand variance on inventory bias',
)
demand_cov_scatter.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='black')
demand_cov_scatter

In [None]:
# percent error against stdev_C / E[C], i.e. the coefficient of variation of
# the hourly emissions (demand x hourly EF)
emission_cov_scatter = px.scatter(
    bias,
    x=(bias['stdev_C'] / bias['E[C]']),
    y='percent_error',
    labels={'x':'COV of errors in emission estimates', 'percent_error':'Bias (% error)'},
    title='Effect of carbon intensity error variance on inventory bias',
    hover_data=['BA','building_type','ef_resolution'],
)
emission_cov_scatter.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor='black')
emission_cov_scatter