In [None]:
%reload_ext autoreload
%autoreload 2

# import packages
import pandas as pd
import numpy as np
#from pathlib import Path
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go

# Overview
This notebook is used to generate the subplots for Figure 1.

We use consumption-based carbon intensity values downloaded from the Carbonara API, with any missing values filled based on the month-hour average value.

Fuel mix data comes from the United States Energy Information Administration (EIA). "Hourly Electric Grid Monitor (EIA Form-930)." Available from: https://www.eia.gov/electricity/gridmonitor/about
- 2019 generation balance data from EIA-930: https://www.eia.gov/electricity/gridmonitor/sixMonthFiles/EIA930_BALANCE_2019_Jul_Dec.csv and https://www.eia.gov/electricity/gridmonitor/sixMonthFiles/EIA930_BALANCE_2019_Jan_Jun.csv

# Load Data

In [None]:
# Read the month-hour-filled emission factor table, indexed by the parsed
# local timestamp; the SPA column is excluded from the analysis.
hourly_ef = pd.read_csv(
    '../data/processed/emission_factors/emission_factors_monthhour.csv',
    index_col='datetime_local',
    parse_dates=True,
).drop(columns='SPA')

# every remaining column label is treated as a balancing authority code
ba_list = hourly_ef.columns.tolist()

ef_year = 2019

# specify the type of emission factor we want to use
ef_type = 'consumption_ef_EGRID_2019'

# choose the units you want to use: 'kg/kWh', 'kg/MWh' or 'lb/MWh'
units = 'kg/MWh'

# kg -> lb factor used by the figure cells to label their secondary axis.
# Defined unconditionally so those cells do not hit a NameError when `units`
# is changed. NOTE(review): the figure cells multiply the primary-axis tick
# values by this factor and label them "lb/MWh", which is only meaningful
# while the primary axis is in kg/MWh.
convert_to_lb = 1 / 0.453592

# the base units are kg/kWh
unit_conversion = 1
if units == 'lb/MWh':
    unit_conversion = 1 / 0.453592 * 1000
elif units == 'kg/MWh':
    unit_conversion = 1 * 1000
elif units != 'kg/kWh':
    # fail loudly instead of silently plotting kg/kWh under a wrong label
    raise ValueError(f"Unsupported units {units!r}; use 'kg/kWh', 'kg/MWh' or 'lb/MWh'")

In [None]:
# Rank the balancing authorities by several per-column statistics of the
# hourly emission factors. Standard deviations use ddof=0.
hourly_std = hourly_ef.std(ddof=0).sort_values()
hourly_cov = hourly_ef.std(ddof=0).div(hourly_ef.mean()).sort_values()

# BA codes from lowest to highest coefficient of variation / std. dev. / median
cov_order = hourly_cov.index.tolist()
stdev_order = hourly_std.index.tolist()
ascending_median_order = hourly_ef.median().sort_values().index.tolist()

# number of balancing authorities
x_len = hourly_ef.shape[1]

# Create Subplots for Figure 1

## Visualize CI Distribution

In [None]:
# Box plot of the hourly carbon intensity of each balancing area, with the
# boxes ordered by ascending median.
ef_dist = px.box(
    hourly_ef * unit_conversion,
    labels={'variable':'Balancing Area', 'value':f'Carbon intensity ({units})'},
    category_orders={'variable':ascending_median_order},
    template='plotly_white',
    width=1200,
    height=300,
)
ef_dist.update_yaxes(range=[0,1000], zeroline=True, zerolinewidth=1, zerolinecolor='black')
ef_dist.update_xaxes(showgrid=True, title=None)

# Hand-made secondary axis: for each primary-axis tick, write the same value
# converted with `convert_to_lb`. The labels are anchored to the left edge of
# the plot area (xref='paper', x=0) and pushed right with a pixel shift.
for tick_value in (200, 400, 600, 800, 1000):
    ef_dist.add_annotation(
        x=0,
        y=tick_value,
        text=f"{int(round(tick_value*convert_to_lb,0))}",
        showarrow=False,
        textangle=0,
        xref='paper',
        xshift=1040,
    )

# rotated title for the hand-made axis, centred on the y range
ef_dist.add_annotation(
    x=0,
    y=(1000/2),
    text="lb/MWh",
    font={'size':14},
    showarrow=False,
    textangle=90,
    xref='paper',
    xshift=1085,
)
ef_dist.update_layout(font_family='Helvetica', font_size=14)


ef_dist.show()

In [None]:
# Strip the top/bottom margins and the x tick labels, then export as SVG.
ef_dist.update_layout(margin={'b': 0, 't': 0})
ef_dist.update_xaxes(showticklabels=False)
ef_dist.write_image("../results/figures/figure_1/ef_dist.svg", scale=1.77165)

## Visualize Standard Deviation of CI

In [None]:
# Bar chart of the standard deviation of hourly carbon intensity for each
# balancing area, with the bars ordered by ascending median carbon intensity.
ef_std = px.bar(
    hourly_std * unit_conversion,
    category_orders={'index':ascending_median_order},
    template='plotly_white',
    width=1200,
    height=200,
    labels={'index':'Balancing Area', 'value':f'Std. Dev. ({units})'},
)
ef_std.update_xaxes(showgrid=True, title=None)
ef_std.update_layout(showlegend=False)

# Hand-made secondary axis: the primary tick values converted with
# `convert_to_lb`, positioned by pixel shift from the left edge of the plot area.
for tick_value in (50, 100, 150):
    ef_std.add_annotation(
        x=0,
        y=tick_value,
        text=f"{int(round(tick_value*convert_to_lb,0))}",
        showarrow=False,
        textangle=0,
        xref='paper',
        xshift=1040,
    )

# rotated title for the hand-made axis
ef_std.add_annotation(
    x=0,
    y=(160/2),
    text="lb/MWh",
    font={'size':14},
    showarrow=False,
    textangle=90,
    xref='paper',
    xshift=1085,
)
ef_std.update_layout(font_family='Helvetica', font_size=14)
# emphasise the zero baseline
ef_std.add_hline(y=0, line_width=2)


ef_std.show()

In [None]:
# Strip the top/bottom margins and the x tick labels, then export as SVG.
ef_std.update_layout(margin={'b': 0, 't': 0})
ef_std.update_xaxes(showticklabels=False)
ef_std.write_image("../results/figures/figure_1/ef_stddev.svg", scale=1.0827)

## Visualize fuel mix including imports

In [None]:
# per-fuel net generation columns of the EIA-930 balance files
net_gen_columns = ['Net Generation (MW) from Coal', 'Net Generation (MW) from Natural Gas',
       'Net Generation (MW) from Nuclear',
       'Net Generation (MW) from All Petroleum Products',
       'Net Generation (MW) from Hydropower and Pumped Storage',
       'Net Generation (MW) from Solar', 'Net Generation (MW) from Wind',
       'Net Generation (MW) from Other Fuel Sources']

# columns to read from the balance files
columns_to_use = ['Balancing Authority', 'Data Date',
       'Total Interchange (MW)'] + net_gen_columns

# load the data from EIA-930 for 2019 (two half-year files)
eia_930 = pd.concat([
    pd.read_csv(balance_file, usecols=columns_to_use, thousands=',')
    for balance_file in ('../data/downloaded/eia/EIA930_BALANCE_2019_Jan_Jun.csv',
                         '../data/downloaded/eia/EIA930_BALANCE_2019_Jul_Dec.csv')
])

# only keep BAs that are in our list; copy so the column assignments below
# act on an independent frame rather than a slice of the concatenated data
eia_930 = eia_930.loc[eia_930['Balancing Authority'].isin(ba_list),:].copy()

# sum the net generation data
eia_930['Net Generation Total'] = eia_930[net_gen_columns].sum(axis=1)

# create column for imports: the negative part of total interchange, sign-flipped
eia_930['Imports'] = eia_930['Total Interchange (MW)'].clip(upper=0) * -1

# create column for exports: the positive part of total interchange
eia_930['Exports'] = eia_930['Total Interchange (MW)'].clip(lower=0)

# when exporting, discount net generation by the amount exported
export_discount = (eia_930['Net Generation Total'] - eia_930['Exports']) / eia_930['Net Generation Total']
# if the discount is negative (more electricity was exported than generated) assume no exports
export_discount = export_discount.mask(export_discount < 0, 1)

eia_930[net_gen_columns] = eia_930[net_gen_columns].multiply(export_discount, axis='index')

# Drop the columns we no longer need. 'Data Date' is text and must go before
# the aggregation: with pandas >= 2.0 groupby().sum() no longer skips
# non-numeric columns, so it would be string-concatenated and carried into
# the fuel-mix calculation.
eia_930 = eia_930.drop(columns=['Data Date', 'Total Interchange (MW)', 'Net Generation Total', 'Exports'])

# sum by balancing authority
eia_930 = eia_930.groupby('Balancing Authority').sum()


In [None]:
# how to order the balancing authorities: 'carbon_free', 'VRE', 'stdev' or 'median'
sort_order = 'median'

# calculate fuel mix
mix = eia_930.copy()
mix.columns = [col.replace("Net Generation (MW) from ","") for col in mix.columns]

# categories that make up the consumed mix (generation by fuel plus imports)
fuel_categories = ['Coal',
 'Natural Gas',
 'Nuclear',
 'All Petroleum Products',
 'Hydropower and Pumped Storage',
 'Solar',
 'Wind',
 'Imports',
 'Other Fuel Sources']

# Keep the frame's own column order but ignore anything that is not a fuel
# category, so a stray non-numeric column cannot be divided or plotted.
fuel_list = [col for col in mix.columns if col in fuel_categories]

# convert each category to its share of the row total
mix_total = mix[fuel_categories].sum(axis=1)
mix = mix[fuel_list].div(mix_total, axis='index')

if sort_order == 'carbon_free':
    # highest carbon-free share first, ties broken by lowest coal share
    mix['carbon_free'] = mix[['Solar', 'Wind','Nuclear','Hydropower and Pumped Storage']].sum(axis=1)
    mix = mix.sort_values(by=['carbon_free', 'Coal'], ascending=[False,True])
    carbon_free_order = mix.index.to_list()
    mix = mix.drop(columns='carbon_free')
elif sort_order == 'VRE':
    # highest solar+wind share first, ties broken by lowest coal share
    mix['VRE'] = mix[['Solar', 'Wind']].sum(axis=1)
    mix = mix.sort_values(by=['VRE', 'Coal'], ascending=[False,True])
    vre_order = mix.index.to_list()
    mix = mix.drop(columns='VRE')
elif sort_order == 'stdev':
    mix = mix.reindex(stdev_order)
elif sort_order == 'median':
    # same left-to-right order as the carbon intensity box plot
    mix = mix.reindex(ascending_median_order)

mix = mix.rename(columns={'All Petroleum Products':'Petroleum','Hydropower and Pumped Storage':'Hydropower'})

# long form: one row per (balancing authority, fuel)
mix = mix.reset_index().melt(id_vars='Balancing Authority',var_name='fuel', value_name='mix')

fuel_colors = {'Coal':'black',
               'Petroleum':'sienna',
               'Natural Gas':'orange',
               'Nuclear':'green',
               'Hydropower':'blue',
               'Wind':'skyblue',
               'Solar':'yellow',
               'Imports':'maroon',
               'Other Fuel Sources':'gray'}

# map BA codes to display names for the x axis
ba_name_dict = pd.read_csv('../data/manual/ba_names.csv', index_col='ba_code').to_dict()['ba_name']
fuel_mix_930 = px.bar(mix.replace({'Balancing Authority':ba_name_dict}),
       barmode='stack',
       x='Balancing Authority',
       color='fuel',
       y=mix['mix']*100,
       color_discrete_map=fuel_colors,
       category_orders={'fuel':['Solar','Wind','Hydropower','Nuclear','Natural Gas','Petroleum','Coal','Other Fuel Sources']},
       labels={'y':'Fuel Mix %','fuel':'Fuel Type'},
       template='plotly_white',
       height=500,
       width=1200) \
.update_layout(font_family='Helvetica', font_size=14, legend=dict(
    yanchor="bottom",
    y=-1.2,
    xanchor='left',
    orientation='h'
)).add_hline(y=0, line_width=2)


fuel_mix_930.show()

In [None]:
# Remove the top margin, then export the stacked fuel-mix chart as SVG.
fuel_mix_930.update_layout(margin={'t': 0})
fuel_mix_930.write_image("../results/figures/figure_1/fuel_mix_930_imports.svg", scale=1.77165)


# Other Data Exploration

## Calculate statistics of fuel mix

In [None]:
# create a dataframe with fuel mix and EF COV:
# one row per balancing authority, one column per fuel share
BA_stats = mix.pivot(index='Balancing Authority', columns='fuel', values='mix')

# create summary mix columns
BA_stats['variable_renewables'] = BA_stats[['Solar','Wind']].sum(axis=1)
BA_stats['renewables'] = BA_stats[['Solar','Wind','Hydropower',]].sum(axis=1)
BA_stats['carbon_free'] = BA_stats[['Solar','Wind','Hydropower','Nuclear']].sum(axis=1)

# add a column for the COV (aligned on the balancing authority index)
BA_stats['COV'] = hourly_cov

BA_stats.to_csv('../data/processed/ba_stats.csv')

# preview goes last so the notebook actually displays it
BA_stats.head(5)

## Visualize Fuel Mix without imports

In [None]:
columns_to_use = ['Balancing Authority', 'Data Date',
       'Net Generation (MW) from Coal', 'Net Generation (MW) from Natural Gas',
       'Net Generation (MW) from Nuclear',
       'Net Generation (MW) from All Petroleum Products',
       'Net Generation (MW) from Hydropower and Pumped Storage',
       'Net Generation (MW) from Solar', 'Net Generation (MW) from Wind',
       'Net Generation (MW) from Other Fuel Sources']

# load the data from EIA-930 for 2019 (two half-year files)
eia_930 = pd.concat([
    pd.read_csv(balance_file, usecols=columns_to_use, thousands=',')
    for balance_file in ('../data/downloaded/eia/EIA930_BALANCE_2019_Jan_Jun.csv',
                         '../data/downloaded/eia/EIA930_BALANCE_2019_Jul_Dec.csv')
])

# Sum by balancing authority. 'Data Date' is text and is dropped first: with
# pandas >= 2.0 groupby().sum() no longer skips non-numeric columns, so it
# would be string-concatenated and break the share calculation below.
eia_930 = eia_930.drop(columns='Data Date').groupby('Balancing Authority').sum()

# drop any generation-only balancing authorities (according to https://www.eia.gov/electricity/gridmonitor/about)
gen_only_bas = ['AVRN','DEAA','EEI','GRID','GRIF','GWA','HGMA','SEPA','WWA','YAD'] #NOTE: GRMA also generation-only, but retired in 2018
eia_930 = eia_930.drop(gen_only_bas, errors='ignore')

# only keep BAs that are in our list (in ba_list order); BAs without EIA-930
# data are skipped rather than raising, matching the `isin` filter used for
# the with-imports mix
eia_930 = eia_930.loc[[ba for ba in ba_list if ba in eia_930.index],:]


# calculate fuel mix
mix = eia_930.copy()
mix.columns = [col.replace("Net Generation (MW) from ","") for col in mix.columns]

# fuels that make up the generation mix (unknown fuel sources are not loaded)
generation_fuels = ['Coal',
 'Natural Gas',
 'Nuclear',
 'All Petroleum Products',
 'Hydropower and Pumped Storage',
 'Solar',
 'Wind',
 'Other Fuel Sources']

# keep the frame's own column order, restricted to the fuel columns
fuel_list = [col for col in mix.columns if col in generation_fuels]

# convert each fuel to its share of total generation
mix_total = mix[generation_fuels].sum(axis=1)
mix = mix[fuel_list].div(mix_total, axis='index')

# sort values: highest carbon-free share first, ties broken by lowest coal share
mix['carbon_free'] = mix[['Hydropower and Pumped Storage',
 'Solar',
 'Wind','Nuclear']].sum(axis=1)

mix = mix.sort_values(by=['carbon_free', 'Coal'], ascending=[False,True])
carbon_free_order = mix.index.to_list()
mix = mix.drop(columns='carbon_free')

# long form: one row per (balancing authority, fuel)
mix = mix.reset_index().melt(id_vars='Balancing Authority',var_name='fuel', value_name='mix')

fuel_colors = {'Coal':'black',
               'All Petroleum Products':'sienna',
               'Natural Gas':'orange',
               'Nuclear':'purple',
               'Hydropower and Pumped Storage':'blue',
               'Wind':'skyblue',
               'Solar':'yellow',
               'Other Fuel Sources':'gray'}

In [None]:
# BA code -> display name lookup (loaded here, but the plot below keeps the codes)
ba_name_dict = pd.read_csv('../data/manual/ba_names.csv', index_col='ba_code').to_dict()['ba_name']

# Stacked bar chart of the generation-only fuel mix, with balancing
# authorities ordered by ascending median carbon intensity.
fuel_mix_930 = px.bar(
    mix,
    barmode='stack',
    x='Balancing Authority',
    color='fuel',
    y=mix['mix']*100,
    color_discrete_map=fuel_colors,
    category_orders={
        'fuel':['Solar','Wind','Hydropower and Pumped Storage','Nuclear','Natural Gas','All Petroleum Products','Coal','Other Fuel Sources'],
        'Balancing Authority': ascending_median_order,
    },
    labels={'y':'Fuel Mix %','fuel':'Fuel Type'},
    template='plotly_white',
    height=500,
    width=1200,
)

# horizontal legend placed below the plot area
fuel_mix_930.update_layout(
    font_family='Arial',
    font_size=14,
    legend={'yanchor': "bottom", 'y': -1.5, 'xanchor': 'left', 'orientation': 'h'},
)

fuel_mix_930.show()

## Visualize fuel mix data from eGRID2019

In [None]:
# explore fuel mix from eGRID

fuel_colors = {'coal':'black',
               'oil':'sienna',
               'gas':'orange',
               'nuclear':'purple',
               'hydro':'blue',
               'biomass':'darkgreen',
               'wind':'skyblue',
               'solar':'yellow',
               'geothermal':'firebrick',
               'other_fossil':'gray',
               'other':'gray'}

mix_egrid = pd.read_csv('../data/processed/resource_mix_2019.csv', usecols=['ba_code','mix_coal','mix_oil','mix_gas','mix_nuclear','mix_hydro','mix_biomass','mix_wind','mix_solar','mix_geothermal','mix_other_fossil','mix_other'])

# only keep data for bas of interest (labels of the form 'EIA.<code>' are reduced to '<code>')
ba_codes = [i.split('.')[1] if i.split('.')[0] == 'EIA' else i for i in ba_list]
# copy so the column assignment below does not write into a slice of the
# original frame (avoids SettingWithCopyWarning)
mix_egrid = mix_egrid[mix_egrid['ba_code'].isin(ba_codes)].copy()

# rank BAs by their carbon-free share
mix_egrid['carbon_free'] = mix_egrid['mix_wind'] + mix_egrid['mix_solar'] + mix_egrid['mix_geothermal'] + mix_egrid['mix_hydro'] + mix_egrid['mix_nuclear']

mix_egrid = mix_egrid.sort_values(by='carbon_free', ascending=False)

carbon_free_order = mix_egrid['ba_code'].to_list()

mix_egrid = mix_egrid.drop(columns='carbon_free')

# melt the data into long form
mix_egrid = mix_egrid.melt(id_vars='ba_code',var_name='fuel', value_name='mix')

# remove the 'mix_' prefix (literal match, not a regular expression)
mix_egrid['fuel'] = mix_egrid['fuel'].str.replace('mix_','', regex=False)

px.bar(
    mix_egrid,
    barmode='stack',
    x='ba_code',
    color='fuel',
    y='mix',
    color_discrete_map=fuel_colors,
    category_orders={'fuel':['solar','wind','hydro','geothermal','nuclear','biomass','gas','oil','coal','other_fossil','other'],
                     'ba_code': ascending_median_order},
    labels={'mix':'Resource Mix %','ba_code':'Balancing Area', 'fuel':'Fuel Type'},
    title='Resource Mix by Balancing Area, 2019, from eGRID',
    template='plotly_white',
    width=1200)

## Calculate Monthly and Annual Averages

In [None]:
# calculate monthly and annual average EFs
##########################################

# Work on local copies so that `hourly_ef` itself is left untouched: earlier
# cells (COV, box plot) treat every column of `hourly_ef` as a balancing
# authority and would pick up a stray 'month' column when re-run.
hourly_with_month = hourly_ef.assign(month=hourly_ef.index.month)
hourly_only = hourly_with_month.drop(columns='month')

# calculate the monthly average
mo_average = hourly_with_month.groupby('month').mean().reset_index()

# calculate the annual average: every row of a column holds that column's mean
yr_average = pd.DataFrame(
    {col: hourly_only[col].mean() for col in hourly_only.columns},
    index=hourly_only.index,
)

# merge the monthly data; the left merge keeps the hourly row order, so the
# original datetime index can be restored positionally afterwards
mo_average = (
    hourly_with_month
    .merge(mo_average, how='left', on='month', suffixes=('_hourly','_monthly'))
    .set_index(hourly_with_month.index)
    .drop(columns='month')
)

# merge the annual data
# NOTE(review): this is an index-on-index merge; if `datetime_local` can contain
# repeated timestamps, the repeated rows would be multiplied - confirm the index is unique.
ef = mo_average.merge(yr_average.add_suffix('_annual'), how='left', left_index=True, right_index=True)

# split the columns into a multiindex by region and resolution
# (assumes region codes themselves contain no underscore)
split_columns = ef.columns.str.split('_', expand=True)
ef.columns = split_columns
split_columns = pd.MultiIndex.from_product([split_columns.levels[0], split_columns.levels[1]])
ef = ef.reindex(columns=split_columns)


ef.head(3)

## Visualize emissions at different resolutions for a single region

In [None]:
# Hourly / monthly / annual emission factors for CISO in the configured
# display units, with the datetime index turned back into a column for plotting.
comparison_data = (ef['CISO'] * unit_conversion).reset_index()

comparison_data

In [None]:
# Line chart comparing the hourly, monthly-average and annual-average emission
# factors for CISO. The y-axis label is built from `units` because the data
# were scaled with `unit_conversion`; a hard-coded "lbCO2/MWh" label would
# mislabel kg-based values.
caiso_ef_comparison = px.line(comparison_data,
        x='datetime_local',
        y=['hourly','monthly','annual'],
        template='plotly_white',
        labels={'variable':'Emission Factor Resolution', 'value':f'Carbon intensity ({units})', 'datetime_local':'Datetime'},
        width=1200) \
.update_xaxes(dtick='M1', tickformat='%b') \
.update_yaxes(rangemode='tozero', dtick=200) \
.update_layout(legend=dict(
    yanchor="bottom",
    x=0.5,
    y=1.02,
    xanchor='center',
    orientation='h'))

caiso_ef_comparison.write_image("../results/figures/caiso_ef_comparison.jpeg")
caiso_ef_comparison.show()

## Explore month-hour average emission factor for a region

In [None]:
# Average BPAT emission factor for every (month, hour-of-day) combination,
# flattened to columns so each month can be drawn as its own facet.
month_of_row = hourly_ef.index.month
hour_of_row = hourly_ef.index.hour
mh = (
    hourly_ef[['BPAT']]
    .groupby([month_of_row, hour_of_row])
    .mean()
    .rename_axis(['month','hour'])
    .reset_index()
)

px.line(mh, x='hour', facet_col='month', y='BPAT')