In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join
import glob
import numpy as np

Get paths of all monthly index and generation files

In [2]:
path = os.path.join('Data storage', 'final state data', 'Monthly index*')
mi_fns = glob.glob(path)

In [3]:
path = os.path.join('Data storage', 'final state data', 'Monthly gen*')
mg_fns = glob.glob(path)

Combine all state monthly index files into a single dataframe

In [4]:
df_list = []
for f in mi_fns:
    state = f.split('.')[0][-2:]
    df = pd.read_csv(f)
    df['State'] = state
    df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])

In [5]:
full_mi.head()

Unnamed: 0,year,month,generation (MWh),final CO2 (kg),datetime,quarter,monthly index (g/kWh),change since 2005,index (lb/MWh),State
0,2001,1,590145.0,354260200.0,2001-01-01,1,600.293443,0.117946,1323.406924,AK
1,2001,2,546167.0,361546800.0,2001-02-01,1,661.971217,0.23281,1459.381744,AK
2,2001,3,587115.0,390253500.0,2001-03-01,1,664.69693,0.237886,1465.390852,AK
3,2001,4,488698.0,303029800.0,2001-04-01,2,620.075841,0.154787,1367.0192,AK
4,2001,5,484687.0,300428500.0,2001-05-01,2,619.840221,0.154348,1366.49975,AK


Combine all state monthly generation files into a single dataframe

In [21]:
df_list = []
for f in mg_fns:
    state = f.split('.')[0][-2:]
    df = pd.read_csv(f)
    df['State'] = state
    df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

monthly_gen = pd.pivot_table(full_mg, index=['State', 'datetime'], 
                             values='generation (MWh)', columns='fuel category')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
monthly_gen.replace(np.nan, 0, inplace=True)

In [32]:
monthly_gen_tidy = pd.melt(monthly_gen, id_vars=['State', 'datetime', 'Year'],
                           value_name='Generation')

In [22]:
monthly_gen.head()

fuel category,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year
0,AK,2001-01-01,46903.0,367521.0,0.0,71085.0,104636.0,2001
1,AK,2001-02-01,54056.0,334016.0,0.0,67910.0,90185.0,2001
2,AK,2001-03-01,51920.0,343858.0,0.0,91413.0,99924.0,2001
3,AK,2001-04-01,37590.0,290050.0,0.0,76372.0,84686.0,2001
4,AK,2001-05-01,40986.0,283468.0,0.0,75034.0,85199.0,2001


Merge the two dataframes to combine generation and index data

In [23]:
gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'State', 'monthly index (g/kWh)']], 
                     on=['datetime', 'State'])
gen_index.head()

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh)
0,AK,2001-01-01,46903.0,367521.0,0.0,71085.0,104636.0,2001,600.293443
1,AK,2001-02-01,54056.0,334016.0,0.0,67910.0,90185.0,2001,661.971217
2,AK,2001-03-01,51920.0,343858.0,0.0,91413.0,99924.0,2001,664.69693
3,AK,2001-04-01,37590.0,290050.0,0.0,76372.0,84686.0,2001,620.075841
4,AK,2001-05-01,40986.0,283468.0,0.0,75034.0,85199.0,2001,619.840221


Calculate variability as the rolling standard deviation of monthly values. Also calculate a normalized value, which divides the rolling standard deviation by the rolling average.

In [24]:
for state in gen_index['State'].unique():
    gen_index.loc[gen_index['State'] == state, 'Index variability'] = \
        gen_index.loc[gen_index['State']==state, 
                       'monthly index (g/kWh)'].rolling(window=12).std()
    
    gen_index.loc[gen_index['State'] == state, 
                   'Normalized Index variability'] = \
         gen_index.loc[gen_index['State']==state, 'Index variability'] / \
         gen_index.loc[gen_index['State']==state, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()
gen_index.tail()

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh),Index variability,Normalized Index variability
9745,WY,2016-11-01,3136243.0,61286.95,0.0,40986.67,414531.36,2016,916.315369,17.668906,0.019208
9746,WY,2016-12-01,3952815.25,64950.5,0.0,38877.4,583667.43,2016,891.85433,19.201913,0.020905
9747,WY,2017-01-01,3875156.57,72746.85,0.0,45655.7,358419.99,2017,956.145267,22.000973,0.023889
9748,WY,2017-02-01,3140221.06,55896.8,0.0,42111.49,409344.56,2017,925.869631,21.677007,0.023501
9749,WY,2017-03-01,2970256.83,60934.81,0.0,41061.12,502269.26,2017,912.486445,21.853026,0.023716


Add in the percent of generation from each fuel type, and the change in generation from that fuel type since the average in 2001. Maybe change from 2001 to a different year?

In [25]:
base_year = 2005

In [27]:
fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Calculate percent of generation from each fuel type
    gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']

    # Percent of fuel in state in base year (entire year)
    for state in gen_index['State'].unique():
        percent_fuel_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                          (gen_index['State'] == state), fuel].sum() / gen_index.loc[(gen_index['Year'] == 2001) & 
                                                                                                     (gen_index['State'] == state), 'Total gen'].sum()

        # Use percent of fuel in 2001 to calculate change for each state/month
        gen_index.loc[gen_index['State'] == state, 
                      col_change] = (gen_index.loc[gen_index['State'] == state, col_percent] - percent_fuel_base) / percent_fuel_base
    
# Change in variability compared to average base year value
for state in gen_index['State'].unique():
    norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['State'] == state), 'Normalized Index variability'].mean()
    variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['State'] == state), 'Index variability'].mean()
    
    gen_index.loc[gen_index['State'] == state, 
                  'change in variability'] = (gen_index.loc[gen_index['State'] == state, 
                                                            'Index variability'] - variability_base) / variability_base
    
    gen_index.loc[gen_index['State'] == state, 
                  'change in norm variability'] = (gen_index.loc[gen_index['State'] == state, 
                                                            'Normalized Index variability'] - norm_variability_base) / norm_variability_base

In [28]:
gen_index.loc[(gen_index['State'] == 'TX') &
              (gen_index['Year'].isin([2001, 2016]))]

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh),Index variability,...,percent Natural Gas,change in Natural Gas,percent Renewables,change in Renewables,percent Nuclear,change in Nuclear,percent Other,change in Other,change in variability,change in norm variability
8190,TX,2001-01-01,11683911.0,13750546.0,3545310.0,1707467.21,314045.8,2001,638.408119,,...,0.443548,-0.163955,0.01013,-0.433987,0.11436,0.114452,0.055077,2.572064,,
8191,TX,2001-02-01,10236786.0,11507834.0,3037626.0,510769.63,333505.37,2001,639.497007,,...,0.44906,-0.153566,0.013014,-0.272847,0.118534,0.155132,0.019931,0.292653,,
8192,TX,2001-03-01,11004470.0,13316335.0,2462837.0,447733.81,348963.19,2001,654.137303,,...,0.48282,-0.089931,0.012653,-0.293043,0.089297,-0.129792,0.016234,0.052851,,
8193,TX,2001-04-01,9767225.0,14402417.0,2668816.0,331369.06,316487.94,2001,633.521538,,...,0.523985,-0.012339,0.011514,-0.356641,0.097096,-0.053787,0.012056,-0.218117,,
8194,TX,2001-05-01,11449397.0,16025878.0,3419870.0,383202.65,305111.35,2001,638.333358,,...,0.507414,-0.043574,0.00966,-0.460227,0.10828,0.055205,0.012133,-0.213108,,
8195,TX,2001-06-01,12312672.0,18320416.0,3405594.0,325916.94,254907.06,2001,636.48136,,...,0.529193,-0.002521,0.007363,-0.588591,0.098372,-0.041353,0.009414,-0.389434,,
8196,TX,2001-07-01,13218435.0,22605566.0,3460095.0,389539.85,266200.15,2001,636.275773,,...,0.56599,0.066838,0.006665,-0.627596,0.086633,-0.155755,0.009753,-0.367454,,
8197,TX,2001-08-01,12779997.0,22805027.0,3174506.0,391315.96,225748.04,2001,643.489399,,...,0.579152,0.091646,0.005733,-0.679669,0.080619,-0.214358,0.009938,-0.355481,,
8198,TX,2001-09-01,11579150.0,16887867.0,3383498.0,388012.7,233449.3,2001,629.141386,,...,0.520075,-0.019708,0.007189,-0.598305,0.104197,0.015416,0.011949,-0.225032,,
8199,TX,2001-10-01,9736388.0,15188595.0,2615902.0,278253.48,264496.52,2001,625.683145,,...,0.540834,0.019421,0.009418,-0.473765,0.093147,-0.092273,0.009908,-0.35741,,


In [29]:
id_vars=['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Renewables']

pd.melt(gen_index, id_vars=['State', 'datetime', 'Year'])

Unnamed: 0,Coal,Natural Gas,Nuclear,Other,Renewables,variable,value
0,46903.00,367521.00,0.0,71085.00,104636.00,State,AK
1,54056.00,334016.00,0.0,67910.00,90185.00,State,AK
2,51920.00,343858.00,0.0,91413.00,99924.00,State,AK
3,37590.00,290050.00,0.0,76372.00,84686.00,State,AK
4,40986.00,283468.00,0.0,75034.00,85199.00,State,AK
5,39411.00,272074.00,0.0,58095.00,118424.00,State,AK
6,55082.00,314583.00,0.0,63515.00,120745.00,State,AK
7,65143.00,331115.00,0.0,67181.00,123357.00,State,AK
8,58910.00,355210.00,0.0,71144.00,154902.00,State,AK
9,38751.00,314444.00,0.0,91507.00,135001.00,State,AK


Plot the change in variability since 2005 against the change in each fuel type

In [None]:
g = sns.FacetGrid(gen_index, hue='State')
g.map()