In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join
import glob
import numpy as np

Get paths of all monthly index and generation files

In [2]:
path = os.path.join('Data storage', 'final state data', 'Monthly index*')
mi_fns = glob.glob(path)

In [3]:
path = os.path.join('Data storage', 'final state data', 'Monthly gen*')
mg_fns = glob.glob(path)

Combine all state monthly index files into a single dataframe

In [4]:
df_list = []
for f in mi_fns:
    state = f.split('.')[0][-2:]
    df = pd.read_csv(f)
    df['State'] = state
    df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])

In [5]:
full_mi.head()

Unnamed: 0,year,month,generation (MWh),final CO2 (kg),datetime,quarter,monthly index (g/kWh),change since 2005,index (lb/MWh),State
0,2001,1,590145.0,354260200.0,2001-01-01,1,600.293443,0.117946,1323.406924,AK
1,2001,2,546167.0,361546800.0,2001-02-01,1,661.971217,0.23281,1459.381744,AK
2,2001,3,587115.0,390253500.0,2001-03-01,1,664.69693,0.237886,1465.390852,AK
3,2001,4,488698.0,303029800.0,2001-04-01,2,620.075841,0.154787,1367.0192,AK
4,2001,5,484687.0,300428500.0,2001-05-01,2,619.840221,0.154348,1366.49975,AK


Combine all state monthly generation files into a single dataframe

In [19]:
df_list = []
for f in mg_fns:
    state = f.split('.')[0][-2:]
    df = pd.read_csv(f)
    df['State'] = state
    df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

monthly_gen = pd.pivot_table(full_mg, index=['State', 'datetime'], 
                             values='generation (MWh)', columns='fuel category')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen.drop('fuel category', axis=1, inplace=True)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year

ValueError: labels ['fuel category'] not contained in axis

In [17]:
monthly_gen.head()

fuel category,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year
0,AK,2001-01-01,46903.0,367521.0,,71085.0,104636.0,2001
1,AK,2001-02-01,54056.0,334016.0,,67910.0,90185.0,2001
2,AK,2001-03-01,51920.0,343858.0,,91413.0,99924.0,2001
3,AK,2001-04-01,37590.0,290050.0,,76372.0,84686.0,2001
4,AK,2001-05-01,40986.0,283468.0,,75034.0,85199.0,2001


In [10]:
full_mg.head()

Unnamed: 0,fuel category,year,month,generation (MWh),quarter,total fuel (mmbtu),elec fuel (mmbtu),all fuel CO2 (kg),elec fuel CO2 (kg),datetime,adjusted CO2 (kg),adjusted index (g/kWh),adjusted index (lb/MWh),State
0,Coal,2001,1,46903.0,1,1120000.0,872000.0,106680000.0,83058000.0,2001-01-01,83419930.0,1778.562816,3921.019585,AK
1,Coal,2001,2,54056.0,1,1521000.0,1094000.0,144875250.0,104203500.0,2001-02-01,104667800.0,1936.284533,4268.732881,AK
2,Coal,2001,3,51920.0,1,1491000.0,1062000.0,142017750.0,101155500.0,2001-03-01,101722700.0,1959.219407,4319.295104,AK
3,Coal,2001,4,37590.0,2,922000.0,708000.0,87820500.0,67437000.0,2001-04-01,67862640.0,1805.337654,3980.047392,AK
4,Coal,2001,5,40986.0,2,983000.0,770000.0,93630750.0,73342500.0,2001-05-01,73829080.0,1801.324322,3971.1996,AK


Merge the two dataframes to combine generation and index data

In [27]:
gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'State', 'monthly index (g/kWh)']], 
                     on=['datetime', 'State'])
gen_index.head()

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh)
0,AK,2001-01-01,46903.0,367521.0,,71085.0,104636.0,2001,600.293443
1,AK,2001-02-01,54056.0,334016.0,,67910.0,90185.0,2001,661.971217
2,AK,2001-03-01,51920.0,343858.0,,91413.0,99924.0,2001,664.69693
3,AK,2001-04-01,37590.0,290050.0,,76372.0,84686.0,2001,620.075841
4,AK,2001-05-01,40986.0,283468.0,,75034.0,85199.0,2001,619.840221


Calculate variability as the rolling standard deviation of monthly values. Also calculate a normalized value, which divides the rolling standard deviation by the rolling average.

In [28]:
for state in gen_index['State'].unique():
    gen_index.loc[gen_index['State'] == state, 'Index variability'] = \
        gen_index.loc[gen_index['State']==state, 
                       'monthly index (g/kWh)'].rolling(window=12).std()
    
    gen_index.loc[gen_index['State'] == state, 
                   'Normalized Index variability'] = \
         gen_index.loc[gen_index['State']==state, 'Index variability'] / \
         gen_index.loc[gen_index['State']==state, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()
gen_index.tail()

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh),Index variability,Normalized Index variability
9745,WY,2016-11-01,3136243.0,61286.95,,40986.67,414531.36,2005,916.315369,17.668906,0.019208
9746,WY,2016-12-01,3952815.25,64950.5,,38877.4,583667.43,2005,891.85433,19.201913,0.020905
9747,WY,2017-01-01,3875156.57,72746.85,,45655.7,358419.99,2005,956.145267,22.000973,0.023889
9748,WY,2017-02-01,3140221.06,55896.8,,42111.49,409344.56,2005,925.869631,21.677007,0.023501
9749,WY,2017-03-01,2970256.83,60934.81,,41061.12,502269.26,2005,912.486445,21.853026,0.023716


Add in the percent of generation from each fuel type, and the change in generation from that fuel type since the average in 2001. Maybe change from 2001 to a different year?

In [29]:
base_year = 2001

In [30]:
fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Calculate percent of generation from each fuel type
    gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']

    # Percent of fuel in state in 2001 (entire year)
    for state in gen_index['State'].unique():
        percent_fuel_2001 = gen_index.loc[(gen_index['Year'] == 2001) & 
                                          (gen_index['State'] == state), fuel].sum() / gen_index.loc[(gen_index['Year'] == 2001) & 
                                                                                                     (gen_index['State'] == state), 'Total gen'].sum()

        # Use percent of fuel in 2001 to calculate change for each state/month
        gen_index.loc[gen_index['State'] == state, 
                      col_change] = (gen_index.loc[gen_index['State'] == state, col_percent] - percent_fuel_2001) / percent_fuel_2001

In [33]:
gen_index.loc[(gen_index['State'] == 'TX') &
              (gen_index['Year'].isin([2001, 2016]))]

Unnamed: 0,State,datetime,Coal,Natural Gas,Nuclear,Other,Renewables,Year,monthly index (g/kWh),Index variability,...,percent Coal,change in Coal,percent Natural Gas,change in Natural Gas,percent Renewables,change in Renewables,percent Nuclear,change in Nuclear,percent Other,change in Other
8310,TX,2011-01-01,13967564.47,13856019.64,3786420.0,473353.39,2213778.11,2016,593.307195,17.070866,...,0.407252,0.2356,0.403999,-0.172355,0.064547,-0.21304,0.1104,0.229528,0.013802,0.319613
8311,TX,2011-02-01,11056737.46,12596233.42,3472886.0,434741.88,2703305.6,2016,571.771512,15.58753,...,0.365344,0.108453,0.416213,-0.147334,0.089324,0.089047,0.114753,0.278007,0.014365,0.37349
8312,TX,2011-03-01,11878957.33,11306467.97,3808335.0,413737.61,2868245.5,2016,575.387717,11.38208,...,0.392359,0.190416,0.37345,-0.23494,0.094737,0.155042,0.125788,0.400902,0.013666,0.306619
8313,TX,2011-04-01,12400212.44,14583806.39,2022377.0,356482.91,3244086.9,2016,590.749447,9.557212,...,0.380293,0.153809,0.44726,-0.083729,0.099491,0.212994,0.062023,-0.309253,0.010933,0.045315
8314,TX,2011-05-01,13391272.33,15526737.46,3380340.0,352798.77,3321314.32,2016,570.406643,8.668817,...,0.372265,0.12945,0.431628,-0.115754,0.092329,0.125683,0.09397,0.046545,0.009807,-0.062274
8315,TX,2011-06-01,14944173.71,20225234.41,3645844.0,408393.15,3583079.46,2016,568.423546,9.409049,...,0.349108,0.059193,0.472478,-0.032068,0.083704,0.020518,0.08517,-0.051465,0.00954,-0.08781
8316,TX,2011-07-01,15815850.94,24577261.14,3461110.0,430717.06,2284383.17,2016,585.274502,9.290266,...,0.33962,0.030405,0.527756,0.081177,0.049053,-0.401939,0.074322,-0.172281,0.009249,-0.115677
8317,TX,2011-08-01,15716182.67,26445110.6,3593550.0,410047.26,2163400.87,2016,580.207442,8.876062,...,0.325196,-0.013355,0.547197,0.121004,0.044765,-0.454227,0.074357,-0.171887,0.008485,-0.188756
8318,TX,2011-09-01,14119203.8,18544610.67,3659818.0,367156.04,1872021.89,2016,581.846705,8.620499,...,0.366135,0.110853,0.480894,-0.014827,0.048545,-0.40814,0.094905,0.056959,0.009521,-0.089666
8319,TX,2011-10-01,12123605.95,14076337.39,3091171.0,343444.53,2832274.08,2016,568.922279,8.537406,...,0.373415,0.13294,0.433561,-0.111795,0.087236,0.063584,0.09521,0.060353,0.010578,0.01143


In [34]:
gen_index.dtypes

State                                   object
datetime                        datetime64[ns]
Coal                                   float64
Natural Gas                            float64
Nuclear                                float64
Other                                  float64
Renewables                             float64
Year                                     int64
monthly index (g/kWh)                  float64
Index variability                      float64
Normalized Index variability           float64
Total gen                              float64
percent Coal                           float64
change in Coal                         float64
percent Natural Gas                    float64
change in Natural Gas                  float64
percent Renewables                     float64
change in Renewables                   float64
percent Nuclear                        float64
change in Nuclear                      float64
percent Other                          float64
change in Oth