In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from os.path import join
import sys
import json
import geopandas as gpd

In [2]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [3]:
# add the 'src' directory as one where we can import modules
src_dir = join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

In [4]:
%aimport Data.make_data
from Data.make_data import states_in_nerc
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data
%aimport Analysis.index
from Analysis.index import facility_co2, adjust_epa_emissions, group_fuel_cats
%aimport util.utils
from util.utils import rename_cols, add_facility_location

## Load data

In [5]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'Facility gen fuels and CO2 2017-08-31.zip')
eia_fac = pd.read_csv(path)

In [6]:
rename_cols(eia_fac)

In [7]:
eia_fac.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,lat,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,M,SUB,6,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-06-01,2,0.0,0.0,0.0,0.0
1,M,SUB,5,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-05-01,2,0.0,0.0,0.0,0.0
2,M,SUB,4,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-04-01,2,0.0,0.0,0.0,0.0
3,M,SUB,3,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-03-01,1,0.0,0.0,0.0,0.0
4,M,SUB,2,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-02-01,1,0.0,0.0,0.0,0.0


In [8]:
path = join(cwd, '..', 'Data storage', 'Derived data',
            'Monthly EPA emissions 2017-08-31.csv')
epa = pd.read_csv(path)

## Test facility_emissions_gen

In [9]:
fuel_cat_folder = join(cwd, '..', 'Data storage', 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')

with open(state_cats_path, 'r') as f:
    state_fuel_cat = json.load(f)
    
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')
with open(custom_cats_path, 'r') as f:
    custom_fuel_cat = json.load(f)

In [113]:
co2, gen_fuels_custom = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=False)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories
Gen/fuels to custom categories


In [10]:
co2, gen_fuels_state = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=True)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [11]:
co2.head()

Unnamed: 0,year,month,plant id,final co2 (kg)
0,2001,1,2,0.0
1,2001,1,3,962541100.0
2,2001,1,4,0.0
3,2001,1,7,67000920.0
4,2001,1,8,753020400.0


In [12]:
gen_fuels_state.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,COW,2001,1,3,8275496.0,852306.0,8275496.0,31.0069,-88.0103,1,772103776.8,772103776.8,772103776.8,772103776.8
1,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6
2,COW,2001,1,8,6910786.0,706857.0,6910786.0,33.644344,-87.196486,1,644776333.8,644776333.8,644776333.8,644776333.8
3,COW,2001,1,10,3108892.0,320454.0,3108892.0,32.6017,-87.7811,1,290059623.6,290059623.6,290059623.6,290059623.6
4,COW,2001,1,26,9745127.0,1005460.0,9745127.0,33.244211,-86.458056,1,909220349.1,909220349.1,909220349.1,909220349.1


### Check generation and fuel consumption totals

Interesting - there is some small part of generation that I'm losing along the way. It's 5 orders of magnitude smaller than the total though.

In [121]:
eia_fac['generation (mwh)'].sum()

65594379743.76258

In [123]:
gen_fuels_state['generation (mwh)'].sum()

65594327524.76352

Fuel consumption is pretty identical though

In [124]:
eia_fac['total fuel (mmbtu)'].sum(), eia_fac['elec fuel (mmbtu)'].sum()

(686393145435.8103, 645653921528.091)

In [125]:
gen_fuels_state['total fuel (mmbtu)'].sum(), gen_fuels_state['elec fuel (mmbtu)'].sum()

(686393145435.7861, 645653921528.0757)

## Extra gen/fuels from non-reporting

In [13]:
%aimport Analysis.index
from Analysis.index import extra_emissions_gen

Total EIA generation/fuel consumption and emission factors

In [116]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'EIA country-wide gen fuel CO2 2017-08-31.csv')
eia_total = pd.read_csv(path)

path = join(cwd, '..', 'Data storage',
            'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

In [117]:
eia_total.head()

Unnamed: 0,end,f,geography,last_updated,sector,series_id,start,type,units,year,month,generation (MWh),datetime,quarter,total fuel (mmbtu),elec fuel (mmbtu),all fuel CO2 (kg),elec fuel CO2 (kg)
0,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,6,2524250.11,2017-06-01,2,,,0.0,0.0
1,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,5,2479480.14,2017-05-01,2,,,0.0,0.0
2,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,4,2267811.28,2017-04-01,2,,,0.0,0.0
3,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,3,2054274.3,2017-03-01,1,,,0.0,0.0
4,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,2,1449156.85,2017-02-01,1,,,0.0,0.0


In [118]:
extra_co2, extra_gen_fuel = extra_emissions_gen(gen_fuels_state, eia_total, ef)

Results match what I have previously found in the notebooks up on GitHub (Emissions Index repo)

In [119]:
extra_gen_fuel.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COW,2001,1,16918390.0,1011665.061,13157576.0
COW,2001,2,14968346.0,911346.865,11574491.0
COW,2001,3,18321543.0,1153328.913,14765162.0
COW,2001,4,13530117.0,776341.623,10374008.0
COW,2001,5,14014024.0,801266.923,10920463.0


In [121]:
extra_gen_fuel.loc[idx['DPV',:,:]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DPV,2014,1,,624032.82,
DPV,2014,2,,663828.73,
DPV,2014,3,,907029.95,
DPV,2014,4,,988423.77,
DPV,2014,5,,1092011.8,
DPV,2014,6,,1100560.82,
DPV,2014,7,,1148527.79,
DPV,2014,8,,1139134.66,
DPV,2014,9,,1046360.7,
DPV,2014,10,,964877.96,


## Total CO₂ (national)

In [114]:
facility_co2 = co2.groupby(['year', 'month']).sum()

In [115]:
facility_co2.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,final co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,2,57482310,116544200000.0
2017,3,58485598,123718700000.0
2017,4,58955690,113345400000.0
2017,5,59223601,129258100000.0
2017,6,59451819,151211300000.0


In [122]:
extra_co2.loc[idx['NG', :, :],:].tail(n=11)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,all fuel co2 (kg),elec fuel co2 (kg)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
NG,2016,8,6022574000.0,4039321000.0
NG,2016,9,4917984000.0,3118869000.0
NG,2016,10,4312456000.0,2624005000.0
NG,2016,11,4231444000.0,2458983000.0
NG,2016,12,4418713000.0,2530048000.0
NG,2017,1,7823823000.0,6101011000.0
NG,2017,2,6601814000.0,5030973000.0
NG,2017,3,8630216000.0,6986014000.0
NG,2017,4,7823192000.0,6280070000.0
NG,2017,5,8749634000.0,7244919000.0


In [125]:
national_co2 = (facility_co2.loc[:, 'final co2 (kg)']
                + extra_co2.loc[:, 'elec fuel co2 (kg)'].groupby(['year', 'month']).sum())

These values are very close, but not quite exactly the same as what I've put up on the website.

Why? Could it be because the extra 

In [126]:
national_co2

year  month
2001  1        2.162121e+11
      2        1.814475e+11
      3        1.900841e+11
      4        1.761317e+11
      5        1.905779e+11
      6        2.062662e+11
      7        2.311815e+11
      8        2.404311e+11
      9        1.950060e+11
      10       1.845210e+11
      11       1.731834e+11
      12       1.882633e+11
2002  1        1.951862e+11
      2        1.709727e+11
      3        1.855136e+11
      4        1.759043e+11
      5        1.863580e+11
      6        2.081023e+11
      7        2.385440e+11
      8        2.355222e+11
      9        2.092338e+11
      10       1.948277e+11
      11       1.844629e+11
      12       2.023788e+11
2003  1        2.167834e+11
      2        1.880347e+11
      3        1.876779e+11
      4        1.727763e+11
      5        1.840170e+11
      6        2.007602e+11
                   ...     
2015  1        1.779027e+11
      2        1.696220e+11
      3        1.522496e+11
      4        1.310557e+11
      5 

## National Index and gen by fuels

In [127]:
extra_gen_fuel.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COW,2001,1,16918390.0,1011665.061,13157576.0
COW,2001,2,14968346.0,911346.865,11574491.0
COW,2001,3,18321543.0,1153328.913,14765162.0
COW,2001,4,13530117.0,776341.623,10374008.0
COW,2001,5,14014024.0,801266.923,10920463.0


In [136]:
gen_fuels_state.groupby(['year', 'month']).sum().tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017,2,89637881,2471045000.0,246224700.0,2349592000.0,120729.133612,-296504.534805,3192,120956300000.0,115570100000.0,127393800000.0,118052500000.0
2017,3,90951249,2659571000.0,265474200.0,2526846000.0,121757.578709,-298889.971201,3217,128563600000.0,122580000000.0,135457000000.0,125264600000.0
2017,4,91388289,2424457000.0,242298300.0,2304171000.0,121805.826865,-299282.190963,6440,117636700000.0,112211900000.0,123945600000.0,114706400000.0
2017,5,91583293,2676784000.0,268119000.0,2555053000.0,121961.825569,-299520.312134,6446,133406400000.0,127885000000.0,139784400000.0,130483500000.0
2017,6,91677954,2997427000.0,302185300.0,2876122000.0,121896.867105,-299481.456675,6444,154463000000.0,149047900000.0,160936800000.0,151679000000.0


In [154]:
national_gen = (gen_fuels_state
                .groupby(['type', 'year', 'month'])['generation (mwh)'].sum()
                .add(extra_gen_fuel['generation (mwh)'], fill_value=0))

In [155]:
national_gen.groupby(['year', 'month']).sum().tail()

year  month
2017  2        2.898631e+08
      3        3.199879e+08
      4        2.959468e+08
      5        3.236810e+08
      6        3.582983e+08
Name: generation (mwh), dtype: float64

In [156]:
national_gen = group_fuel_cats(national_gen.reset_index(), custom_fuel_cat,
                               'type', 'fuel category').set_index(['fuel category', 'year', 'month'])

In [157]:
national_gen.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh)
fuel category,year,month,Unnamed: 3_level_1
Wind,2017,2,21691634.88
Wind,2017,3,25598907.5
Wind,2017,4,25403089.66
Wind,2017,5,22326395.36
Wind,2017,6,19428938.56


In [159]:
total_gen = national_gen.groupby(['year', 'month']).sum()
# percent_gen = national_gen / total_gen
total_gen.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,generation (mwh)
year,month,Unnamed: 2_level_1
2001,1,332493160.0
2001,2,282940198.0
2001,3,300706544.0
2001,4,278078871.0
2001,5,300491621.0


In [160]:
df_list = []
for fuel in national_gen.index.get_level_values('fuel category').unique():
    percent_gen = national_gen.loc[fuel].divide(total_gen, fill_value=0)
    percent_gen['fuel category'] = fuel
    percent_gen.set_index('fuel category', inplace=True, append=True)
    df_list.append(percent_gen)
percent_gen = pd.concat(df_list)

In [162]:
path = join(cwd, '..', 'Data storage', 'National data', 'National generation.csv')
national_gen.to_csv(path)

path = join(cwd, '..', 'Data storage', 'National data', 'National percent gen.csv')
percent_gen.to_csv(path)

## Facility state and lat/lon file generation

In [14]:
%aimport Data.make_data
from Data.make_data import facility_location_data

In [15]:
facility_location_data(eia_fac)

## Fraction of estimated gen/fuels in each NERC region

**NOTE** A few of the 2015 annual facilities get assigned multiple NERC regions. I'm not sure why this is. A test of generation from these facilities showed that it was several orders of magnitude lower than total generation from all 2015 annual reporting facilities. 

In [16]:
%aimport Data.make_data
from Data.make_data import get_annual_plants

In [17]:
annual_ids = get_annual_plants(2015)

In [18]:
annual_2015 = eia_fac.loc[(eia_fac['year'] == 2015) &
                          (eia_fac['plant id'].isin(annual_ids)), :].copy()

In [19]:
annual_2015['generation (mwh)'].sum()

210166168.213

### Add NERC region labels

In [23]:
%aimport Analysis.state2nerc
from Analysis.state2nerc import fraction_state2nerc, add_region

In [25]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage', 'Facility labels',
            'Facility locations.csv')
location_labels = pd.read_csv(path)

In [28]:
nerc_state_path = join(cwd, '..', 'Data storage', 'Derived data',
                  'NERC_states.json')

with open(nerc_state_path, 'r') as f:
    nerc_states = json.load(f)

In [39]:
eia_2015_annual = eia_fac.loc[eia_fac['plant id'].isin(annual_ids)].copy()
eia_2015_annual.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,...,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),type,state
120,M,NG,12,10052,25674.0,2015,1290.744,25674.0,USA-CA,2017-08-24T11:46:12-04:00,...,-124.2028,ALL,2015-12-01,4,1362519.18,1362519.18,1362519.18,1362519.18,NG,CA
121,M,NG,11,10052,6341.0,2015,378.814,6341.0,USA-CA,2017-08-24T11:46:12-04:00,...,-124.2028,ALL,2015-11-01,4,336516.87,336516.87,336516.87,336516.87,NG,CA
122,M,NG,10,10052,2898.0,2015,183.916,2898.0,USA-CA,2017-08-24T11:46:12-04:00,...,-124.2028,ALL,2015-10-01,4,153796.86,153796.86,153796.86,153796.86,NG,CA
123,M,NG,9,10052,2947.0,2015,191.471,2947.0,USA-CA,2017-08-24T11:46:12-04:00,...,-124.2028,ALL,2015-09-01,3,156397.29,156397.29,156397.29,156397.29,NG,CA
124,M,NG,8,10052,1761.0,2015,117.711,1761.0,USA-CA,2017-08-24T11:46:12-04:00,...,-124.2028,ALL,2015-08-01,3,93456.27,93456.27,93456.27,93456.27,NG,CA


In [40]:
eia_2015_annual = group_fuel_cats(eia_2015_annual, state_fuel_cat)
eia_2015_annual.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6
1,COW,2001,1,59,674160.0,62539.0,674160.0,40.854765,-98.348222,1,65528352.0,65528352.0,65528352.0,65528352.0
2,COW,2001,1,642,337326.0,27208.0,337326.0,30.6692,-84.8869,1,31472515.8,31472515.8,31472515.8,31472515.8
3,COW,2001,1,753,3794.0,118.512,3794.0,31.844708,-83.940734,1,353980.2,353980.2,353980.2,353980.2
4,COW,2001,1,964,309027.0,23207.0,309027.0,39.757327,-89.600512,1,28832219.1,28832219.1,28832219.1,28832219.1


In [41]:
eia_2015_annual_nerc = add_facility_location(eia_2015_annual, location_labels, 
                                        labels=['state', 'nerc'])
eia_2015_annual_nerc.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),state,nerc
0,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6,AL,SERC
1,COW,2001,2,7,463753.0,33025.0,463753.0,34.0128,-85.9708,1,43268154.9,43268154.9,43268154.9,43268154.9,AL,SERC
2,COW,2001,3,7,562534.0,37340.0,562534.0,34.0128,-85.9708,1,52484422.2,52484422.2,52484422.2,52484422.2,AL,SERC
3,COW,2001,4,7,600426.0,43914.0,600426.0,34.0128,-85.9708,2,56019745.8,56019745.8,56019745.8,56019745.8,AL,SERC
4,COW,2001,5,7,489574.0,35148.0,489574.0,34.0128,-85.9708,2,45677254.2,45677254.2,45677254.2,45677254.2,AL,SERC


In [32]:
all_states = []
for value in nerc_states.values():
    all_states.extend(value)
all_states = set(all_states)

In [33]:
all_states

{'AL',
 'AR',
 'AZ',
 'CA',
 'CO',
 'CT',
 'DC',
 'DE',
 'FL',
 'GA',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NC',
 'ND',
 'NE',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY'}

In [42]:
df_list = []
for state in all_states:
    try:
        df_list.append(fraction_state2nerc(eia_2015_annual_nerc,
                       state, region_col='nerc', fuel_col='type'))
    except:
        print(state)
        pass

DC


In [43]:
nerc_fractions = pd.concat(df_list)
nerc_fractions.set_index(['state', 'nerc', 'type'], inplace=True)
nerc_fractions.sort_index(inplace=True)
nerc_fractions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL,SERC,COW,1.0,1.0,1.0
AL,SERC,HYC,1.0,1.0,1.0
AL,SERC,NG,1.0,1.0,1.0
AL,SERC,OOG,1.0,1.0,1.0
AL,SERC,OTH,1.0,1.0,1.0


With the values below I can allocate extra state-level generation and fuel use to each of the NERC regions!

In [44]:
nerc_fractions.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WY,WECC,NG,1.0,1.0,1.0
WY,WECC,OOG,1.0,1.0,1.0
WY,WECC,OTH,1.0,1.0,1.0
WY,WECC,PEL,1.0,1.0,1.0
WY,WECC,WND,1.0,1.0,1.0


In [45]:
nerc_fractions.groupby(['state', 'type']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,% generation,% total fuel,% elec fuel
state,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AL,COW,1.0,1.0,1.0
AL,HYC,1.0,1.0,1.0
AL,NG,1.0,1.0,1.0
AL,OOG,1.0,1.0,1.0
AL,OTH,1.0,1.0,1.0
AL,PEL,1.0,1.0,1.0
AL,WAS,1.0,1.0,1.0
AL,WWW,1.0,1.0,1.0
AR,HYC,1.0,1.0,1.0
AR,NG,1.0,1.0,1.0


## Allocate extra gen from the state-level to regions

I still need to generate state-level total generation and fuel use!

In [77]:
idx = pd.IndexSlice

In [46]:
# a dictionary to match column names
nerc_frac_match = {'% generation': 'generation (mwh)',
                   '% total fuel': 'total fuel (mmbtu)',
                   '% elec fuel': 'elec fuel (mmbtu)'}

In [60]:
path = join(cwd, '..', 'Data storage', 'EIA state-level gen fuel CO2.csv')

state_total = pd.read_csv(path, parse_dates=['datetime'])

In [61]:
rename_cols(state_total)

In [62]:
state_total.head()

Unnamed: 0,end,f,last_updated,sector,series_id,start,units,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu),all fuel co2 (kg),elec fuel co2 (kg),datetime,quarter
0,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AK-99.M,200101.0,megawatthours,87.0,,,,,2001-01-01,1
1,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AL-99.M,200101.0,megawatthours,401167.59,,,,,2001-01-01,1
2,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AR-99.M,200101.0,megawatthours,136530.37,,,,,2001-01-01,1
3,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AZ-99.M,200101.0,megawatthours,453.0,,,,,2001-01-01,1
4,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-CA-99.M,200101.0,megawatthours,1717398.41,,,,,2001-01-01,1


In [63]:
state_total.dtypes

end                          float64
f                             object
last_updated                  object
sector                       float64
series_id                     object
start                        float64
units                         object
generation (mwh)             float64
total fuel (mmbtu)           float64
elec fuel (mmbtu)            float64
all fuel co2 (kg)            float64
elec fuel co2 (kg)           float64
datetime              datetime64[ns]
quarter                        int64
dtype: object

In [68]:
state_total['state'] = state_total['series_id'].str.split('-').str[1]
state_total['type'] = (state_total['series_id'].str.split('-').str[0].str.split('.').str[-1])
state_total['year'] = state_total['datetime'].dt.year
state_total['month'] = state_total['datetime'].dt.month

In [69]:
state_total.head()

Unnamed: 0,end,f,last_updated,sector,series_id,start,units,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu),all fuel co2 (kg),elec fuel co2 (kg),datetime,quarter,state,year,month,type
0,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AK-99.M,200101.0,megawatthours,87.0,,,,,2001-01-01,1,AK,2001,1,AOR
1,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AL-99.M,200101.0,megawatthours,401167.59,,,,,2001-01-01,1,AL,2001,1,AOR
2,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AR-99.M,200101.0,megawatthours,136530.37,,,,,2001-01-01,1,AR,2001,1,AOR
3,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-AZ-99.M,200101.0,megawatthours,453.0,,,,,2001-01-01,1,AZ,2001,1,AOR
4,201706.0,M,2017-08-24T11:46:12-04:00,99.0,ELEC.GEN.AOR-CA-99.M,200101.0,megawatthours,1717398.41,,,,,2001-01-01,1,CA,2001,1,AOR


In [70]:
cols = list(nerc_frac_match.values())
state_total = state_total.groupby(['state', 'year', 'month', 'type'])[cols].sum()
state_total.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,2001,1,AOR,87.0,,
AK,2001,1,COW,46903.0,1120000.0,872000.0
AK,2001,1,HYC,104549.0,,
AK,2001,1,NG,367521.0,4091000.0,3989000.0
AK,2001,1,PEL,71085.0,767000.0,763000.0


In [73]:
nercs = nerc_fractions.index.get_level_values('nerc').unique()

In [74]:
cols = list(nerc_frac_match.values())
eia_fac_type = group_fuel_cats(eia_fac, state_fuel_cat)
eia_fac_type = add_facility_location(eia_fac_type, location_labels, ['state'])
eia_fac_type = eia_fac_type.groupby(['state', 'year', 'month', 'type'])[cols].sum()

In [75]:
eia_fac_type.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AL,2001,1,COW,6547312.02,67581702.0,66429143.0
AL,2001,1,HYC,767601.0,7931621.15,7931621.15
AL,2001,1,NG,503553.867,7213851.0,4689532.0
AL,2001,1,NUC,2940300.0,30702612.0,30702612.0
AL,2001,1,OOG,2828.126,164765.0,31285.0


In [84]:
state_extra = (state_total.loc[idx[:, 2016:, :, :], :]
               - eia_fac_type.loc[idx[:, 2016:, :, :], :])
state_extra.dropna(how='all', inplace=True)
state_extra = state_extra.reorder_levels(['year', 'state', 'month', 'type'])
state_extra.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AL,1,COW,3172.785,143129.0,28261.0
2016,AL,1,HYC,300276.3,,
2016,AL,1,NG,149094.522,2875066.0,1677549.0
2016,AL,1,NUC,0.0,,
2016,AL,1,PEL,1987.279,89981.0,17614.0


In [85]:
state_extra.loc[2016].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,month,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL,1,COW,3172.785,143129.0,28261.0
AL,1,HYC,300276.3,,
AL,1,NG,149094.522,2875066.0,1677549.0
AL,1,NUC,0.0,,
AL,1,PEL,1987.279,89981.0,17614.0


In [87]:
state_extra.loc[(state_extra < -100).any(axis=1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AZ,6,HYC,-12722.670,,
2016,AZ,7,HYC,-12463.360,,
2016,AZ,8,HYC,-6672.920,,
2016,CA,1,OOG,-36609.329,,
2016,CA,2,OOG,-35875.716,,
2016,CA,3,OOG,-35981.849,,
2016,CA,4,OOG,-25453.429,,
2016,CA,5,OOG,-37564.717,,
2016,CA,6,OOG,-36266.771,,
2016,CA,7,OOG,-34884.006,,


Why is generation from facilities greater than state-level estimates in some cases?

In [88]:
nerc_fractions.sort_index(inplace=True)
nerc_fractions.loc[idx['TX', 'WECC', :], :].tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TX,WECC,NG,0.007022,0.007095,0.008178
TX,WECC,PEL,0.002196,0.000838,0.00238
TX,WECC,SUN,0.062262,0.061306,0.061306
TX,WECC,WND,0.003737,0.003891,0.003891


In [92]:
state_extra.sort_index(inplace=True)
state_extra.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AL,1,COW,3172.785,143129.0,28261.0
2016,AL,1,HYC,300276.3,,
2016,AL,1,NG,149094.522,2875066.0,1677549.0
2016,AL,1,NUC,0.0,,
2016,AL,1,PEL,1987.279,89981.0,17614.0


In [94]:
state_extra.loc[2016, :].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,month,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL,1,COW,3172.785,143129.0,28261.0
AL,1,HYC,300276.3,,
AL,1,NG,149094.522,2875066.0,1677549.0
AL,1,NUC,0.0,,
AL,1,PEL,1987.279,89981.0,17614.0


In [96]:
state_extra_2016 = state_extra.loc[2016, :]
# state_extra_2016.index = state_extra_2016.index.droplevel('year')

state_extra_2017 = state_extra.loc[2017, :]
# state_extra_2017.index = state_extra_2017.index.droplevel('year')

In [97]:
state_extra_2016.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,month,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL,1,COW,3172.785,143129.0,28261.0
AL,1,HYC,300276.3,,
AL,1,NG,149094.522,2875066.0,1677549.0
AL,1,NUC,0.0,,
AL,1,PEL,1987.279,89981.0,17614.0


In [98]:
df_list = []
for month in range(1, 13):
    df = nerc_fractions.copy()
    df['month'] = month
    df.set_index('month', append=True, inplace=True)
    df_list.append(df)

nerc_frac_monthly = pd.concat(df_list, axis=0)
nerc_frac_monthly.sort_index(inplace=True)
nerc_frac_monthly = (nerc_frac_monthly
                     .reorder_levels(['nerc', 'state', 'month', 'type']))

In [99]:
nerc_frac_monthly.loc['WECC'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,month,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AZ,1,HYC,1.0,1.0,1.0
AZ,2,HYC,1.0,1.0,1.0
AZ,3,HYC,1.0,1.0,1.0
AZ,4,HYC,1.0,1.0,1.0
AZ,5,HYC,1.0,1.0,1.0


This is it!

In [100]:
nerc_frac_monthly.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,% generation,% total fuel,% elec fuel
nerc,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SERC,AL,1,COW,1.0,1.0,1.0
SERC,AL,2,COW,1.0,1.0,1.0
SERC,AL,3,COW,1.0,1.0,1.0
SERC,AL,4,COW,1.0,1.0,1.0
SERC,AL,5,COW,1.0,1.0,1.0


In [101]:
nercs

Index(['SERC', '-', 'SPP', 'WECC', 'NPCC', 'RFC', 'FRCC', 'MRO', 'TRE'], dtype='object', name='nerc')

In [104]:
df_list_outer = []
for year in [2016, 2017]:
    df_list_inner = []
    for nerc in nercs:
        df = pd.concat([(nerc_frac_monthly
                         .loc[nerc]['% generation']
                         * state_extra
                         .loc[year]['generation (mwh)']).dropna(),
                        (nerc_frac_monthly.
                         loc[nerc]['% total fuel']
                         * state_extra
                         .loc[year]['total fuel (mmbtu)']).dropna(),
                        (nerc_frac_monthly
                         .loc[nerc]['% elec fuel']
                         * state_extra
                         .loc[year]['elec fuel (mmbtu)']).dropna()],
                        axis=1)
        df.columns = nerc_frac_match.values()
        df['nerc'] = nerc
        df['year'] = year
        df = df.groupby(['year', 'nerc', 'month', 'type']).sum()
        df_list_inner.append(df)

    df_list_outer.append(pd.concat(df_list_inner))
final = pd.concat(df_list_outer)

In [105]:
final

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,nerc,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,SERC,1,COW,1.250252e+05,4.777255e+06,1.075619e+06
2016,SERC,1,HYC,1.578592e+06,,
2016,SERC,1,NG,6.487438e+05,1.456982e+07,5.659672e+06
2016,SERC,1,OOG,2.896880e+04,,
2016,SERC,1,OTH,1.081877e+05,,
2016,SERC,1,PC,9.706641e+03,3.439330e+05,4.924300e+04
2016,SERC,1,PEL,1.467958e+04,2.632000e+05,1.213191e+05
2016,SERC,1,SUN,1.530236e+05,,
2016,SERC,1,WAS,1.460550e+05,,
2016,SERC,1,WND,2.933270e+03,,


In [106]:
path = join(cwd, '..', 'Data storage', 'NERC extra gen fuels_rev.csv')
final.to_csv(path)

## Check total generation in NPCC
Easy state boundaries

In [107]:
npcc_state = ['NY', 'CT', 'RI', 'MA', 'VT', 'NH', 'ME']

In [108]:
state_total.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,2001,1,AOR,87.0,,
AK,2001,1,COW,46903.0,1120000.0,872000.0
AK,2001,1,HYC,104549.0,,
AK,2001,1,NG,367521.0,4091000.0,3989000.0
AK,2001,1,PEL,71085.0,767000.0,763000.0


In [None]:
types = state

In [112]:
state_total.loc[idx[npcc_state, :, :, :], :].groupby(['year', 'month']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001,1,23460172.75,142758000.0,132585000.0
2001,2,20862523.25,118389000.0,108740000.0
2001,3,22436317.43,130155000.0,119528000.0
2001,4,19788902.63,100303000.0,91749000.0
2001,5,20521924.71,114841000.0,106227000.0
2001,6,23884138.30,133834000.0,125326000.0
2001,7,24133560.44,140101000.0,131259000.0
2001,8,27082507.52,172179000.0,162340000.0
2001,9,22354372.48,125217000.0,116323000.0
2001,10,22024272.79,121355000.0,112875000.0
