# Combine national and NERC data
Take the various file (EIA facilities, EIA state-level totals, EPA emissions, etc) and combine them to determine total generation, CO₂ emissions, etc nationally and at NERC regions.

In [7]:
import pandas as pd
import numpy as np
import os
from os.path import join
import sys
import json

idx = pd.IndexSlice
cwd = os.getcwd()
data_path = join(cwd, '..', 'Data storage')

### Date string for filenames
This will be inserted into all filenames (reading and writing)

In [8]:
file_date = '2018-03-06'

In [9]:
%load_ext watermark
%watermark -iv -v

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
pandas      0.22.0
numpy       1.14.2
json        2.0.9
CPython 3.6.4
IPython 6.2.1


In [10]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# add the 'src' directory as one where we can import modules
src_dir = join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

In [63]:
%aimport Data.make_data
from Data.make_data import states_in_nerc
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data, add_quarter
%aimport Analysis.index
from Analysis.index import g2lb, change_since_2005, generation_index
%aimport Analysis.index
from Analysis.index import facility_co2, adjust_epa_emissions, group_fuel_cats
%aimport util.utils
from util.utils import rename_cols, add_facility_location

## Load data

In [13]:
cwd = os.getcwd()
path = join(data_path, 'Derived data',
            'Facility gen fuels and CO2 {}.csv'.format(file_date))
eia_fac = pd.read_csv(path)

In [14]:
rename_cols(eia_fac)

In [15]:
eia_fac.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,lat,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,M,DFO,12,1001,1116.0,2017,114.587,1116.0,USA-IN,2018-02-28T02:03:13-05:00,39.9242,-87.4244,ALL,2017-12-01,4,81646.56,81646.56,81646.56,81646.56
1,M,DFO,11,1001,2772.0,2017,265.428,2772.0,USA-IN,2018-02-28T02:03:13-05:00,39.9242,-87.4244,ALL,2017-11-01,4,202799.52,202799.52,202799.52,202799.52
2,M,DFO,10,1001,4819.0,2017,460.695,4819.0,USA-IN,2018-02-28T02:03:13-05:00,39.9242,-87.4244,ALL,2017-10-01,4,352558.04,352558.04,352558.04,352558.04
3,M,DFO,9,1001,2720.0,2017,300.705,2720.0,USA-IN,2018-02-28T02:03:13-05:00,39.9242,-87.4244,ALL,2017-09-01,3,198995.2,198995.2,198995.2,198995.2
4,M,DFO,8,1001,6273.0,2017,609.358,6273.0,USA-IN,2018-02-28T02:03:13-05:00,39.9242,-87.4244,ALL,2017-08-01,3,458932.68,458932.68,458932.68,458932.68


In [16]:
path = join(data_path, 'Derived data',
            'Monthly EPA emissions {}.csv'.format(file_date))
epa = pd.read_csv(path)

In [17]:
epa.tail()

Unnamed: 0,CO2_MASS (kg),GLOAD (MW),HEAT_INPUT (mmBtu),MONTH,OP_TIME,ORISPL_CODE,SLOAD (1000lb/hr),YEAR
279427,0.0,0.0,337871.418,8,1494.69,880107,200653.0,2017
279428,0.0,0.0,308866.407,9,1447.3,880107,207589.0,2017
279429,0.0,0.0,327482.62,10,1488.72,880107,216266.0,2017
279430,0.0,0.0,341459.3,11,1440.0,880107,231679.0,2017
279431,0.0,0.0,360386.7,12,1488.0,880107,243984.0,2017


## Adjusted facility emissions and generation
I have both the state and custom fuel categories here, but am only using the state fuel categories for now.

In [18]:
fuel_cat_folder = join(data_path, 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')

with open(state_cats_path, 'r') as f:
    state_fuel_cat = json.load(f)
    
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')
with open(custom_cats_path, 'r') as f:
    custom_fuel_cat = json.load(f)

In [19]:
co2, gen_fuels_custom = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=False)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories
Gen/fuels to custom categories


In [20]:
co2, gen_fuels_state = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=True)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [21]:
co2.tail()

Unnamed: 0,year,month,plant id,final co2 (kg)
1116371,2017,12,61357,0.0
1116372,2017,12,61407,0.0
1116373,2017,12,61422,0.0
1116374,2017,12,61512,0.0
1116375,2017,12,61561,0.0


In [22]:
gen_fuels_state.tail()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
1564478,WWW,2017,12,57919,296725.0,8933.912,44934.0,34.385278,-80.067778,4,0.0,0.0,27832805.0,4214809.2
1564479,WWW,2017,12,58523,51102.0,1828.0,20790.0,48.3877,-114.2409,4,0.0,0.0,4793367.6,1950102.0
1564480,WWW,2017,12,58574,123607.0,7832.0,123607.0,39.648056,-106.943056,4,0.0,0.0,11594336.6,11594336.6
1564481,WWW,2017,12,58944,0.0,0.0,0.0,30.748333,-94.4375,4,0.0,0.0,0.0,0.0
1564482,WWW,2017,12,60340,500075.0,30748.661,349715.0,31.5559,-84.1103,4,0.0,0.0,46907035.0,32803267.0


### Check generation and fuel consumption totals

Interesting - there is some small part of generation that I'm losing along the way. It's 5 orders of magnitude smaller than the total though.

In [23]:
eia_fac['generation (mwh)'].sum()

67601240020.031395

In [24]:
gen_fuels_state['generation (mwh)'].sum()

67601426136.03142

Fuel consumption is pretty identical though

In [25]:
eia_fac['total fuel (mmbtu)'].sum(), eia_fac['elec fuel (mmbtu)'].sum()

(707222059073.79, 664121418912.0795)

In [26]:
gen_fuels_state['total fuel (mmbtu)'].sum(), gen_fuels_state['elec fuel (mmbtu)'].sum()

(707222059073.7903, 664121418912.08)

## Extra gen/fuels from non-reporting

In [27]:
%aimport Analysis.index
from Analysis.index import extra_emissions_gen

Total EIA generation/fuel consumption and emission factors

In [28]:
cwd = os.getcwd()
path = join(data_path, 'Derived data',
            'EIA country-wide gen fuel CO2 {}.csv'.format(file_date))
eia_total = pd.read_csv(path)

path = join(data_path,
            'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

In [29]:
eia_total.head()

Unnamed: 0,type,year,month,geography,end,f,last_updated,sector,series_id,start,units,generation (MWh),total fuel (mmbtu),elec fuel (mmbtu),all fuel CO2 (kg),elec fuel CO2 (kg),datetime,quarter
0,AOR,2001,1,USA-AK,201712.0,M,2018-02-28T02:03:13-05:00,99.0,ELEC.GEN.AOR-AK-99.M,200101.0,megawatthours,87.0,,,,,2001-01-01,1
1,AOR,2001,1,USA-AL,201712.0,M,2018-02-28T02:03:13-05:00,99.0,ELEC.GEN.AOR-AL-99.M,200101.0,megawatthours,401167.59,,,,,2001-01-01,1
2,AOR,2001,1,USA-AR,201712.0,M,2018-02-28T02:03:13-05:00,99.0,ELEC.GEN.AOR-AR-99.M,200101.0,megawatthours,136530.37,,,,,2001-01-01,1
3,AOR,2001,1,USA-AZ,201712.0,M,2018-02-28T02:03:13-05:00,99.0,ELEC.GEN.AOR-AZ-99.M,200101.0,megawatthours,453.0,,,,,2001-01-01,1
4,AOR,2001,1,USA-CA,201712.0,M,2018-02-28T02:03:13-05:00,99.0,ELEC.GEN.AOR-CA-99.M,200101.0,megawatthours,1717398.41,,,,,2001-01-01,1


### Calculate CO₂, generation, and fuel consumption that is not captured by facilities

In [30]:
extra_co2, extra_gen_fuel = extra_emissions_gen(gen_fuels_state, eia_total, ef)

Results match what I have previously found in the notebooks up on GitHub (Emissions Index repo)

In [31]:
extra_gen_fuel.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COW,2001,1,16916390.0,1011665.061,13155576.0
COW,2001,2,14970346.0,911345.865,11576491.0
COW,2001,3,18320543.0,1153327.913,14764162.0
COW,2001,4,13530117.0,776339.623,10374008.0
COW,2001,5,14016024.0,801267.923,10922463.0


In [26]:
extra_gen_fuel.loc[idx['WND',:,:]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WND,2001,1,-3.446894e+06,5.566881e+04,-3.446894e+06
WND,2001,2,-3.288177e+06,1.130211e+05,-3.288177e+06
WND,2001,3,-4.798975e+06,6.768812e+04,-4.798975e+06
WND,2001,4,-6.166244e+06,8.794347e+04,-6.166244e+06
WND,2001,5,-5.694490e+06,8.393153e+04,-5.694490e+06
WND,2001,6,-6.120034e+06,7.735354e+04,-6.120034e+06
WND,2001,7,-5.827621e+06,7.101049e+04,-5.827621e+06
WND,2001,8,-5.409429e+06,5.369600e+04,-5.409429e+06
WND,2001,9,-4.550627e+06,4.983153e+04,-4.550627e+06
WND,2001,10,-5.668965e+06,5.820182e+04,-5.668965e+06


## Total CO₂ (national)
Combine adjusted CO₂ at facilities and CO₂ from fuel consumption that is not captured by facility data.

In [32]:
facility_co2 = co2.groupby(['year', 'month']).sum()

In [33]:
facility_co2.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,final co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,8,59956777,168566900000.0
2017,9,60081740,138675100000.0
2017,10,60498933,127508800000.0
2017,11,61215677,123380800000.0
2017,12,62726265,143848500000.0


In [34]:
extra_co2.loc[idx['NG', :, :],:].tail(n=11)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,all fuel co2 (kg),elec fuel co2 (kg)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
NG,2017,2,6794671000.0,5074766000.0
NG,2017,3,8979710000.0,7158304000.0
NG,2017,4,8153363000.0,6487786000.0
NG,2017,5,9068398000.0,7404283000.0
NG,2017,6,10911650000.0,9142059000.0
NG,2017,7,14422800000.0,12492170000.0
NG,2017,8,13335760000.0,11462650000.0
NG,2017,9,11332810000.0,9560062000.0
NG,2017,10,9850189000.0,8133934000.0
NG,2017,11,9016542000.0,7232485000.0


In [35]:
national_co2 = (facility_co2.loc[:, 'final co2 (kg)']
                + extra_co2.loc[:, 'elec fuel co2 (kg)']
                           .groupby(['year', 'month']).sum())
national_co2.name = 'final co2 (kg)'

In [36]:
national_co2.head()

year  month
2001  1        2.145929e+11
      2        1.799442e+11
      3        1.887242e+11
      4        1.744977e+11
      5        1.888085e+11
Name: final co2 (kg), dtype: float64

## National Index and gen by fuels

In [47]:
extra_gen_fuel.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WWW,2017,8,-55058738.0,2117348.405,-13990971.0
WWW,2017,9,-49957910.0,1781924.065,-12090584.0
WWW,2017,10,-53801581.0,1900680.564,-13339160.0
WWW,2017,11,-55584275.0,1838602.253,-13369010.0
WWW,2017,12,-59505061.0,2001088.969,-14322102.0


In [46]:
gen_fuels_state.groupby(['year', 'month']).sum().tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017,8,92101054,3243524000.0,330363400.0,3099425000.0,123777.238892,-303729.803934,9816,173116300000.0,166458500000.0,179877200000.0,169114700000.0
2017,9,92336205,2821822000.0,284938500.0,2685294000.0,123966.011103,-304186.929849,9825,143628900000.0,137268300000.0,149761000000.0,139641700000.0
2017,10,92793908,2675856000.0,268394800.0,2533375000.0,124485.258253,-305617.871689,13160,132902200000.0,126312200000.0,139453400000.0,128821800000.0
2017,11,93515311,2602604000.0,260115000.0,2458400000.0,124758.874949,-306277.678952,13188,128774100000.0,122200400000.0,135509100000.0,124702300000.0
2017,12,95030773,2969232000.0,298134600.0,2813103000.0,125632.30018,-308773.344081,13288,149846100000.0,142716600000.0,157008500000.0,145362500000.0


### Total national generation (all fuels)
Add generation/fuel consumption reported by facilities (with state-level fuel codes) and extra generation/fuel consumption (which also uses state-level fuel codes).

In [53]:
extra_gen_fuel.merge(extra_co2, left_index=True, right_index=True).columns

Index(['total fuel (mmbtu)', 'generation (mwh)', 'elec fuel (mmbtu)',
       'all fuel co2 (kg)', 'elec fuel co2 (kg)'],
      dtype='object')

In [52]:
gen_fuels_state.groupby(['type', 'year', 'month']).sum().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
COW,2001,1,8550284,1848875000.0,176275400.0,1810304000.0,25142.050268,-53175.8593,645,175575700000.0,171962900000.0,175575700000.0,171962900000.0
COW,2001,2,8550284,1568672000.0,148824100.0,1536017000.0,25142.050268,-53175.8593,645,149006500000.0,145947600000.0,149006500000.0,145947600000.0
COW,2001,3,8550284,1626246000.0,154115700.0,1591151000.0,25142.050268,-53175.8593,645,154428600000.0,151142400000.0,154428600000.0,151142400000.0
COW,2001,4,8550284,1471568000.0,139894300.0,1439361000.0,25142.050268,-53175.8593,1290,139718100000.0,136701800000.0,139718100000.0,136701800000.0
COW,2001,5,8498259,1595267000.0,150791600.0,1563698000.0,25147.539168,-53176.2288,1290,151542400000.0,148585600000.0,151542400000.0,148585600000.0


In [66]:
national_gen = (gen_fuels_state
                .groupby(['type', 'year', 'month'])['generation (mwh)'].sum()
                .add(extra_gen_fuel['generation (mwh)'], fill_value=0))

In [67]:
national_gen.head()

type  year  month
COW   2001  1        177287111.0
            2        149735483.0
            3        155269010.0
            4        140670652.0
            5        151592915.0
Name: generation (mwh), dtype: float64

In [68]:
national_gen.groupby(['year', 'month']).sum().tail()

year  month
2017  8        3.842233e+08
      9        3.360709e+08
      10       3.202084e+08
      11       3.080090e+08
      12       3.471760e+08
Name: generation (mwh), dtype: float64

#### Regroup generation from state codes to my custom fuel codes

In [69]:
national_gen = group_fuel_cats(national_gen.reset_index(), custom_fuel_cat,
                               'type', 'fuel category').set_index(['fuel category', 'year', 'month'])

In [70]:
national_gen.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh)
fuel category,year,month,Unnamed: 3_level_1
Wind,2017,8,13064719.5
Wind,2017,9,17264878.55
Wind,2017,10,24814868.01
Wind,2017,11,23315592.47
Wind,2017,12,22757913.41


In [61]:
total_gen = national_gen.groupby(['year', 'month']).sum()
total_gen.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,generation (mwh),elec fuel co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,8,384223300.0,179430500000.0
2017,9,336070900.0,149089100000.0
2017,10,320208400.0,135530500000.0
2017,11,308009000.0,130457500000.0
2017,12,347176000.0,151920500000.0


### National Index

In [42]:
national_co2.head()

year  month
2001  1        2.145929e+11
      2        1.799442e+11
      3        1.887242e+11
      4        1.744977e+11
      5        1.888085e+11
Name: final co2 (kg), dtype: float64

In [43]:
total_gen.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,generation (mwh)
year,month,Unnamed: 2_level_1
2017,8,384223300.0
2017,9,336070900.0
2017,10,320208400.0
2017,11,308009000.0
2017,12,347176000.0


In [71]:
national_index = total_gen.copy()
national_index['final co2 (kg)'] = national_co2
national_index['index (g/kwh)'] = (national_index['final co2 (kg)']
                                   / national_index['generation (mwh)'])
national_index.reset_index(inplace=True)
add_quarter(national_index)
g2lb(national_index)
change_since_2005(national_index)

In [72]:
national_index.tail()

Unnamed: 0,year,month,generation (mwh),elec fuel co2 (kg),final co2 (kg),index (g/kwh),datetime,quarter,index (lb/mwh),change since 2005
199,2017,8,384223300.0,179430500000.0,181539000000.0,472.482908,2017-08-01,3,1041.635819,-0.211385
200,2017,9,336070900.0,149089100000.0,150495900000.0,447.809966,2017-09-01,3,987.241851,-0.252567
201,2017,10,320208400.0,135530500000.0,136727100000.0,426.994115,2017-10-01,4,941.351226,-0.28731
202,2017,11,308009000.0,130457500000.0,131638000000.0,427.383523,2017-11-01,4,942.209714,-0.28666
203,2017,12,347176000.0,151920500000.0,153052300000.0,440.849405,2017-12-01,4,971.896597,-0.264185


In [73]:
path = join(data_path, 'National data',
            'National index {}.csv'.format(file_date))
national_index.to_csv(path, index=False)

### Percent generation by custom fuel type

In [74]:
df_list = []
for fuel in national_gen.index.get_level_values('fuel category').unique():
    percent_gen = national_gen.loc[fuel].divide(total_gen, fill_value=0)
    percent_gen['fuel category'] = fuel
    percent_gen.set_index('fuel category', inplace=True, append=True)
    df_list.append(percent_gen)
percent_gen = pd.concat(df_list)

In [75]:
path = join(data_path, 'National data',
            'National generation {}.csv'.format(file_date))
national_gen.to_csv(path)

path = join(data_path, 'National data',
            'National percent gen {}.csv'.format(file_date))
percent_gen.to_csv(path)

## Facility state and lat/lon file generation
~~Create a .csv with lat/lon and state code for each facility. Need to manually add the NERC region label.~~ ~~I've done this with a spatial join between the lat/lon and NERC shapefiles in QGIS. It should be possible to do the spatial join in [GeoPandas](http://geopandas.org/).  Could also add custom region labels (e.g. eGRID subregions, ISO/RTO boundaries, etc).~~

This isn't possible with a spatial join because NERC regions aren't strictly based on geography. Instead, I've taken the NERC codes from EIA-860 and assigned unknown plants (mostly those that retired before 2012 when modern NERCs were mostly defined, and those that were assigned a plant id in 2017) NERC labels using a k-nearest neighbors algorithm and lat/lon information.

See the `Assign NERC region labels` notebook for the creation of this file.

## Fraction of estimated gen/fuels in each NERC region

**NOTE** EIA changed the reporting requirements for wind and solar facilities in 2017. I'm now going to use the list of annual facilities from 2017 rather than from 2015, but still use the reported generation by those facilities in 2015.

In [76]:
%aimport Data.make_data
from Data.make_data import get_annual_plants

In [77]:
annual_ids_2015 = get_annual_plants(2015)

In [78]:
annual_ids_2017 = get_annual_plants(2017)

Combine the lists of annual plants in 2015 and 2017. This lets us catch facilities that have gone from monthly to annual since 2015, but it also includes plants that were annual in 2015 an may have retired.

There is the possibility of an error in allocation to NERC regions for 2016 state-level generation when using the facilities that changed to annual in 2017. But since the state-level generation is so much smaller in 2016 I don't think this is much of an issue.

In [79]:
annual_ids = set(annual_ids_2015.tolist() + annual_ids_2017.tolist())

In [80]:
len(annual_ids)

7146

### Add NERC region labels

In [81]:
%aimport Analysis.state2nerc
from Analysis.state2nerc import fraction_state2nerc, add_region

In [82]:
cwd = os.getcwd()
path = join(data_path, 'Facility labels',
            'Facility locations_knn.csv')
location_labels = pd.read_csv(path)

In [83]:
nerc_state_path = join(data_path, 'Derived data',
                  'NERC_states.json')

with open(nerc_state_path, 'r') as f:
    nerc_states = json.load(f)

Added the filter that year must be 2015 - was getting all 2015 annual plants, but for all years!

In [84]:
eia_2015_annual = eia_fac.loc[(eia_fac['plant id'].isin(annual_ids)) & 
                              (eia_fac['year'] == 2015)].copy()

# Group to state-level fuel categories
eia_2015_annual = group_fuel_cats(eia_2015_annual, state_fuel_cat)

In [85]:
eia_2015_annual_nerc = add_facility_location(eia_2015_annual, location_labels, 
                                        labels=['state', 'nerc'])

This is 2015 data on annual reporting facilities (from both 2015 and 2017)

In [86]:
eia_2015_annual_nerc.tail()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),state,nerc
92433,WWW,2015,8,58707,211987.0,13966.0,211987.0,33.238611,-80.450278,3,0.0,0.0,19884380.6,19884380.6,SC,RFC
92434,WWW,2015,9,58707,214505.0,13769.0,214505.0,33.238611,-80.450278,3,0.0,0.0,20120569.0,20120569.0,SC,RFC
92435,WWW,2015,10,58707,162426.0,8554.0,162426.0,33.238611,-80.450278,4,0.0,0.0,15235558.8,15235558.8,SC,RFC
92436,WWW,2015,11,58707,245845.0,13352.0,245845.0,33.238611,-80.450278,4,0.0,0.0,23060261.0,23060261.0,SC,RFC
92437,WWW,2015,12,58707,249553.0,14099.0,249553.0,33.238611,-80.450278,4,0.0,0.0,23408071.4,23408071.4,SC,RFC


In [87]:
# Get a list of all state abbreviations

all_states = []
for value in nerc_states.values():
    all_states.extend(value)
all_states = set(all_states)

In [88]:
df_list = []

for state in all_states:
    try:
        df_list.append(fraction_state2nerc(eia_2015_annual_nerc,
                       state, region_col='nerc', fuel_col='type'))
    except:
        print(state)
        pass

In [89]:
nerc_fraction = pd.concat(df_list)
nerc_fraction.set_index(['state', 'nerc', 'type'], inplace=True)
nerc_fraction.sort_index(inplace=True)

In [90]:
nerc_fraction.loc[idx['TX', :, 'WND'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TX,SPP,WND,0.125231,0.125231,0.125231
TX,TRE,WND,0.874769,0.874769,0.874769
TX,WECC,WND,0.0,0.0,0.0


With the values below I can allocate extra state-level generation and fuel use to each of the NERC regions!

In [91]:
nerc_fraction.loc['TX']

Unnamed: 0_level_0,Unnamed: 1_level_0,% generation,% total fuel,% elec fuel
nerc,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SERC,HYC,0.260935,0.260935,0.260935
SERC,NG,0.124994,0.153379,0.092145
SERC,OOG,0.280953,0.678359,0.159521
SERC,OTH,-0.006302,0.025416,0.006614
SERC,WAS,0.032216,0.022106,0.03111
SERC,WWW,-0.492566,0.19064,0.257302
SPP,NG,0.160722,0.174954,0.185692
SPP,OOG,0.603794,0.200676,0.759801
SPP,PEL,0.478368,0.745665,0.29
SPP,WAS,0.140836,0.382821,0.131444


In [92]:
nerc_fraction.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WY,WECC,NG,1.0,1.0,1.0
WY,WECC,OOG,1.0,1.0,1.0
WY,WECC,OTH,1.0,1.0,1.0
WY,WECC,PEL,1.0,1.0,1.0
WY,WECC,WND,1.0,1.0,1.0


Making sure that no values are greater than 1 (within tolerance)

In [93]:
(nerc_fraction.groupby(['state', 'type']).sum() > 1.0001).any()

% generation    False
% total fuel    False
% elec fuel     False
dtype: bool

In [94]:
(nerc_fraction.groupby(['state', 'type']).sum()
 .loc[(nerc_fraction.groupby(['state', 'type']).sum() > 1).any(axis=1)])

Unnamed: 0_level_0,Unnamed: 1_level_0,% generation,% total fuel,% elec fuel
state,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AR,NG,1.0,1.0,1.0
TX,PEL,1.0,1.0,1.0


## Allocate extra gen from the state-level to regions

I still need to generate state-level total generation and fuel use!

In [95]:
idx = pd.IndexSlice

In [96]:
# a dictionary to match column names
nerc_frac_match = {'% generation': 'generation (mwh)',
                   '% total fuel': 'total fuel (mmbtu)',
                   '% elec fuel': 'elec fuel (mmbtu)'}

### Load state-level total gen/fuel consumption

In [98]:
path = join(data_path, 'Derived data',
            'EIA state-level gen fuel CO2 {}.csv'.format(file_date))

state_total = pd.read_csv(path, parse_dates=['datetime'])

In [99]:
rename_cols(state_total)
state_total['state'] = state_total['geography'].str[-2:]

In [100]:
state_total.head()

Unnamed: 0,type,year,month,geography,end,f,last_updated,sector,series_id,start,units,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu),all fuel co2 (kg),elec fuel co2 (kg),datetime,quarter,state
0,AOR,2001,1,USA-AK,201712,M,2018-02-28T02:03:13-05:00,99,ELEC.GEN.AOR-AK-99.M,200101,megawatthours,87.0,,,,,2001-01-01,1,AK
1,AOR,2001,1,USA-AL,201712,M,2018-02-28T02:03:13-05:00,99,ELEC.GEN.AOR-AL-99.M,200101,megawatthours,401167.59,,,,,2001-01-01,1,AL
2,AOR,2001,1,USA-AR,201712,M,2018-02-28T02:03:13-05:00,99,ELEC.GEN.AOR-AR-99.M,200101,megawatthours,136530.37,,,,,2001-01-01,1,AR
3,AOR,2001,1,USA-AZ,201712,M,2018-02-28T02:03:13-05:00,99,ELEC.GEN.AOR-AZ-99.M,200101,megawatthours,453.0,,,,,2001-01-01,1,AZ
4,AOR,2001,1,USA-CA,201712,M,2018-02-28T02:03:13-05:00,99,ELEC.GEN.AOR-CA-99.M,200101,megawatthours,1717398.41,,,,,2001-01-01,1,CA


In [101]:
state_total.dtypes

type                          object
year                           int64
month                          int64
geography                     object
end                            int64
f                             object
last_updated                  object
sector                         int64
series_id                     object
start                          int64
units                         object
generation (mwh)             float64
total fuel (mmbtu)           float64
elec fuel (mmbtu)            float64
all fuel co2 (kg)            float64
elec fuel co2 (kg)           float64
datetime              datetime64[ns]
quarter                        int64
state                         object
dtype: object

Simplify the dataframe

In [102]:
cols = list(nerc_frac_match.values())
state_total = state_total.groupby(['state', 'year', 'month', 'type'])[cols].sum()
state_total.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,2001,1,AOR,87.0,0.0,0.0
AK,2001,1,COW,46903.0,1120000.0,872000.0
AK,2001,1,HYC,104549.0,0.0,0.0
AK,2001,1,NG,367521.0,4091000.0,3989000.0
AK,2001,1,PEL,71085.0,767000.0,763000.0


In [103]:
# list of NERC regions
nercs = nerc_fraction.index.get_level_values('nerc').unique()

### Group the facility data to state fuel categories and add state labels

In [104]:
cols = list(nerc_frac_match.values())
eia_fac_type = group_fuel_cats(eia_fac, state_fuel_cat)
eia_fac_type = add_facility_location(eia_fac_type, location_labels, ['state'])
eia_fac_type = eia_fac_type.groupby(['state', 'year', 'month', 'type'])[cols].sum()

In [105]:
eia_fac_type.loc[idx['OK', 2017, :, 'WND'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
OK,2017,1,WND,982955.0,9074640.0,9074640.0
OK,2017,2,WND,1053595.0,9726789.0,9726789.0
OK,2017,3,WND,1237665.0,11426122.0,11426122.0
OK,2017,4,WND,1164423.0,10749954.0,10749954.0
OK,2017,5,WND,960103.0,8863670.0,8863670.0
OK,2017,6,WND,987307.0,9114817.0,9114817.0
OK,2017,7,WND,741231.0,6843046.0,6843046.0
OK,2017,8,WND,496488.0,4583577.0,4583577.0
OK,2017,9,WND,942175.0,8698158.0,8698158.0
OK,2017,10,WND,1316521.0,12154122.0,12154122.0


In [106]:
eia_fac_type.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
state,year,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,2001,1,COW,26493.302,576574.0,520312.0
AK,2001,1,HYC,103940.302,1074015.14,1074015.14
AK,2001,1,NG,314206.726,3372210.0,3270620.0
AK,2001,1,PEL,65373.158,707882.0,704150.0
AK,2001,1,WND,86.518,893.99,893.99


### Calculate the extra gen/fuel consumption at the state levels
**Only worrying about extra generation from 2016 forward.**

In [107]:
state_extra = (state_total.loc[idx[:, 2016:, :, :], :]
               - eia_fac_type.loc[idx[:, 2016:, :, :], :])
state_extra.dropna(how='all', inplace=True)
state_extra = state_extra.reorder_levels(['year', 'state', 'month', 'type'])
state_extra.sort_index(inplace=True)
state_extra.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AK,1,COW,0.0,0.0,1.0
2016,AK,1,HYC,0.004,-1451631.0,-1451631.0
2016,AK,1,NG,-7127.76,-93858.0,-93863.0
2016,AK,1,PEL,-0.001,-4.0,0.0
2016,AK,1,WAS,-0.001,-27910.0,-27910.0


The huge jump in Texas state-level wind generation is why I need to use more recent lists of annual reporting facilities rather than the list from 2015.

In [108]:
state_extra.loc[idx[:, 'TX', :, 'WND'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,TX,1,WND,-112214.996,-42126843.0,-42126843.0
2016,TX,2,WND,-125883.003,-48434096.0,-48434096.0
2016,TX,3,WND,-151810.997,-53426416.0,-53426416.0
2016,TX,4,WND,-129348.002,-44924684.0,-44924684.0
2016,TX,5,WND,-135324.999,-49005417.0,-49005417.0
2016,TX,6,WND,-95285.004,-35791076.0,-35791076.0
2016,TX,7,WND,-118644.004,-53486104.0,-53486104.0
2016,TX,8,WND,-70829.002,-34831264.0,-34831264.0
2016,TX,9,WND,-83498.0,-36917999.0,-36917999.0
2016,TX,10,WND,-118149.998,-51416318.0,-51416318.0


Why is generation from facilities greater than state-level estimates in some cases?

In [109]:
state_extra.loc[(state_extra < -100).any(axis=1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AK,1,HYC,4.000000e-03,-1451631.0,-1451631.0
2016,AK,1,NG,-7.127760e+03,-93858.0,-93863.0
2016,AK,1,WAS,-1.000000e-03,-27910.0,-27910.0
2016,AK,1,WND,-1.114910e+02,-40223.0,-40223.0
2016,AK,2,HYC,1.000000e-03,-1197011.0,-1197011.0
2016,AK,2,NG,-6.190797e+03,-81521.0,-81526.0
2016,AK,2,WAS,-4.000000e-03,-20262.0,-20262.0
2016,AK,2,WND,-2.100260e+02,-75774.0,-75774.0
2016,AK,3,HYC,3.000000e-03,-1529981.0,-1529981.0
2016,AK,3,NG,-6.518067e+03,-85829.0,-85835.0


Sort the index of each dataframe to make sure they can be easily combined.

In [110]:
nerc_fraction.sort_index(inplace=True)
nerc_fraction.loc[idx['TX', 'WECC', :], :].tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,% generation,% total fuel,% elec fuel
state,nerc,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TX,WECC,NG,0.061815,0.056091,0.071281
TX,WECC,PEL,0.033746,0.008952,0.032607
TX,WECC,SUN,0.067911,0.067911,0.067911
TX,WECC,WND,0.0,0.0,0.0


In [111]:
state_extra.sort_index(inplace=True)
state_extra.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,state,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016,AK,1,COW,0.0,0.0,1.0
2016,AK,1,HYC,0.004,-1451631.0,-1451631.0
2016,AK,1,NG,-7127.76,-93858.0,-93863.0
2016,AK,1,PEL,-0.001,-4.0,0.0
2016,AK,1,WAS,-0.001,-27910.0,-27910.0


Create a copy of the `nerc_fraction` dataframe with repeated values for every month of the year, so that they MultiIndex matches the `state_extra` MultiIndex

In [112]:
df_list = []
for month in range(1, 13):
    df = nerc_fraction.copy()
    df['month'] = month
    df.set_index('month', append=True, inplace=True)
    df_list.append(df)

nerc_frac_monthly = pd.concat(df_list, axis=0)
nerc_frac_monthly.sort_index(inplace=True)
nerc_frac_monthly = (nerc_frac_monthly
                     .reorder_levels(['nerc', 'state', 'month', 'type']))

In [113]:
nercs

Index(['SERC', 'SPP', 'WECC', 'MRO', 'NPCC', 'RFC', 'FRCC', 'TRE'], dtype='object', name='nerc')

Cycle through each year (2016 and 2017 in this case) and each NERC, multiplying the state-level extra generation, total fuel consumption, and fuel consumption for electricity by the share that should be allocated to each NERC.

In [114]:
df_list_outer = []
for year in [2016, 2017]:
    df_list_inner = []
    for nerc in nercs:
        df = pd.concat([(nerc_frac_monthly
                         .loc[nerc]['% generation']
                         * state_extra
                         .loc[year]['generation (mwh)']).dropna(),
                        (nerc_frac_monthly.
                         loc[nerc]['% total fuel']
                         * state_extra
                         .loc[year]['total fuel (mmbtu)']).dropna(),
                        (nerc_frac_monthly
                         .loc[nerc]['% elec fuel']
                         * state_extra
                         .loc[year]['elec fuel (mmbtu)']).dropna()],
                        axis=1)
        df.columns = nerc_frac_match.values()
        df['nerc'] = nerc
        df['year'] = year
        df = df.groupby(['year', 'nerc', 'month', 'type']).sum()
        df_list_inner.append(df)

    df_list_outer.append(pd.concat(df_list_inner))
final = pd.concat(df_list_outer)
final.sort_index(inplace=True)

In [115]:
nerc_frac_monthly.sort_index(inplace=True)

Although the dataframe is called `final`, it's really just the final allocated extra state-level generation/fuel consumption

In [116]:
final.loc[idx[2017, 'SPP', :, 'WND'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu)
year,nerc,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017,SPP,1,WND,2225204.0,-19723010.0,-19723010.0
2017,SPP,2,WND,2262461.0,-20490960.0,-20490960.0
2017,SPP,3,WND,2650092.0,-25635550.0,-25635550.0
2017,SPP,4,WND,2571236.0,-25349800.0,-25349800.0
2017,SPP,5,WND,2235343.0,-21735110.0,-21735110.0
2017,SPP,6,WND,1870060.0,-20359200.0,-20359200.0
2017,SPP,7,WND,1531402.0,-16341420.0,-16341420.0
2017,SPP,8,WND,1277675.0,-12028080.0,-12028080.0
2017,SPP,9,WND,1819973.0,-20337560.0,-20337560.0
2017,SPP,10,WND,2391769.0,-26169700.0,-26169700.0


In [118]:
path = join(data_path, 'Derived data',
            'NERC extra gen fuels {}.csv'.format(file_date))
final.to_csv(path)