In [24]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from os.path import join
import sys
import json
import geopandas as gpd

In [3]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [25]:
# add the 'src' directory as one where we can import modules
src_dir = join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

In [90]:
%aimport Data.make_data
from Data.make_data import states_in_nerc
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data
%aimport Analysis.index
from Analysis.index import facility_co2, adjust_epa_emissions, group_fuel_cats
%aimport util.utils
from util.utils import rename_cols, add_facility_location

## Load data

In [78]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'Facility gen fuels and CO2 2017-08-31.zip')
eia_fac = pd.read_csv(path)

In [79]:
rename_cols(eia_fac)

In [80]:
eia_fac.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,lat,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,M,SUB,6,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-06-01,2,0.0,0.0,0.0,0.0
1,M,SUB,5,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-05-01,2,0.0,0.0,0.0,0.0
2,M,SUB,4,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-04-01,2,0.0,0.0,0.0,0.0
3,M,SUB,3,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-03-01,1,0.0,0.0,0.0,0.0
4,M,SUB,2,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-02-01,1,0.0,0.0,0.0,0.0


In [48]:
path = join(cwd, '..', 'Data storage', 'Derived data',
            'Monthly EPA emissions 2017-08-31.csv')
epa = pd.read_csv(path)

## Test facility_emissions_gen

In [49]:
fuel_cat_folder = join(cwd, '..', 'Data storage', 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')

with open(state_cats_path, 'r') as f:
    state_fuel_cat = json.load(f)
    
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')
with open(custom_cats_path, 'r') as f:
    custom_fuel_cat = json.load(f)

In [32]:
co2, gen_fuels_custom = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=False)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [50]:
co2, gen_fuels_state = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=True)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [51]:
co2.head()

Unnamed: 0,year,month,plant id,final co2 (kg)
0,2001,1,2,0.0
1,2001,1,3,962541100.0
2,2001,1,4,0.0
3,2001,1,7,67000920.0
4,2001,1,8,753020400.0


In [199]:
gen_fuels_state.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,COW,2001,1,3,8275496.0,852306.0,8275496.0,31.0069,-88.0103,1,772103776.8,772103776.8,772103776.8,772103776.8
1,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6
2,COW,2001,1,8,6910786.0,706857.0,6910786.0,33.644344,-87.196486,1,644776333.8,644776333.8,644776333.8,644776333.8
3,COW,2001,1,10,3108892.0,320454.0,3108892.0,32.6017,-87.7811,1,290059623.6,290059623.6,290059623.6,290059623.6
4,COW,2001,1,26,9745127.0,1005460.0,9745127.0,33.244211,-86.458056,1,909220349.1,909220349.1,909220349.1,909220349.1


### Check generation and fuel consumption totals

Interesting - there is some small part of generation that I'm losing along the way. It's 5 orders of magnitude smaller than the total though.

In [121]:
eia_fac['generation (mwh)'].sum()

65594379743.76258

In [123]:
gen_fuels_state['generation (mwh)'].sum()

65594327524.76352

Fuel consumption is pretty identical though

In [124]:
eia_fac['total fuel (mmbtu)'].sum(), eia_fac['elec fuel (mmbtu)'].sum()

(686393145435.8103, 645653921528.091)

In [125]:
gen_fuels_state['total fuel (mmbtu)'].sum(), gen_fuels_state['elec fuel (mmbtu)'].sum()

(686393145435.7861, 645653921528.0757)

## Extra gen/fuels from non-reporting

In [200]:
%aimport Analysis.index
from Analysis.index import extra_emissions_gen

Total EIA generation/fuel consumption and emission factors

In [201]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'EIA country-wide gen fuel CO2 2017-08-31.csv')
eia_total = pd.read_csv(path)

path = join(cwd, '..', 'Data storage',
            'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

In [202]:
eia_total.head()

Unnamed: 0,end,f,geography,last_updated,sector,series_id,start,type,units,year,month,generation (MWh),datetime,quarter,total fuel (mmbtu),elec fuel (mmbtu),all fuel CO2 (kg),elec fuel CO2 (kg)
0,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,6,2524250.11,2017-06-01,2,,,0.0,0.0
1,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,5,2479480.14,2017-05-01,2,,,0.0,0.0
2,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,4,2267811.28,2017-04-01,2,,,0.0,0.0
3,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,3,2054274.3,2017-03-01,1,,,0.0,0.0
4,201706,M,USA,2017-08-24T11:46:12-04:00,99,ELEC.GEN.DPV-US-99.M,201401,DPV,thousand megawatthours,2017,2,1449156.85,2017-02-01,1,,,0.0,0.0


In [136]:
extra_co2, extra_gen_fuel = extra_emissions_gen(gen_fuels_state, eia_total, ef)

Results match what I have previously found in the notebooks up on GitHub (Emissions Index repo)

In [94]:
extra_gen_fuel.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COW,2001,1,16918390.0,1011665.061,13157576.0
COW,2001,2,14968346.0,911346.865,11574491.0
COW,2001,3,18321543.0,1153328.913,14765162.0
COW,2001,4,13530117.0,776341.623,10374008.0
COW,2001,5,14014024.0,801266.923,10920463.0


In [95]:
eia_extra.loc[idx['DPV',:,:]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DPV,2014,1,,624032.82,
DPV,2014,2,,663828.73,
DPV,2014,3,,907029.95,
DPV,2014,4,,988423.77,
DPV,2014,5,,1092011.8,
DPV,2014,6,,1100560.82,
DPV,2014,7,,1148527.79,
DPV,2014,8,,1139134.66,
DPV,2014,9,,1046360.7,
DPV,2014,10,,964877.96,


## Total CO₂ (national)

In [130]:
facility_co2 = co2.groupby(['year', 'month']).sum()

In [131]:
facility_co2.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,final co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,2,57482310,116544200000.0
2017,3,58485598,123718700000.0
2017,4,58955690,113345400000.0
2017,5,59223601,129258100000.0
2017,6,59451819,151211300000.0


In [141]:
extra_co2.loc[idx['NG', :, :],:].tail(n=11)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,all fuel co2 (kg),elec fuel co2 (kg)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
NG,2016,8,6022574000.0,4039321000.0
NG,2016,9,4917984000.0,3118869000.0
NG,2016,10,4312456000.0,2624005000.0
NG,2016,11,4231444000.0,2458983000.0
NG,2016,12,4418713000.0,2530048000.0
NG,2017,1,7823823000.0,6101011000.0
NG,2017,2,6601814000.0,5030973000.0
NG,2017,3,8630216000.0,6986014000.0
NG,2017,4,7823192000.0,6280070000.0
NG,2017,5,8749634000.0,7244919000.0


In [133]:
national_co2 = (facility_co2.loc[:, 'final co2 (kg)']
                + extra_co2.loc[:, 'elec fuel co2 (kg)'])

These values are very close, but not quite exactly the same as what I've put up on the website.

Why? Could it be because the extra 

In [134]:
national_co2

year  month
2001  1        2.162121e+11
      2        1.814475e+11
      3        1.900841e+11
      4        1.761317e+11
      5        1.905779e+11
      6        2.062662e+11
      7        2.311815e+11
      8        2.404311e+11
      9        1.950060e+11
      10       1.845210e+11
      11       1.731834e+11
      12       1.882633e+11
2002  1        1.951862e+11
      2        1.709727e+11
      3        1.855136e+11
      4        1.759043e+11
      5        1.863580e+11
      6        2.081023e+11
      7        2.385440e+11
      8        2.355222e+11
      9        2.092338e+11
      10       1.948277e+11
      11       1.844629e+11
      12       2.023788e+11
2003  1        2.167834e+11
      2        1.880347e+11
      3        1.876779e+11
      4        1.727763e+11
      5        1.840170e+11
      6        2.007602e+11
                   ...     
2015  1        1.779027e+11
      2        1.696220e+11
      3        1.522496e+11
      4        1.310557e+11
      5 

## Facility state and lat/lon file generation

In [148]:
%aimport Data.make_data
from Data.make_data import facility_location_data

In [150]:
facility_location_data(eia_fac)

## Fraction of estimated gen/fuels in each NERC region

**NOTE** A few of the 2015 annual facilities get assigned multiple NERC regions. I'm not sure why this is. A test of generation from these facilities showed that it was several orders of magnitude lower than total generation from all 2015 annual reporting facilities. 

In [9]:
%aimport Data.make_data
from Data.make_data import get_annual_plants

In [10]:
annual_ids = get_annual_plants(2015)

In [14]:
annual_2015 = eia_fac.loc[(eia_fac['year'] == 2015) &
                          (eia_fac['plant id'].isin(annual_ids)), :].copy()

In [15]:
annual_2015['generation (mwh)'].sum()

210166168.213

### Add NERC region labels

In [33]:
%aimport Analysis.state2nerc
from Analysis.state2nerc import fraction_state2nerc, add_region

In [22]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage', 'NERC_Regions_EIA',
            'NercRegions_201610.shp')
regions = gpd.read_file(path)

In [107]:
regions.head()

Unnamed: 0,NERC,NERC_Label,geometry
0,-,Indeterminate - various NERC membership,(POLYGON ((-91.71222639199999 32.9756623780000...
1,FRCC,Florida Reliability Coordinating Council (FRCC),(POLYGON ((-81.95506602699999 24.5196900770000...
2,MRO,Midwest Reliability Organization (MRO),POLYGON ((-95.07148604699995 49.36451082200006...
3,NPCC,Northeast Power Coordinating Council (NPCC),(POLYGON ((-73.82134384999995 40.6045004290000...
4,RFC,ReliabilityFirst Corporation (RFC),(POLYGON ((-90.89359863199996 29.0467829150000...


In [91]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage', 'Facility labels',
            'Facility locations.csv')
location_labels = pd.read_csv(path)

In [36]:
#Drop facilities without lat/lon data
eia_fac_nerc = eia_fac.dropna(subset=['lat', 'lon'])

In [38]:
eia_fac_nerc = add_region(eia_fac_nerc, regions, region_col='nerc')
eia_fac_nerc['state'] = eia_fac_nerc['geography'].str[-2:]

In [55]:
eia_fac_nerc_type = group_fuel_cats(eia_fac_nerc, state_fuel_cat)

In [58]:
eia_fac_nerc_type.columns

Index(['type', 'year', 'month', 'plant id', 'total fuel (mmbtu)',
       'generation (MWh)', 'elec fuel (mmbtu)', 'lat', 'lon', 'quarter',
       'all fuel fossil CO2 (kg)', 'elec fuel fossil CO2 (kg)',
       'all fuel total CO2 (kg)', 'elec fuel total CO2 (kg)', 'nerc'],
      dtype='object')

In [18]:
nerc_state_path = join(cwd, '..', 'Data storage', 'Derived data',
                  'NERC_states.json')

with open(nerc_state_path, 'r') as f:
    nerc_states = json.load(f)

In [160]:
eia_2015_annual = eia_fac.loc[eia_fac['plant id'].isin(annual_ids)].copy()
eia_2015_annual.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,lat,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
120,M,NG,12,10052,25674.0,2015,1290.744,25674.0,USA-CA,2017-08-24T11:46:12-04:00,40.7995,-124.2028,ALL,2015-12-01,4,1362519.18,1362519.18,1362519.18,1362519.18
121,M,NG,11,10052,6341.0,2015,378.814,6341.0,USA-CA,2017-08-24T11:46:12-04:00,40.7995,-124.2028,ALL,2015-11-01,4,336516.87,336516.87,336516.87,336516.87
122,M,NG,10,10052,2898.0,2015,183.916,2898.0,USA-CA,2017-08-24T11:46:12-04:00,40.7995,-124.2028,ALL,2015-10-01,4,153796.86,153796.86,153796.86,153796.86
123,M,NG,9,10052,2947.0,2015,191.471,2947.0,USA-CA,2017-08-24T11:46:12-04:00,40.7995,-124.2028,ALL,2015-09-01,3,156397.29,156397.29,156397.29,156397.29
124,M,NG,8,10052,1761.0,2015,117.711,1761.0,USA-CA,2017-08-24T11:46:12-04:00,40.7995,-124.2028,ALL,2015-08-01,3,93456.27,93456.27,93456.27,93456.27


In [161]:
eia_2015_annual = group_fuel_cats(eia_2015_annual, state_fuel_cat)
eia_2015_annual.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6
1,COW,2001,1,59,674160.0,62539.0,674160.0,40.854765,-98.348222,1,65528352.0,65528352.0,65528352.0,65528352.0
2,COW,2001,1,642,337326.0,27208.0,337326.0,30.6692,-84.8869,1,31472515.8,31472515.8,31472515.8,31472515.8
3,COW,2001,1,753,3794.0,118.512,3794.0,31.844708,-83.940734,1,353980.2,353980.2,353980.2,353980.2
4,COW,2001,1,964,309027.0,23207.0,309027.0,39.757327,-89.600512,1,28832219.1,28832219.1,28832219.1,28832219.1


In [162]:
eia_2015_annual = add_facility_location(eia_2015_annual, location_labels, labels=['state'])
eia_2015_annual.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),state
0,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6,AL
1,COW,2001,2,7,463753.0,33025.0,463753.0,34.0128,-85.9708,1,43268154.9,43268154.9,43268154.9,43268154.9,AL
2,COW,2001,3,7,562534.0,37340.0,562534.0,34.0128,-85.9708,1,52484422.2,52484422.2,52484422.2,52484422.2,AL
3,COW,2001,4,7,600426.0,43914.0,600426.0,34.0128,-85.9708,2,56019745.8,56019745.8,56019745.8,56019745.8,AL
4,COW,2001,5,7,489574.0,35148.0,489574.0,34.0128,-85.9708,2,45677254.2,45677254.2,45677254.2,45677254.2,AL


In [163]:
eia_2015_annual_nerc = add_region(eia_2015_annual,
                             regions, region_col='nerc')
eia_2015_annual_nerc.head()

Unnamed: 0,type,year,month,plant id,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),lat,lon,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),state,nerc
0,COW,2001,1,7,587162.0,45991.0,587162.0,34.0128,-85.9708,1,54782214.6,54782214.6,54782214.6,54782214.6,AL,SERC
1,COW,2001,2,7,463753.0,33025.0,463753.0,34.0128,-85.9708,1,43268154.9,43268154.9,43268154.9,43268154.9,AL,SERC
2,COW,2001,3,7,562534.0,37340.0,562534.0,34.0128,-85.9708,1,52484422.2,52484422.2,52484422.2,52484422.2,AL,SERC
3,COW,2001,4,7,600426.0,43914.0,600426.0,34.0128,-85.9708,2,56019745.8,56019745.8,56019745.8,56019745.8,AL,SERC
4,COW,2001,5,7,489574.0,35148.0,489574.0,34.0128,-85.9708,2,45677254.2,45677254.2,45677254.2,45677254.2,AL,SERC


In [188]:
all_states = []
for value in nerc_states.values():
    all_states.extend(value)
all_states = set(all_states)

In [189]:
all_states

{'AL',
 'AR',
 'AZ',
 'CA',
 'CO',
 'CT',
 'DC',
 'DE',
 'FL',
 'GA',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NC',
 'ND',
 'NE',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY'}

In [194]:
df_list = []
for state in all_states:
    try:
        df_list.append(fraction_state2nerc(eia_2015_annual_nerc,
                       state, region_col='nerc', fuel_col='type'))
    except:
        print(state)
        pass

DC


In [195]:
nerc_fractions = pd.concat(df_list)
nerc_fractions.head()

Unnamed: 0,state,nerc,type,% generation,% total fuel,% elec fuel
0,MT,WECC,HYC,1.0,1.0,1.0
1,MT,WECC,WWW,1.0,1.0,1.0
2,MT,WECC,NG,1.0,1.0,1.0
3,MT,WECC,COW,1.0,1.0,1.0
4,MT,WECC,PEL,1.0,1.0,1.0


With the values below I can allocate extra state-level generation and fuel use to each of the NERC regions!

In [196]:
nerc_fractions.tail()

Unnamed: 0,state,nerc,type,% generation,% total fuel,% elec fuel
2,NV,WECC,NG,1.0,1.0,1.0
3,NV,WECC,PEL,1.0,1.0,1.0
4,NV,WECC,OOG,1.0,1.0,1.0
5,NV,WECC,OTH,1.0,1.0,1.0
6,NV,WECC,SUN,1.0,1.0,1.0


In [197]:
nerc_fractions.loc[nerc_fractions.state == 'AL']

Unnamed: 0,state,nerc,type,% generation,% total fuel,% elec fuel
0,AL,SERC,HYC,1.0,1.0,1.0
1,AL,SERC,WWW,1.0,1.0,1.0
2,AL,SERC,NG,1.0,1.0,1.0
3,AL,SERC,COW,1.0,1.0,1.0
4,AL,SERC,WAS,1.0,1.0,1.0
5,AL,SERC,PEL,1.0,1.0,1.0
6,AL,SERC,OOG,1.0,1.0,1.0
7,AL,SERC,OTH,1.0,1.0,1.0


In [198]:
nerc_fractions.groupby(['state', 'type']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,% generation,% total fuel,% elec fuel
state,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AL,COW,1.0,1.0,1.0
AL,HYC,1.0,1.0,1.0
AL,NG,1.0,1.0,1.0
AL,OOG,1.0,1.0,1.0
AL,OTH,1.0,1.0,1.0
AL,PEL,1.0,1.0,1.0
AL,WAS,1.0,1.0,1.0
AL,WWW,1.0,1.0,1.0
AR,HYC,1.0,1.0,1.0
AR,NG,1.0,1.0,1.0


## Allocate extra gen from the state-level to regions

I still need to generate state-level total generation and fuel use!