In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from os.path import join
import sys
import json

In [2]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [3]:
# add the 'src' directory as one where we can import modules
src_dir = join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

In [4]:
%aimport Data.make_data
from Data.make_data import states_in_nerc
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data, facility_co2, adjust_epa_emissions, group_fuel_cats
%aimport util.utils
from util.utils import rename_cols

## Load data

In [116]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'Facility gen fuels and CO2 2017-08-31.zip')
eia_fac = pd.read_csv(path)

In [143]:
eia_fac.head()

Unnamed: 0,f,fuel,month,plant id,total fuel (mmbtu),year,generation (mwh),elec fuel (mmbtu),geography,last_updated,lat,lon,prime mover,datetime,quarter,all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg),type
0,M,SUB,6,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-06-01,2,0.0,0.0,0.0,0.0,COW
1,M,SUB,5,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-05-01,2,0.0,0.0,0.0,0.0,COW
2,M,SUB,4,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-04-01,2,0.0,0.0,0.0,0.0,COW
3,M,SUB,3,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-03-01,1,0.0,0.0,0.0,0.0,COW
4,M,SUB,2,10360,0.0,2017,0.0,0.0,USA-WI,2017-08-24T11:46:12-04:00,44.4936,-88.0303,ALL,2017-02-01,1,0.0,0.0,0.0,0.0,COW


In [118]:
path = join(cwd, '..', 'Data storage', 'Derived data',
            'Monthly EPA emissions 2017-08-31.csv')
epa = pd.read_csv(path)

## Test facility_emissions_gen

In [119]:
fuel_cat_folder = join(cwd, '..', 'Data storage', 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')

with open(state_cats_path, 'r') as f:
    state_fuel_cat = json.load(f)
    
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')
with open(custom_cats_path, 'r') as f:
    custom_fuel_cat = json.load(f)

In [32]:
co2, gen_fuels_custom = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=False)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [120]:
co2, gen_fuels_state = facility_emission_gen(eia_facility=eia_fac, epa=epa,
                                              state_fuel_cat=state_fuel_cat,
                                              custom_fuel_cat=custom_fuel_cat,
                                              export_state_cats=True)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


In [9]:
co2.head()

Unnamed: 0,year,month,plant id,final co2 (kg)
0,2001,1,2,0.0
1,2001,1,3,962541100.0
2,2001,1,4,0.0
3,2001,1,7,67000920.0
4,2001,1,8,753020400.0


In [10]:
gen_fuels.head()

Unnamed: 0,plant id,fuel category,year,month,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu),all fuel fossil co2 (kg),elec fuel fossil co2 (kg),all fuel total co2 (kg),elec fuel total co2 (kg)
0,2,Hydro,2001,1,195479.69,18918.0,195479.69,0.0,0.0,0.0,0.0
1,2,Hydro,2001,2,312128.93,30207.0,312128.93,0.0,0.0,0.0,0.0
2,2,Hydro,2001,3,390060.42,37749.0,390060.42,0.0,0.0,0.0,0.0
3,2,Hydro,2001,4,281925.57,27284.0,281925.57,0.0,0.0,0.0,0.0
4,2,Hydro,2001,5,92594.01,8961.0,92594.01,0.0,0.0,0.0,0.0


### Check generation and fuel consumption totals

Interesting - there is some small part of generation that I'm losing along the way. It's 5 orders of magnitude smaller than the total though.

In [121]:
eia_fac['generation (mwh)'].sum()

65594379743.76258

In [123]:
gen_fuels_state['generation (mwh)'].sum()

65594327524.76352

Fuel consumption is pretty identical though

In [124]:
eia_fac['total fuel (mmbtu)'].sum(), eia_fac['elec fuel (mmbtu)'].sum()

(686393145435.8103, 645653921528.091)

In [125]:
gen_fuels_state['total fuel (mmbtu)'].sum(), gen_fuels_state['elec fuel (mmbtu)'].sum()

(686393145435.7861, 645653921528.0757)

## Extra gen/fuels from non-reporting

In [142]:
%aimport Analysis.index
from Analysis.index import extra_emissions_gen

Total EIA generation/fuel consumption and emission factors

In [128]:
cwd = os.getcwd()
path = join(cwd, '..', 'Data storage',
            'EIA country-wide gen fuel CO2 2017-08-31.csv')
eia_total = pd.read_csv(path)

path = join(cwd, '..', 'Data storage',
            'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

In [136]:
extra_co2, extra_gen_fuel = extra_emissions_gen(gen_fuels_state, eia_total, ef)

Results match what I have previously found in the notebooks up on GitHub (Emissions Index repo)

In [94]:
extra_gen_fuel.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COW,2001,1,16918390.0,1011665.061,13157576.0
COW,2001,2,14968346.0,911346.865,11574491.0
COW,2001,3,18321543.0,1153328.913,14765162.0
COW,2001,4,13530117.0,776341.623,10374008.0
COW,2001,5,14014024.0,801266.923,10920463.0


In [95]:
eia_extra.loc[idx['DPV',:,:]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total fuel (mmbtu),generation (mwh),elec fuel (mmbtu)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DPV,2014,1,,624032.82,
DPV,2014,2,,663828.73,
DPV,2014,3,,907029.95,
DPV,2014,4,,988423.77,
DPV,2014,5,,1092011.8,
DPV,2014,6,,1100560.82,
DPV,2014,7,,1148527.79,
DPV,2014,8,,1139134.66,
DPV,2014,9,,1046360.7,
DPV,2014,10,,964877.96,


## Total CO₂ (national)

In [130]:
facility_co2 = co2.groupby(['year', 'month']).sum()

In [131]:
facility_co2.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,plant id,final co2 (kg)
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,2,57482310,116544200000.0
2017,3,58485598,123718700000.0
2017,4,58955690,113345400000.0
2017,5,59223601,129258100000.0
2017,6,59451819,151211300000.0


In [141]:
extra_co2.loc[idx['NG', :, :],:].tail(n=11)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,all fuel co2 (kg),elec fuel co2 (kg)
type,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
NG,2016,8,6022574000.0,4039321000.0
NG,2016,9,4917984000.0,3118869000.0
NG,2016,10,4312456000.0,2624005000.0
NG,2016,11,4231444000.0,2458983000.0
NG,2016,12,4418713000.0,2530048000.0
NG,2017,1,7823823000.0,6101011000.0
NG,2017,2,6601814000.0,5030973000.0
NG,2017,3,8630216000.0,6986014000.0
NG,2017,4,7823192000.0,6280070000.0
NG,2017,5,8749634000.0,7244919000.0


In [133]:
national_co2 = (facility_co2.loc[:, 'final co2 (kg)']
                + extra_co2.loc[:, 'elec fuel co2 (kg)'])

These values are very close, but not quite exactly the same as what I've put up on the website.

Why? Could it be because the extra 

In [134]:
national_co2

year  month
2001  1        2.162121e+11
      2        1.814475e+11
      3        1.900841e+11
      4        1.761317e+11
      5        1.905779e+11
      6        2.062662e+11
      7        2.311815e+11
      8        2.404311e+11
      9        1.950060e+11
      10       1.845210e+11
      11       1.731834e+11
      12       1.882633e+11
2002  1        1.951862e+11
      2        1.709727e+11
      3        1.855136e+11
      4        1.759043e+11
      5        1.863580e+11
      6        2.081023e+11
      7        2.385440e+11
      8        2.355222e+11
      9        2.092338e+11
      10       1.948277e+11
      11       1.844629e+11
      12       2.023788e+11
2003  1        2.167834e+11
      2        1.880347e+11
      3        1.876779e+11
      4        1.727763e+11
      5        1.840170e+11
      6        2.007602e+11
                   ...     
2015  1        1.779027e+11
      2        1.696220e+11
      3        1.522496e+11
      4        1.310557e+11
      5 

## Facility state and lat/lon file generation

In [148]:
%aimport Data.make_data
from Data.make_data import facility_location_data

In [150]:
facility_location_data(eia_fac)