# Calculate NERC level results
Use the NERC extra generation and fuel consumption (things not yet reported in EIA-923 final data) from the *Calculate national and NERC gen and emissions* notebook, along with facility EIA and EPA data to calculate generation and emissions in each NERC region.

## Instructions
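Make sure the `file_date` parameter below is set to whatever value you would like appended to file names. Cells that build their paths from `DATA_PATHS` use `DATA_DATE` from `src.params` instead, so make sure that value is set as well.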

The entire notebook can be run at once using *Run All Cells*.

In [1]:
# Load the "autoreload" extension and select mode 2 so that imported modules
# are reloaded before each cell executes (edits to project code are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from os.path import join
import sys
import json
idx = pd.IndexSlice

In [20]:
from src.analysis.index import (
    facility_emission_gen,
    group_facility_data,
    g2lb,
    change_since_2005,
    generation_index,
    facility_co2,
    adjust_epa_emissions,
    group_fuel_cats,
    extra_emissions_gen,
    reduce_emission_factors
)
from src.util import add_quarter, add_facility_location, rename_cols, add_datetime
from src.params import (
    DATA_DATE,
    DATA_PATHS,
    QUARTER_YEAR,
    STATE_FACILITY_FUELS,
    CUSTOM_FUELS,
    FINAL_DATA_YEAR,
    STATES,
    NERCS
)

# Year immediately before FINAL_DATA_YEAR (imported from src.params above).
prev_year = FINAL_DATA_YEAR - 1

### Date string for filenames
This string is inserted into every file name built from `file_date` (both when reading and when writing).

In [None]:
# Date suffix interpolated into file names by the cells that build their paths
# with `.format(file_date)`.
file_date = '2019-02-26'

In [None]:
%load_ext watermark

In [None]:
%watermark -v -iv

In [None]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [None]:
# add the 'src' directory as one where we can import modules
src_dir = join(os.getcwd(), os.pardir, 'src')
# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [None]:
%aimport Data.make_data
from Data.make_data import states_in_nerc
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data
%aimport Analysis.index
from Analysis.index import facility_co2, adjust_epa_emissions, group_fuel_cats
from Analysis.index import reduce_emission_factors, add_datetime
%aimport util.utils
from util.utils import rename_cols, add_facility_location

## Extra NERC emissions and generation

### Load extra NERC generation and fuel consumption

In [None]:
# Legacy loader: builds the path relative to the current working directory
# and the `file_date` suffix. The first four CSV columns form the row index.
cwd = os.getcwd()
path = join(
    cwd,
    '..',
    'Data storage',
    'Derived data',
    'NERC extra gen fuels {}.csv'.format(file_date),
)
extra_nerc = pd.read_csv(path, index_col=list(range(4)))

In [4]:
# Read the extra NERC generation/fuel table for DATA_DATE from the
# 'nerc_extra' data directory; the first four CSV columns form the row index.
path = DATA_PATHS['nerc_extra'] / f'NERC extra gen fuels {DATA_DATE}.csv'
extra_nerc = pd.read_csv(path, index_col=list(range(4)))

### Load emission factors

In [None]:
# Legacy emission-factor loader: reads the CSV from the 'Data storage' folder
# one level above `cwd`, using the first column as the index.
# NOTE(review): the result is bound to lower-case `ef`, while the later
# `reduce_emission_factors` call reads upper-case `EF` -- confirm this cell is
# still needed.
path = join(cwd, '..', 'Data storage', 'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

In [5]:
# Read the emission-factor table from the 'inputs' data directory; the first
# CSV column becomes the index.
EF_PATH = DATA_PATHS['inputs'] / 'Final emission factors.csv'
EF = pd.read_csv(EF_PATH, index_col=0)

In [6]:
# Reduce the full emission-factor table with the project helper.
# NOTE(review): presumably yields one factor per fuel `type` -- confirm
# against `reduce_emission_factors`.
ef_type = reduce_emission_factors(EF)

In [7]:
# Wrap the reduced factors in a pandas Series named 'type' so they can be
# multiplied against the fuel-consumption columns below.
ef_type = pd.Series(ef_type, name='type')

### Calculate CO₂ emissions

In [8]:
# CO2 = fuel consumption x emission factor. The factors in `ef_type` are
# broadcast across the 'type' level of the `extra_nerc` row index.
extra_nerc.loc[:, 'total co2 (kg)'] = (
    extra_nerc['total fuel (mmbtu)'].mul(ef_type, level='type')
)
extra_nerc.loc[:, 'elec co2 (kg)'] = (
    extra_nerc['elec fuel (mmbtu)'].mul(ef_type, level='type')
)

In [9]:
# Keep the MultiIndex sorted so that label-based slicing works later on.
extra_nerc = extra_nerc.sort_index()

## Facilities by NERC

In [None]:
# %aimport Analysis.state2nerc
from src.analysis.state2nerc import fraction_state2nerc, add_region

In [None]:
# Legacy loader for the two fuel-category mappings stored as JSON under
# 'Data storage/Fuel categories'.
fuel_cat_folder = join(cwd, '..', 'Data storage', 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')

# State-level facility fuel categories
with open(state_cats_path, 'r') as f:
    state_fuel_cat = json.load(f)

# Custom fuel categories used for the final results
with open(custom_cats_path, 'r') as f:
    custom_fuel_cat = json.load(f)

In [None]:
# Legacy loader for the monthly EPA emissions CSV. Columns are renamed to the
# labels used by the rest of this (legacy) workflow.
path = join(cwd, '..', 'Data storage', 'Derived data',
            'Monthly EPA emissions {}.csv'.format(file_date))
epa = pd.read_csv(path).rename(
    columns={
        'orispl': 'plant id',
        'co2mass_kg': 'co2_mass (kg)',
        'gload_mwh': 'gload (mw)',
        'heatinput_mmbtu': 'heat_input (mmbtu)',
    }
)

In [10]:
# Facility location table from the 'transformed_data' directory; it is passed
# to `add_facility_location` further down to attach location labels.
location_path = DATA_PATHS['transformed_data'] / 'Facility locations_RF.csv'
location_labels = pd.read_csv(location_path)

In [None]:
# Legacy loader for the facility location labels, relative to `cwd`.
# NOTE(review): later cells in this notebook pass `location_labels`, not
# `facility_labels`, to `add_facility_location` -- confirm this cell is still
# needed.
path = join(cwd, '..', 'Data storage', 'Facility labels',
            'Facility locations_RF.csv')
facility_labels = pd.read_csv(path)

In [None]:
# Legacy loader for the facility-level EIA generation/fuel/CO2 table, using
# the `file_date` suffix. The parquet-based cell that follows rebinds
# `eia_fac`.
path = join(cwd, '..', 'Data storage', 'Derived data',
            'Facility gen fuels and CO2 {}.csv'.format(file_date))
eia_fac = pd.read_csv(path)

In [11]:
# Locations of the compiled EIA facility data and the EPA emissions data for
# the current DATA_DATE.
FACILITY_PATH = DATA_PATHS['eia_compiled'] / 'facility_gen_fuel_data_{}.parquet'.format(DATA_DATE)
epa_path = DATA_PATHS['epa_emissions'] / 'epa_emissions_{}.parquet'.format(DATA_DATE)

# Load both tables
eia_fac = pd.read_parquet(FACILITY_PATH)
epa = pd.read_parquet(epa_path)

In [12]:
# Combine EIA facility data with EPA emissions. Returns the facility CO2
# table and the generation/fuel table grouped into the state fuel categories.
co2, gen_fuels_state = facility_emission_gen(
    eia_facility=eia_fac,
    epa=epa,
    state_fuel_cat=STATE_FACILITY_FUELS,
    custom_fuel_cat=CUSTOM_FUELS,
    export_state_cats=True,
)

Renaming columns
Grouping facilities
Adjusting EPA emissions
Caculating CO2
Gen/fuels to state categories


### Monthly CO₂ emissions

In [14]:
# Attach location labels (including the NERC region) to each facility record
# so emissions can be aggregated by region.
co2 = add_facility_location(
    co2,
    location_labels,
    labels=['lat', 'lon', 'state', 'nerc', 'year'],
)

In [15]:
# Facility-level CO2 summed to one value per (year, NERC region, month).
co2_nerc = co2.groupby(['year', 'nerc', 'month'])['final co2 (kg)'].sum()

In [16]:
# Facility emissions (measured and adjusted) plus the state-level emissions
# estimated from fuel consumption that have been allocated to NERC regions.
# `fill_value=0` keeps index entries that appear in only one of the two series.
co2_nerc_total = co2_nerc.add(
    extra_nerc.groupby(['year', 'nerc', 'month'])['elec co2 (kg)'].sum(),
    fill_value=0,
).rename('final co2 (kg)')

### Monthly generation

In [26]:
# Label each facility record with its NERC region, then total generation per
# (year, NERC region, month, fuel type).
gen_fuels_nerc = (
    add_facility_location(gen_fuels_state, location_labels,
                          labels=['nerc', 'year'])
    .groupby(['year', 'nerc', 'month', 'type'])['generation (mwh)']
    .sum()
)

## Combine generation

In [27]:
# Work on a copy so that `gen_fuels_nerc` is left untouched when the extra
# NERC generation is added in the following cells.
total_gen = gen_fuels_nerc.copy()

In [28]:
total_gen.tail()

year  nerc  month  type
2018  WECC  12     PEL       15799.251
                   SUN      642262.000
                   WAS      123329.873
                   WND     1169305.000
                   WWW      156203.172
Name: generation (mwh), dtype: float64

In [31]:
extra_nerc.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,generation (mwh),total fuel (mmbtu),elec fuel (mmbtu),total co2 (kg),elec co2 (kg)
year,nerc,month,type,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018,ASCC,1,COW,35156.169,876859.0,306116.0,83520820.0,29157550.0
2018,ASCC,1,NG,265348.871,2631767.0,2631758.0,139667900.0,139667400.0
2018,ASCC,1,PEL,36141.871,354537.0,333281.0,26935950.0,25321020.0
2018,ASCC,1,WAS,140.649,-38796.0,-38796.0,,
2018,ASCC,2,COW,27788.456,774869.0,246976.0,73806270.0,23524460.0


In [30]:
# Combine facility-level generation with the "extra" NERC generation.
#
# The earlier form assigned the sum back through
# `total_gen.loc[idx[FINAL_DATA_YEAR:, ...]] = ...`. Assigning a Series through
# `.loc` aligns it to the rows that already exist, so any
# (year, nerc, month, type) combination present only in `extra_nerc` was
# silently dropped. Adding over the union of both indexes (the same pattern
# used for `co2_nerc_total`) keeps those rows.
extra_gen = extra_nerc.loc[:, 'generation (mwh)']
# As before, only generation from FINAL_DATA_YEAR onward is supplemented.
extra_gen = extra_gen[
    extra_gen.index.get_level_values('year') >= FINAL_DATA_YEAR
]

# Build from `gen_fuels_nerc` (which `total_gen` was copied from) so the cell
# gives the same result when it is re-run.
total_gen = (
    gen_fuels_nerc
    .add(extra_gen, fill_value=0)
    .rename('generation (mwh)')
    .sort_index()
    .reset_index()
)
add_datetime(total_gen)

In [None]:
# Legacy variant of the previous cell with the start year hard-coded to 2017.
# Once the previous cell has run, `total_gen` is a flat DataFrame (after
# `reset_index`) and repeating the MultiIndex arithmetic raises, which breaks
# "Run All". Only apply the legacy step while `total_gen` is still the
# MultiIndexed Series produced upstream.
if isinstance(total_gen, pd.Series):
    total_gen.loc[idx[2017:, :, :, :]] = (
        total_gen.loc[2017:]
        .add(extra_nerc.loc[:, 'generation (mwh)'], fill_value=0)
    )
    total_gen = total_gen.reset_index()
    add_datetime(total_gen)

In [32]:
# Map the detailed fuel types onto the custom fuel categories, keeping the
# NERC region and datetime as grouping columns, then index the result by
# (nerc, fuel category, datetime).
final = group_fuel_cats(
    total_gen,
    CUSTOM_FUELS,
    'type',
    'fuel category',
    extra_group_cols=['nerc', 'datetime'],
).set_index(['nerc', 'fuel category', 'datetime'])

In [33]:
# Sum across fuel categories to get totals per NERC region and datetime; used
# as the denominator when computing percent generation by fuel.
total = final.groupby(['nerc', 'datetime']).sum()

In [None]:
# Unique NERC labels present in `total`.
# NOTE(review): the percent-generation loop iterates over `NERCS` from
# src.params rather than this variable -- confirm it is still needed.
nercs = total.index.get_level_values('nerc').unique()

### Percent generation by fuel
    

In [34]:
# Share of each region's monthly generation supplied by each fuel category.
df_list = []
for nerc in NERCS:
    # Divide this region's per-fuel values by the region totals (matched on
    # 'datetime'), then put the region label back into the index.
    percent_gen = (
        final.loc[nerc]
        .divide(total.loc[nerc], level='datetime')
        .assign(nerc=nerc)
        .set_index('nerc', append=True)
    )
    df_list.append(percent_gen)

# Stack the regions; 'year' and 'month' are not meaningful after the division,
# which leaves the single remaining column holding the generation share.
percent_gen = pd.concat(df_list).drop(['year', 'month'], axis=1)
percent_gen.columns = ['% generation']

In [None]:
# Legacy export of the percent-generation table, written under
# 'Data storage/Final NERC data' with the `file_date` suffix.
path = join(cwd, '..', 'Data storage', 'Final NERC data',
            'NERC percent gen {}.csv'.format(file_date))
percent_gen.to_csv(path)

In [35]:
# Monthly generation per fuel category and NERC region, sorted by index.
total_monthly_gen = (
    final
    .groupby(['fuel category', 'year', 'nerc', 'month'])
    .sum()
    .sort_index()
)

In [42]:
# Spot-check a few rows. The fixed seed makes the displayed sample identical
# on every "Restart & Run All".
total_monthly_gen.sample(5, random_state=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,generation (mwh)
year,nerc,month,Unnamed: 3_level_1
2003,HICC,3,892493.8
2015,SPP,6,21156900.0
2018,SERC,6,102282100.0
2009,WECC,4,54871790.0
2015,NPCC,10,19348800.0


In [36]:
# Write monthly generation by fuel category and NERC region to the results
# directory, tagged with DATA_DATE.
path = (
    DATA_PATHS['results']
    / f'NERC generation {DATA_DATE}.csv'
)
total_monthly_gen.to_csv(path)

## CO₂ emissions intensity by NERC region

In [37]:
# Collapse the fuel-category level so generation is totalled per
# (year, NERC region, month), matching the index of the CO2 series.
total_monthly_gen = total_monthly_gen.groupby(['year', 'nerc', 'month']).sum()

In [38]:
# Emissions intensity from facility-level CO2 only (`co2_nerc`). The cell that
# follows rebinds `nerc_index` using `co2_nerc_total` instead.
nerc_index = pd.concat(
    [co2_nerc.sort_index(), total_monthly_gen.sort_index()],
    axis=1,
)
nerc_index['index'] = nerc_index['final co2 (kg)'].div(
    nerc_index['generation (mwh)']
)
nerc_index = nerc_index.reset_index()
add_datetime(nerc_index)

In [39]:
# Emissions intensity (kg CO2 per MWh) per year, NERC region and month, using
# total CO2 (facility plus extra NERC emissions) over total generation.
nerc_index = (
    pd.concat(
        [co2_nerc_total.sort_index(), total_monthly_gen.sort_index()],
        axis=1,
    )
    .assign(index=lambda df: df['final co2 (kg)'] / df['generation (mwh)'])
    .reset_index()
)
add_datetime(nerc_index)

### Write data to file

In [43]:
# Write generation, emissions and the intensity index per NERC region to the
# results directory, tagged with DATA_DATE. The row index is not written.
path = DATA_PATHS['results'] / f'NERC gen emissions and index {DATA_DATE}.csv'
nerc_index.to_csv(path, index=False)