## Aggregate CEMS data at the utility level for RMI

In [6]:
# Standard libraries
import logging
import os
import pathlib
import sys

# 3rd party libraries
import geopandas as gpd
import geoplot as gplt
import dask.dataframe as dd
from dask.distributed import Client
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sa


# Local libraries
import pudl

In [7]:
pudl_settings = pudl.workspace.setup.get_defaults()
#display(pudl_settings)

ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])
#display(ferc1_engine)

pudl_engine = sa.create_engine(pudl_settings['pudl_db'])
#display(pudl_engine)

#pudl_engine.table_names()
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)

##### Prep FPL Information

In [8]:
# Read master unit list
mul = pd.read_pickle('/Users/aesharpe/Desktop/master_unit_list.pkl.gz')

In [9]:
# Get the ownership fractions at the plant level
plant_mul = mul[(mul['plant_part']=='plant') & (mul['ownership']=='owned')].copy()
plant_mul = (
    plant_mul.rename(columns={'report_year': 'year'})
    .reset_index()
    [['plant_id_eia', 'year', 'fraction_owned', 'utility_id_eia']]
    .drop_duplicates()
)

In [10]:
fpl_eia_id = 6452
# Duke EIA ID = 

fpl_df = plant_mul[plant_mul['utility_id_eia']==fpl_eia_id]
fpl_plants = list(set((fpl_df.plant_id_eia)))

##### Prep CEMS with FPL data 

In [15]:
# CEMS
years = range(2009, 2019)
cems_df = pd.DataFrame()

my_cols = [
    'state',
    'plant_id_eia', 
    'unitid',
    'so2_mass_lbs', 
    'nox_mass_lbs', 
    'co2_mass_tons',
    'operating_datetime_utc',
]

for yr in years:
    print(f'starting calculation for {yr}')
    epacems_path = (pudl_settings['parquet_dir'] + f'/epacems/year={yr}')
    cems_dd = (
        dd.read_parquet(epacems_path, columns=my_cols)
        .assign(state=lambda x: x['state'].astype('string'))
#         .groupby(['plant_id_eia', 'operating_datetime_utc']).agg({
#             'state': 'first',
#             'so2_mass_lbs': 'sum',
#             'nox_mass_lbs': 'sum',
#             'co2_mass_tons': 'sum',
#         }).reset_index()
    )
    cems_dd_fpl = cems_dd[cems_dd['plant_id_eia'].isin(fpl_plants)]
    
    client = Client()
    cems_df_fpl = (
        client.compute(cems_dd_fpl)
        .result()
        .assign(year=yr))
    cems_df = pd.concat([cems_df, cems_df_fpl])

starting calculation for 2009
starting calculation for 2010


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51432 instead


starting calculation for 2011


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51475 instead


starting calculation for 2012


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51513 instead


starting calculation for 2013


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51548 instead


starting calculation for 2014


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51584 instead


starting calculation for 2015


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51625 instead


starting calculation for 2016


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51663 instead


starting calculation for 2017


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51711 instead


starting calculation for 2018


Perhaps you already have a cluster running?
Hosting the HTTP server on port 51750 instead


In [16]:
# groupby time and plant
cems_df = (
    cems_df.groupby(['plant_id_eia', 'operating_datetime_utc']).agg({
        'state': 'first',
        'year': 'first',
        'so2_mass_lbs': 'sum',
        'nox_mass_lbs': 'sum',
        'co2_mass_tons': 'sum',
    }).reset_index()
)

##### Prep master unit list by plant percent owned

In [5]:
## Read master unit list
# mul = pd.read_pickle('/Users/aesharpe/Desktop/master_unit_list.pkl.gz')

In [289]:
# # Get the ownership fractions at the plant level
# plant_mul = mul[(mul['plant_part']=='plant') & (mul['ownership']=='owned')].copy()
# plant_mul = (
#     plant_mul.rename(columns={'report_year': 'year'})
#     .reset_index()
#     [['plant_id_eia', 'year', 'fraction_owned', 'utility_id_eia']]
#     .drop_duplicates()
# )

##### Combine CEMS and MUL on FPL data

In [17]:
cems_mul = (
    pd.merge(cems_df, plant_mul, on=['plant_id_eia', 'year'], how='left')
)

# Only get the fractions from FPL
cems_fpl = cems_mul[cems_mul['utility_id_eia']==6452]

In [18]:
cems_fpl_utility = (
    cems_fpl.assign(
        so2_mass_lbs=lambda x: x.so2_mass_lbs * x.fraction_owned,
        nox_mass_lbs=lambda x: x.nox_mass_lbs * x.fraction_owned,
        co2_mass_tons=lambda x: x.co2_mass_tons * x.fraction_owned)
    .groupby('operating_datetime_utc').agg({
        'utility_id_eia': 'first',
        'state': 'first',
        'year': 'first',
        'so2_mass_lbs': 'sum',
        'nox_mass_lbs': 'sum',
        'co2_mass_tons': 'sum',
    })
)

In [19]:
cems_fpl_utility.to_csv('CEMS_FPL.csv')

In [275]:
cems_fpl_utility.groupby(['plant_id_eia', 'operating_datetime_utc']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,so2_mass_lbs,nox_mass_lbs,co2_mass_tons,year,fraction_owned,utility_id_eia
plant_id_eia,operating_datetime_utc,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
207,2009-01-01 05:00:00+00:00,9.083393e+06,7.095893e+06,4576210.00,2009,0.50,6452
207,2010-01-01 05:00:00+00:00,0.000000e+00,0.000000e+00,0.00,2010,0.00,6452
207,2011-01-01 05:00:00+00:00,1.257438e+07,6.151063e+06,3789954.25,2011,0.50,6452
207,2012-01-01 05:00:00+00:00,1.279855e+07,1.286926e+07,3429048.75,2012,0.50,6452
207,2013-01-01 05:00:00+00:00,9.961960e+06,8.582538e+06,2553926.55,2013,0.35,6452
...,...,...,...,...,...,...,...
56407,2014-01-01 05:00:00+00:00,8.239859e+04,1.074593e+06,8157454.00,2014,1.00,6452
56407,2015-01-01 05:00:00+00:00,8.523717e+04,1.089301e+06,8413140.00,2015,1.00,6452
56407,2016-01-01 05:00:00+00:00,8.410130e+04,1.148120e+06,8314335.50,2016,1.00,6452
56407,2017-01-01 05:00:00+00:00,8.278310e+04,1.068876e+06,8191236.50,2017,1.00,6452
