In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import sqlalchemy as sa
import logging
import sys

import pudl

from pudl.analysis.allocate_net_gen import *

In [None]:
# Route all log records through a single stdout handler that prints only the
# message text, and show everything at INFO level and above.
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)

logger = logging.getLogger()
# Replace (rather than append to) the root logger's handlers so re-running
# this cell does not duplicate output.
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Implementation Zone for Allocating Fuel

In [None]:
# Build a SQLAlchemy engine from the 'pudl_db' entry of the default PUDL
# workspace settings.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings['pudl_db'])
# NOTE: the fill_net_gen arg is a beta feature and is left disabled (False)
# in the call below. Per the original note, enabling it results in a much
# higher coverage of net generation and fuel consumption, but it has known
# issues with multi-fuel plants.
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='AS',
    fill_fuel_cost=True,
    roll_fuel_cost=True,
    fill_net_gen=False,
)

In [None]:
# Smoke test: make sure the net generation allocation still functions w/ the
# fuel changes. The result is kept in gen_alloc for inspection; none of the
# cells shown below read it.
gen_alloc = pudl_out.gen_allocated_eia923()

## Common Inputs Between Net Gen and Fuel Allocation

In [None]:
# Extract all of the tables from pudl_out early in the process and select
# only the columns we need. This is for speed and clarity.
# NOTE(review): IDX_PM_FUEL, IDX_GENS and IDX_FUEL are not defined in this
# notebook -- presumably they are column-name lists provided by the star
# import from pudl.analysis.allocate_net_gen; verify.

# Generation-fuel table: net generation and fuel consumed.
gf = pudl_out.gf_eia923().loc[
    :, IDX_PM_FUEL + ['net_generation_mwh', 'fuel_consumed_mmbtu']]
# Generation table: net generation only.
gen = pudl_out.gen_original_eia923().loc[
    :, IDX_GENS + ['net_generation_mwh']]
# Generators table: a fixed set of attribute columns plus every column whose
# name contains 'energy_source_code'. gens_eia860() is called a second time
# only to discover those column names.
gens = pudl_out.gens_eia860().loc[
    :, IDX_GENS + ['unit_id_pudl', 'prime_mover_code', 'capacity_mw', 'fuel_type_count',
                   'operational_status', 'retirement_date']
    + list(pudl_out.gens_eia860().filter(like='energy_source_code'))]

# Boiler-fuel table: 'fuel_type_code' is renamed to 'fuel_type' before the
# column selection (presumably so it lines up with IDX_FUEL -- verify).
bf = (
    pudl_out.bf_eia923()
    .rename(columns={'fuel_type_code': 'fuel_type'})
    .loc[:,IDX_FUEL + ['boiler_id', 'unit_id_pudl', 'fuel_consumed_mmbtu']]
)

In [None]:
# Combine the four input tables into a single generator association table,
# then derive the table used as the input to the fuel allocation below.
# NOTE(review): both helpers are not defined in this notebook -- presumably
# they come from the star import of pudl.analysis.allocate_net_gen; verify.
gen_assoc = associate_generator_tables(gf=gf, gen=gen, gens=gens, bf=bf)
gen_pm_fuel = prep_alloction_fraction(gen_assoc)

## Fuel Allocation

In [None]:

# Allocate fuel for the generators that do not show up in the BF table.
# NOTE: the worked-example cell further down reassigns `not_in_bf_tbl` to a
# hand-built DataFrame, so this result is lost once that cell has run.
not_in_bf_tbl = allocate_fuel_for_non_bf_gens(gen_pm_fuel)

# TODO: we still need to develop a process for the gens that do show up in
# the BF table. For now just select them using the boolean in_bf_tbl column.
in_bf_tbl = gen_pm_fuel.loc[gen_pm_fuel.in_bf_tbl]

# Examples!

In [None]:
# Reminder (from the original note): IDX_PM_FUEL is
# ['prime_mover_code', 'fuel_type'], plus 'plant_id_eia' and 'report_date'
# of course. The toy tables below leave the plant/date columns out.

# --- Inputs ---
# Generators: four generators spread over three units.
gens_ex = pd.DataFrame(dict(
    generator_id=list('ABCD'),
    unit_id_pudl=[1, 1, 2, 3],
    prime_mover_code=['GT', 'ST', 'GT', 'GT'],
    energy_source_code_1=['NG'] * 4,
    energy_source_code_2=['DFO', np.nan, 'DFO', np.nan],
    capacity_mw=[100, 50, 25, 10],
))

# Boiler fuel: only unit 1 has records, one per fuel type.
bf_ex = pd.DataFrame(dict(
    boiler_id=['X1', 'Y1'],
    fuel_type=['NG', 'DFO'],
    unit_id_pudl=[1, 1],
    fuel_consumed_mmbtu=[8, 2],
))

# Generation fuel: fuel consumed per prime mover / fuel type combination.
gf_ex = pd.DataFrame(dict(
    prime_mover_code=['GT', 'ST', 'GT'],
    fuel_type=['NG', 'NG', 'DFO'],
    fuel_consumed_mmbtu=[7, 2, 3],
))

# --- Interim output ---
# The gens table stacked by fuel type: one row per generator per energy
# source code, so generators with two energy sources appear twice.
gens_stacked_ex = pd.DataFrame(dict(
    generator_id=['A', 'A', 'B', 'C', 'C', 'D'],
    unit_id_pudl=[1, 1, 1, 2, 2, 3],
    prime_mover_code=['GT', 'GT', 'ST', 'GT', 'GT', 'GT'],
    fuel_type=['NG', 'DFO', 'NG', 'NG', 'DFO', 'NG'],
))
# Associate the stacked generators w/ the gf table (merge on IDX_PM_FUEL)
# and calculate the group totals (sums/groupbys) needed by the allocation.
# All values are worked out by hand from the example inputs.
gen_assoc_ex = pd.DataFrame({
    'generator_id': ['A', 'A', 'B', 'C', 'C', 'D'],
    'unit_id_pudl': [1, 1, 1, 2, 2, 3],
    'prime_mover_code':
        ['GT', 'GT',  'ST', 'GT', 'GT', 'GT'],
    'fuel_type':
        ['NG', 'DFO', 'NG', 'NG', 'DFO', 'NG'],
    'capacity_mw':
        [100,  100,   50,   25,   25,    10],
    # what we are trying to allocate
    # merge btwn gen_assoc & gf, by PM_FUEL
    # Fuel totals on a per-pm-fuel basis
    'fuel_consumed_mmbtu_gf_tbl':
        [7, 3, 2, 7, 3, 7],

    ########
    # sums/groupbys that we need before doing any of the assign-type calcs
    # capacity summed within each [pm, fuel] group:
    # GT/NG = 100 + 25 + 10, GT/DFO = 100 + 25, ST/NG = 50
    'capacity_mw_pm_fuel':
        [135,  125,   50,   135,  125,   135],
    # from gf table, summed by PM
    'fuel_consumed_mmbtu_gf_tbl_pm':
        [10, 10, 2, 10, 10, 10],
    # from gf table, summed by fuel
    'fuel_consumed_mmbtu_gf_tbl_fuel':
        [9, 3, 9, 9, 3, 9],
    # reported by unit and fuel type in BF
    # merged into association table, broadcasting across prime mover
    'fuel_consumed_mmbtu_bf_tbl':
        [8, 2, 8, np.nan, np.nan, np.nan],
    'in_bf_tbl':
        [True, True, True, False, False, False],
    # sum of cap in [pm, fuel + exist_in_bf] group:
    # the not-in-bf GT/NG group is C + D = 35, the not-in-bf GT/DFO group is
    # C alone = 25
    'capacity_mw_pm_fuel_in_bf_tbl_group':
        [100, 100, 50, 35, 25, 35],
})

In [None]:
# The gen_assoc table, but only the records that don't exist in bf,
# plus the calculated fields we need to generate the final fuel.
# NOTE: this reuses the name `not_in_bf_tbl`, replacing the table produced by
# allocate_fuel_for_non_bf_gens() in the Fuel Allocation cell above.
not_in_bf_tbl = pd.DataFrame({
    ### columns from gen_assoc
    'generator_id': ['C', 'C', 'D'],
    'unit_id_pudl': [2, 2, 3],
    'prime_mover_code': ['GT', 'GT', 'GT'],
    'fuel_type': ['NG', 'DFO', 'NG'],
    'capacity_mw': [25, 25, 10],
    # what we are trying to allocate
    # merge btwn gen_assoc & gf, by PM_FUEL
    # Fuel totals on a per-pm-fuel basis
    'fuel_consumed_mmbtu_gf':
        [7, 3, 7],

    ########
    # sums/groupbys that we need before doing any of the assign-type calcs
    # from bf table, summed by fuel
    'fuel_consumed_mmbtu_bf_fuel':
        [8, 2, 8],
    # from gf table, summed by fuel
    'fuel_consumed_mmbtu_gf_fuel':
        [9, 3, 9],
    # sum of cap in [pm, fuel + exist_in_bf] group
    # i.e. total not-in-bf capacity in each pm fuel group
    'capacity_mw_pm_fuel_in_bf_tbl_group':
        [35, 25, 35],

    # what fuel should be assigned to these "not in bf"
    # records based on fuel groupings?
    # fuel that should be allocated to the not-reporting-to-bf records
    # (the fuel in the gf tbl's fuel group - the fuel in the bf tbl's fuel group)
    # fuel_consumed_mmbtu_gf_fuel - fuel_consumed_mmbtu_bf_fuel
    'fuel_consumed_mmbtu_not_in_bf':
        [(9-8), (3-2), (9-8)],

    ### Get the frac_cap (used for allocating within gens that don't report to bf)
    # Portion of capacity for each PM_FUEL group
    # capacity_mw / capacity_mw_pm_fuel_in_bf_tbl_group
    'frac_cap':
        [25/35, 25/25, 10/35],

    # frac_cap * fuel_consumed_mmbtu_not_in_bf
    'fuel_consumed_mmbtu':
        [(9-8) * (25/35), (3-2) * (25/25), (9-8) * (10/35)],
})

# Aggregate the generator-level allocation to the units (rounded to 3 places):
#   unit 2 = 1 * (25/35) + 1 * (25/25) = 1.714
#   unit 3 = 1 * (10/35)               = 0.286
not_in_bf_unit = pd.DataFrame({
    'unit_id_pudl':
        [2, 3],
    'fuel_consumed_mmbtu':
        [1.714, 0.286]
})

# Aggregate the bf table to the unit level: unit 1 is the only unit with
# boiler fuel records (8 + 2 = 10).
bf_unit = pd.DataFrame(dict(unit_id_pudl=[1], fuel_consumed_mmbtu=[10]))

# Unit-level fuel for every unit: the bf-reported unit (1) together with the
# units that only received allocated fuel (2 and 3).
all_unit = pd.DataFrame(dict(
    unit_id_pudl=[1, 2, 3],
    fuel_consumed_mmbtu=[10, 1.714, 0.286],
))

In [None]:
# Sanity check: the fuel assigned across all units must add back up to the
# total fuel in the gf table. The unit values are rounded floats, so compare
# with a tolerance instead of exact equality.
assert np.isclose(
    all_unit.fuel_consumed_mmbtu.sum(),
    gf_ex.fuel_consumed_mmbtu.sum(),
), "unit-level fuel does not sum to the gf table total"

### Allocating the boiler fuel data to generators
We don't need to do this if our end goal is to aggregate to the unit level... but it may be helpful for other contexts so I'm leaving this here for now. This exploration is not complete, but it was an attempt to allocate the boiler-fuel data to the generator level.

In [None]:
# Hand-worked example for the generators that DO report to the bf table
# (all of them are in unit 1).
frac_df_in_bf = pd.DataFrame(dict(
    generator_id=['A', 'A', 'B'],
    unit_id_pudl=[1, 1, 1],
    prime_mover_code=['GT', 'GT', 'ST'],
    fuel_type=['NG', 'DFO', 'NG'],
    capacity_mw=[100, 100, 50],
    # merge btwn gen_assoc & gf, by PM_FUEL: fuel totals on a pm-fuel basis
    fuel_consumed_mmbtu_gf=[7, 3, 2],

    # ---- sums/groupbys needed before doing any of the assign-type calcs ----
    capacity_mw_pm_fuel=[135, 125, 50],
    # reported by unit and fuel type in BF, merged into the association
    # table and broadcast across prime movers
    fuel_consumed_mmbtu_bf=[8, 2, 8],
    # from the gf table, summed by fuel
    fuel_consumed_mmbtu_gf_fuel=[9, 3, 9],

    # capacity_mw / capacity_mw_pm_fuel
    frac_cap_in_pm_fuel=[100/135, 100/125, 50/50],

    # portion of the fuel that should be allocated to the in-bf records.
    # this is on a fuel basis bc the bf table is reported w/ only fuel (no PM)
    # fuel_consumed_mmbtu_bf / fuel_consumed_mmbtu_gf_fuel
    frac_in_bf=[(8/9), (2/3), (8/9)],
    # fuel_consumed_mmbtu_gf * frac_in_bf
    fuel_consumed_mmbtu=[7*(8/9), 3*(2/3), 2*(8/9)],
))

In [None]:
# Exploratory check (displayed, not stored): combine the capacity fraction
# with the in-bf fraction and apply the product to the gf fuel total.
frac_df_in_bf.assign(
    test=lambda df: df['frac_cap_in_pm_fuel'] * df['frac_in_bf'],
    test_fc=lambda df: df['fuel_consumed_mmbtu_gf'] * df['test'],
)