In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import pandas as pd
import numpy as np
import sqlalchemy as sa
import logging
from typing import List

import pudl

In [None]:
# Send log records to stdout (so they appear in the cell output) and print
# only the bare message, without any level or timestamp prefix.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger: INFO and above, with ``handler`` as its only
# handler. Replacing the handler list (rather than appending to it) means
# re-running this cell does not stack up duplicate handlers.
logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

In [None]:
# Build the PUDL output object that every table in this notebook is pulled
# from. The database URL comes from the default PUDL workspace settings.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='AS',  # pandas offset alias for annual, year-start frequency
    fill_net_gen=True,
    fill_fuel_cost=True,
    roll_fuel_cost=True,
)

In [None]:
# NOTE: everything in this cell should probably live in a module somewhere...
SUM_COLS: List[str] = [
    'total_fuel_cost', 'net_generation_mwh', 'capacity_mw', 'total_mmbtu',
]
"""
list: additive columns, i.e. the columns to sum when aggregating a table.
"""

# Fuel type code -> broad fuel category.
FUEL_CATEGORY_MAP = dict(
    oil='fossil',
    wind='renewables',
    hydro='hydro',
    coal='fossil',
    gas='fossil',
    other='other',
    nuclear='other',
    solar='renewables',
    waste='other',
)

# Utility entity type -> broad entity category. All government-like and
# cooperative entity types are collapsed into a single 'public' category.
ENTITY_CATEGORY_MAP = dict(
    investor_owned='investor_owned',
    municipal='public',
    cooperative='public',
    state='public',
    independent_power_producer='independent_power_producer',
    political_subdivision='public',
    federal='public',
    other='other',
    commercial='commercial',
    industrial='industrial',
)


def slice_by_ownership(
    gens_mega: pd.DataFrame, own_eia860: pd.DataFrame
) -> pd.DataFrame:
    """
    Split generator records into one record per owner, scaled by ownership.

    Why do we have to do this at all? Sometimes generators are owned by
    many different utilities that each own a slice of that generator. EIA
    reports which portion of each generator is owned by which utility in
    its ownership table, which lets us attribute each generator's data to
    its owners in proportion to their share.

    The ownership table is left-merged onto the generators, so generators
    with multiple owners end up with one record per owner, carrying that
    owner's share in the ``fraction_owned`` column. Generators that have no
    match in the ownership table are assumed to be owned 100% by their
    operator: ``fraction_owned`` is filled with 1 and the operator's
    ``utility_id_eia`` is used as the owner. The owner then replaces the
    operator in the ``utility_id_eia`` column, and every column listed in
    ``SUM_COLS`` is multiplied by ``fraction_owned``.

    Note that only owner-share records are produced: every returned record
    is labeled ``"owned"`` in the ``ownership`` column. No additional
    full-generator ("total") records are generated by this function.

    Args:
        gens_mega: generator table with at least the columns
            ``plant_id_eia``, ``generator_id``, ``report_date``,
            ``utility_id_eia`` (the operator) and everything in
            ``SUM_COLS``. It must have at most one record per
            plant / generator / report date.
        own_eia860: EIA 860 ownership table with at least the columns
            ``plant_id_eia``, ``generator_id``, ``report_date``,
            ``fraction_owned`` and ``owner_utility_id_eia``.

    Returns:
        A new dataframe with one record per generator, report date and
        owner, in which ``utility_id_eia`` identifies the owner, the
        ``SUM_COLS`` hold the owner's share of each quantity, and the
        ``fraction_owned`` and ``ownership`` columns have been added. The
        input ``gens_mega`` is not modified.

    Raises:
        pandas.errors.MergeError: if ``gens_mega`` has duplicate records
            for a plant / generator / report date (``validate='1:m'``).
    """
    # grab the ownership table, and reduce it to only the columns we need
    own860 = (
        own_eia860
        [['plant_id_eia', 'generator_id', 'report_date',
          'fraction_owned', 'owner_utility_id_eia']]
        .pipe(pudl.helpers.convert_cols_dtypes, 'eia')
    )
    # we're left merging BC we've removed the retired gens, which are
    # reported in the ownership table
    gens_mega = (
        gens_mega.merge(
            own860,
            how='left',
            on=['plant_id_eia', 'generator_id', 'report_date'],
            validate='1:m'
        )
        .assign(  # assume gens that don't show up in the own table have one 100% owner
            fraction_owned=lambda x: x.fraction_owned.fillna(value=1),
            # assign the operator id as the owner if null bc if a gen isn't
            # reported in the own_eia860 table we can assume the operator
            # is the owner
            owner_utility_id_eia=lambda x:
                x.owner_utility_id_eia.fillna(x.utility_id_eia),
            ownership='owned'
        )   # swap in the owner as the utility
        .drop(columns=['utility_id_eia'])
        .rename(columns={'owner_utility_id_eia': 'utility_id_eia'})
    )

    # scale the additive columns down to each owner's share; the merge above
    # already produced a new frame, so the caller's input is left untouched
    gens_mega.loc[:, SUM_COLS] = (
        gens_mega.loc[:, SUM_COLS]
        .multiply(gens_mega['fraction_owned'], axis='index')
    )
    return gens_mega


def label_generators(gens_own: pd.DataFrame) -> pd.DataFrame:
    """
    Add broad fuel and entity category labels to a generator table.

    Values of ``fuel_type_code_pudl`` and ``entity_type`` are translated
    with ``FUEL_CATEGORY_MAP`` and ``ENTITY_CATEGORY_MAP`` respectively.
    Because ``replace`` is used, any value (including nulls) that is not a
    key of the relevant map is carried through unchanged.

    Args:
        gens_own: generator table with ``fuel_type_code_pudl`` and
            ``entity_type`` columns.

    Returns:
        A new dataframe with ``fuel_category`` and ``entity_category``
        columns added (or overwritten if they already exist). The input
        frame is not modified.
    """
    # .assign builds a new frame instead of writing into the caller's one, so
    # the function is safe to re-run and to use inside a ``.pipe`` chain.
    return gens_own.assign(
        fuel_category=lambda x: x.fuel_type_code_pudl.replace(FUEL_CATEGORY_MAP),
        entity_category=lambda x: x.entity_type.replace(ENTITY_CATEGORY_MAP),
    )

In [None]:
# Get inputs from pudl_out
# Utility table: used below for each utility's ``entity_type`` by report date.
utils = pudl_out.utils_eia860()
# Generator-level MCOE table. NOTE(review): all_gens=True presumably keeps
# generators that lack MCOE data -- confirm against the PudlTabl.mcoe docs.
mcoe = pudl_out.mcoe(all_gens=True)
# Generator ownership table: owner utility IDs and ``fraction_owned``.
own_eia860 = pudl_out.own_eia860()

In [None]:
# process inputs: split every generator across its owners, attach each
# owner's entity type, then add the fuel / entity category labels
gens_own = label_generators(
    slice_by_ownership(mcoe, own_eia860).merge(
        utils[['utility_id_eia', 'report_date', 'entity_type']],
        how='left',
        on=['utility_id_eia', 'report_date'],
        validate='m:1',
    )
)

# Ownership Breakdown

In [None]:
def group_generators(
    data_col='capacity_mw',
    years=None,
    op_statuses=None,
    gb_by=None,
    entity_cats=None,
    fuel_cats=None,
    gens=None,
):
    """
    Sum one data column over groups of generators and report each group's share.

    The generator table is filtered by report year, operational status, fuel
    category and entity category, then grouped and summed. Groups whose
    grouping keys are null are kept (``dropna=False``).

    Args:
        data_col (str): name of the column to sum.
        years (list of int): report years to include. Defaults to ``[2020]``.
        op_statuses (list of str): values of ``operational_status`` to
            include. Defaults to ``['existing']``.
        gb_by (str or list of str): column(s) to group by. Defaults to
            ``['report_date', 'fuel_category', 'entity_category']``.
        entity_cats (list-like of str): values of ``entity_category`` to
            include. ``None`` or an empty collection means every category
            present in the table.
        fuel_cats (list-like of str): values of ``fuel_category`` to include.
            ``None`` or an empty collection means every category present in
            the table.
        gens (pandas.DataFrame): generator table to summarize. It needs the
            columns ``report_date`` (datetime), ``operational_status``,
            ``fuel_category``, ``entity_category``, ``data_col`` and whatever
            is named in ``gb_by``. Defaults to the notebook-level
            ``gens_own`` table.

    Returns:
        pandas.DataFrame: indexed by ``gb_by``, sorted by ``data_col`` in
        descending order, with two columns: the summed ``data_col`` and
        ``percentage``, each group's share of the total across all returned
        groups. Both columns are rounded to two decimals.
    """
    # Resolve defaults inside the body so that no mutable objects are shared
    # between calls through the signature.
    if gens is None:
        gens = gens_own
    if years is None:
        years = [2020]
    if op_statuses is None:
        op_statuses = ['existing']
    if gb_by is None:
        gb_by = ['report_date', 'fuel_category', 'entity_category']
    # Explicit None / length checks: plain truthiness (``not fuel_cats``)
    # raises for multi-element numpy arrays and pandas Series.
    if fuel_cats is None or len(fuel_cats) == 0:
        fuel_cats = gens.fuel_category.unique()
    if entity_cats is None or len(entity_cats) == 0:
        entity_cats = gens.entity_category.unique()

    selected = gens[
        gens.report_date.dt.year.isin(years)
        & gens.operational_status.isin(op_statuses)
        & gens.fuel_category.isin(fuel_cats)
        & gens.entity_category.isin(entity_cats)
    ]
    grouped = (
        selected
        .groupby(gb_by, dropna=False)
        [[data_col]].sum()
        .sort_values([data_col], ascending=False)
    )

    total = grouped[data_col].sum()
    return (
        grouped
        .assign(percentage=lambda x: (x[data_col] / total) * 100)
        .round(2)
    )

In [None]:
# proposed plants: capacity and share by entity category and fuel category
group_generators(
    op_statuses=['proposed'],
    gb_by=['report_date', 'entity_category', 'fuel_category'],
)

In [None]:
# proposed fossil plants only, broken out by detailed fuel type
group_generators(
    op_statuses=['proposed'],
    fuel_cats=['fossil'],
    gb_by=['report_date', 'entity_category', 'fuel_type_code_pudl'],
)

# Existing Plants

In [None]:
# existing fossil plants: capacity and share by entity category
group_generators(
    op_statuses=['existing'],
    fuel_cats=['fossil'],
    gb_by=['report_date', 'entity_category', 'fuel_category'],
)

In [None]:
# ownership-sliced records of existing fossil generators reported for 2020
gens_fossil = gens_own.loc[
    lambda df: (
        (df.operational_status == 'existing')
        & (df.report_date.dt.year == 2020)
        & (df.fuel_category == 'fossil')
    )
]

In [None]:
# share of existing fossil capacity that has no planned retirement date
not_retiring = (
    gens_fossil.loc[
        gens_fossil.planned_retirement_date.isnull(), 'capacity_mw'
    ].sum()
    / gens_fossil.capacity_mw.sum()
)
logger.info(f"Fossil plants w/ no retirement date: {not_retiring:.01%}")

In [None]:
# capacity and net generation of fossil generators that do have a planned
# retirement date, by retirement year and detailed fuel type
(
    gens_fossil
    .loc[lambda df: df.planned_retirement_date.notnull()]
    .assign(planned_retirement_year=lambda df: df.planned_retirement_date.dt.year)
    .groupby(['planned_retirement_year', 'fuel_type_code_pudl'])
    [['capacity_mw', 'net_generation_mwh']]
    .sum()
)