# Depreciation to FERC 1 Connection

This stage in the connection process is very much under construction.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import sqlalchemy as sa
import logging
import sys

from datetime import date

import pudl_rmi
from pudl_rmi.connect_deprish_to_ferc1 import *

import pudl_rmi.model_inputs

import warnings
warnings.filterwarnings('ignore')

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Dark plot theme for every figure drawn in this notebook.
mpl.style.use('dark_background')
# Default (width, height) for figures.
figsize = (12, 5)

In [None]:
# Send every record from the root logger to stdout as a bare message.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Assigning the list replaces any handlers already attached, so re-running
# this cell does not stack up duplicate handlers.
logger.handlers = [handler]

# Show every column of a displayed dataframe, but at most 60 rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 60)

## Make the output via rmi_out

In [None]:
# pudl output object
# Read the default PUDL workspace settings and connect to the database they
# point at.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

# PUDL output tables at freq='AS' (pandas' year-start alias), with the
# fuel-cost and net-generation options used for this analysis.
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='AS',
    fill_fuel_cost=False,
    roll_fuel_cost=True,
    fill_net_gen=True,
)

# RMI output coordinator built on top of the PUDL output object.
rmi_out = pudl_rmi.coordinate.Output(pudl_out)

In [None]:
# Grab all of the tables from the RMI output object in one call.
# NOTE(review): clobber_all=True presumably regenerates every table instead of
# reusing stored copies -- confirm against pudl_rmi.coordinate.Output.grab_all.
ppl, d, de_og, fe, scaled_df = rmi_out.grab_all(clobber_all=True)

# Pass the depreciation-to-EIA table through the Duke-specific helper, then
# apply the PUDL column dtypes for the 'eia' data source.
de = pudl.helpers.convert_cols_dtypes(
    pudl_rmi.model_inputs.fake_duke_deprish_eia_for_mod(de_og, ppl),
    'eia',
)

### Scale Everything to the Generator-level & Merge

In [None]:
from pudl_rmi.connect_deprish_to_ferc1 import *
from pudl_rmi.connect_deprish_to_ferc1 import _make_record_id_eia_wo_ownership

In [None]:
# Run the depreciation-to-FERC1 connection using the plant-parts table, the
# depreciation-to-EIA table and the FERC1-to-EIA table prepared earlier.
ferc_deprish_eia = pudl_rmi.connect_deprish_to_ferc1.execute(
    ferc1_to_eia=fe,
    deprish_eia=de,
    plant_parts_eia=ppl,
)

#### Asset Retirement Cost stuff

In [None]:
# There are some temp functions re: Asset retirement cost (ARC) that haven't
# been checked in yet and probably won't be, because they are generally
# one-offs.
#
# Check for the helpers explicitly rather than catching AttributeError /
# TypeError: a blanket except would also hide a genuine error raised inside
# the helpers or the merge and report it as "not checked in".
if (
    hasattr(pudl_rmi.model_inputs, 'make_dep_arc')
    and hasattr(pudl_rmi.model_inputs, 'scale_arc')
):
    arc = pudl_rmi.model_inputs.make_dep_arc(d)
    scaled_arc = pudl_rmi.model_inputs.scale_arc(arc, ppl)
else:
    logger.info("this is not checked in rn")
    scaled_arc = None

if scaled_arc is None:
    # Nothing to merge; ferc_deprish_eia is left exactly as it was.
    logger.info("the ARC table isn't available rn bc the functions aren't checked in")
else:
    # Left-merge on the index so every existing record is kept and gains an
    # 'arc_by_plant' column where the ARC table has a matching index entry.
    ferc_deprish_eia = ferc_deprish_eia.merge(
        scaled_arc[['arc_by_plant']],
        right_index=True, left_index=True,
        how='left',
    )

### Export Duke Jawn

In [None]:
# Add the non-plant depreciation records to the connected table.
ferc_deprish_eia1 = pudl_rmi.model_inputs.append_non_plant_deprish_records(
    d, ferc_deprish_eia, ppl
)
# Lower-case the FERC account names.
ferc_deprish_eia1['ferc_acct_name'] = (
    ferc_deprish_eia1['ferc_acct_name'].str.lower()
)

In [None]:
# Keep only the report years of interest for the two utility_id_pudl values
# (90 and 97) that this notebook exports as "Duke".
years = [2018, 2019, 2020]
duke_all = ferc_deprish_eia1[
    ferc_deprish_eia1.report_year.isin(years)
    & ferc_deprish_eia1.utility_id_pudl.isin([90, 97])
]

# Records from the PUC source. A null data_source is treated as PUC as well
# (it is filled with 'PUC' below). Nulls are detected with isna() rather than
# isin([..., pd.NA]) so that NaN / None are caught too, not only the pd.NA
# singleton.
duke_puc = duke_all[
    duke_all.data_source.isin(['PUC']) | duke_all.data_source.isna()
]

# Records for plants that have no PUC record at all. Rows without a
# plant_id_eia are dropped here.
duke_ferc = duke_all[
    ~duke_all.plant_id_eia.isin(duke_puc.plant_id_eia.unique())
    & ~duke_all.plant_id_eia.isnull()
]

duke = (
    pd.concat([duke_puc, duke_ferc])
    .assign(data_source=lambda x: x.data_source.fillna('PUC'))
    .sort_index()
)


In [None]:
# Total annual capex additions in the FERC1-to-EIA table for utilities 90 and
# 97, by utility, FERC account name and report year. Null group keys are kept
# (dropna=False), and a group with no non-null values sums to NA rather than 0
# (min_count=1).
fe.loc[
    fe.report_year.isin([2018, 2019, 2020])
    & fe.utility_id_pudl.isin([90, 97])
].groupby(
    ['utility_id_pudl', 'ferc_acct_name', 'report_year'],
    dropna=False,
)[['capex_annual_addition']].sum(min_count=1)

In [None]:
# Same summary for the assembled Duke table, additionally split by data
# source. Null group keys are kept, and all-null groups sum to NA.
duke.groupby(
    ['data_source', 'utility_id_pudl', 'ferc_acct_name', 'report_year'],
    dropna=False,
)[['plant_balance_w_common', 'capex_annual_addition']].sum(min_count=1)

In [None]:
# Convert the Duke records into the model-input format.
duke_out = pudl_rmi.model_inputs.convert_to_model_format(
    duke.sort_index().reset_index(),
    pudl_out,
    years=[2018, 2019, 2020],
    util_ids_pudl=[90, 97],
)

# The file name carries today's date (YYYY-MM-DD).
duke_csv_path = (
    pudl_rmi.OUTPUTS_DIR
    / f"current_owned_duke_{date.today().strftime('%Y-%m-%d')}.csv"
)
duke_out.to_csv(duke_csv_path, index=False)

# Validation

In [None]:
# Run the 'plant_balance_w_common' column check on the connected output and
# the depreciation table.
# NOTE(review): the two results are presumably the utility-level and
# plant-level problem records -- confirm against data_col_test.
util_bad, plant_bad = data_col_test(
    ferc_deprish_eia1,
    d,
    'plant_balance_w_common',
)

In [None]:
# Keys that identify one plant within one utility and report year.
plant_pks = ['report_year', 'utility_id_pudl', 'plant_id_pudl']
plants = pudl_out.plants_eia860()

# Plant-level totals from three sources, each suffixed so the columns stay
# distinguishable after merging:
#   _steam : capex_total from the FERC1-to-EIA table
#   _puc   : plant_balance_w_common from the PUC records
#   _edcfu : plant_balance_w_common from the FERC records
balance_col = 'plant_balance_w_common'
steam_totals = group_sum_col(fe, 'capex_total', by=plant_pks).add_suffix('_steam')
puc_totals = group_sum_col(
    ferc_deprish_eia1[ferc_deprish_eia1.data_source == 'PUC'],
    balance_col,
    by=plant_pks,
).add_suffix('_puc')
edcfu_totals = group_sum_col(
    ferc_deprish_eia1[ferc_deprish_eia1.data_source == 'FERC'],
    balance_col,
    by=plant_pks,
).add_suffix('_edcfu')

# Columns to bring to the front of the comparison table.
leading_cols = plant_pks + [
    'plant_name_eia',
    'capex_total_steam',
    'plant_balance_w_common_edcfu',
    'plant_balance_w_common_puc',
]

plant_compare = (
    pd.merge(
        steam_totals, puc_totals,
        how='outer', left_index=True, right_index=True,
    )
    .merge(edcfu_totals, how='outer', left_index=True, right_index=True)
    # NOTE: despite the `_diff` suffix, these columns are ratios.
    .assign(
        edcfu_to_steam_diff=lambda x: x.plant_balance_w_common_edcfu / x.capex_total_steam,
        edcfu_to_puc_diff=lambda x: x.plant_balance_w_common_edcfu / x.plant_balance_w_common_puc,
        puc_to_steam_diff=lambda x: x.plant_balance_w_common_puc / x.capex_total_steam,
    )
    .reset_index()
    # Attach plant names. This merge uses the default (inner) join on
    # plant_id_pudl.
    .merge(
        plants[['plant_id_pudl', 'plant_name_eia']].drop_duplicates(),
        on=['plant_id_pudl'],
    )
    .pipe(pudl.helpers.organize_cols, leading_cols)
)

In [None]:
# Write the plant comparison rows for utilities 90 and 97 in 2018-2020 to a
# CSV in the current working directory.
duke_plant_rows = (
    plant_compare.utility_id_pudl.isin([90, 97])
    & plant_compare.report_year.isin([2018, 2019, 2020])
)
plant_compare[duke_plant_rows].to_csv('duke_plant_compare.csv', index=False)