# Depreciation to FERC 1 Connection

This stage in the connection process is very much under construction.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import sqlalchemy as sa
import logging
import sys

import pudl_rmi
from pudl_rmi.connect_deprish_to_ferc1 import *

import warnings
# Silence every Python warning for the rest of the session so notebook output
# stays readable. NOTE(review): this also hides deprecation warnings from
# pandas and other libraries; narrow the filter when debugging.
warnings.filterwarnings('ignore')

In [None]:
# Notebook display limits: never truncate columns, print at most 60 rows.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 60)

# A stdout handler that prints only the bare log message (no level/timestamp).
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger: replace whatever handlers were attached before
# with the single stdout handler, and let INFO-level messages through.
logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

## Make the output via rmi_out

In [None]:
# Connect to the PUDL database named in the default workspace settings.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

# PUDL output object. freq='AS' is presumably the pandas year-start alias
# (annual aggregation) - confirm against PudlTabl. The three fill/roll flags
# are all switched on.
pudl_out = pudl.output.pudltabl.PudlTabl(
    pudl_engine,
    freq='AS',
    fill_fuel_cost=True,
    roll_fuel_cost=True,
    fill_net_gen=True,
)

# RMI output coordinator built on top of the PUDL output object.
rmi_out = pudl_rmi.coordinate.Output(pudl_out)

In [None]:
# Unpack the five objects returned by rmi_out.grab_all().
# NOTE(review): clobber_all=False presumably reuses cached outputs instead of
# regenerating them, and the names look like plant-part list, depreciation,
# depreciation-to-EIA, FERC1-to-EIA and the scaled result - confirm against
# pudl_rmi.coordinate.Output.
ppl, d, de, fe, scaled_df = rmi_out.grab_all(clobber_all=False)

## Make the output directly

In [None]:
# Gather the three input tables, in the same order as before. Only the
# FERC1-to-EIA table is requested with clobber=True; the depreciation-to-EIA
# table is requested with clobber=False.
connection_inputs = {
    "plant_parts_eia": rmi_out.get_plant_part_list(),
    "deprish_eia": rmi_out.get_deprish_to_eia(clobber=False),
    "ferc1_to_eia": rmi_out.get_ferc1_to_eia(clobber=True),
}
inputs = InputsManager(**connection_inputs)

# Chain the connection stages: inputs -> matcher -> scaler -> scaled table.
matcher = MatchMaker(inputs)
scaler = Scaler(matcher)
scaled_df = scaler.scale()

### Modernize Duke (Temp)

In [None]:
def fake_duke_deprish_eia_for_mod(de):
    """Temp function to fake Duke's deprish records for modernization.

    Takes the 2018 rows for ``utility_id_pudl`` 90 and 97 and appends a copy
    of them for each of the years 2019 and 2020. In each copy ``report_year``
    and ``report_date`` are set to the fake year, and the ``2018`` token in
    ``record_id_eia`` / ``line_id`` is rewritten to the fake year.

    Args:
        de (pandas.DataFrame): depreciation-to-EIA table. It must contain
            every column listed in ``cols_to_keep`` below, and
            ``report_date`` must be datetime-like (``.dt.year`` is used).

    Returns:
        pandas.DataFrame: ``de`` with the faked rows appended. The faked rows
        only carry the ``cols_to_keep`` columns, so every other column is
        null for them. The original index values are kept (no re-indexing).

    Raises:
        AssertionError: if the result contains no 2020 records, i.e. there
            was nothing to copy forward.
    """
    logger.info("Adding fake years of Duke data....")
    cols_to_keep = [
        'plant_part_name', 'utility_name_ferc1', 'report_year', 'report_date',
        'plant_name_match', 'record_id_eia', 'line_id', 'utility_id_pudl',
        'data_source'
    ]
    # The template rows do not depend on the target year: select them once
    # instead of re-filtering the whole table on every loop iteration.
    template_2018 = de[
        de.utility_id_pudl.isin([90, 97])
        & (de.report_date.dt.year == 2018)
    ]
    fake_year_dfs = []
    for fake_year in [2019, 2020]:
        de_fake_new_year = (
            template_2018
            .assign(
                report_year=fake_year,
                report_date=pd.to_datetime(fake_year, format="%Y")
            )
            # Column-wise regex replace so the IDs embed the fake year.
            .replace(
                {"record_id_eia": "_2018_",
                 "line_id": "2018_"},
                {"record_id_eia": f"_{fake_year}_",
                 "line_id": f"{fake_year}_"},
                regex=True
            )
            [cols_to_keep]
        )
        fake_year_dfs.append(de_fake_new_year)
    de_faked = pd.concat([de] + fake_year_dfs)
    # ``.empty`` is a plain bool, so the previous ``assert (~....empty)`` was
    # always truthy (~True == -2, ~False == -1) and could never fail. Check
    # explicitly, and raise so the check also survives ``python -O``.
    if de_faked[de_faked.report_date.dt.year == 2020].empty:
        raise AssertionError(
            "No 2020 records after faking Duke data; "
            "were there any 2018 records for utility_id_pudl 90 or 97?"
        )
    return de_faked

In [None]:
# Append the faked 2019/2020 rows to the depreciation-to-EIA table.
de = fake_duke_deprish_eia_for_mod(de)
# Overwrite the pickle at pudl_rmi.DEPRISH_EIA_PKL with the augmented table.
# NOTE(review): presumably the next call reads this file - confirm.
de.to_pickle(pudl_rmi.DEPRISH_EIA_PKL)
# Regenerate the depreciation-to-FERC1 output. clobber=True presumably forces
# a rebuild rather than reusing a cached result - confirm against rmi_out.
scaled_df = rmi_out.grab_deprish_to_ferc1(clobber=True)

# Columns to lead the Duke export, in output order.
# NOTE(review): the 'epxns' spelling is kept exactly as-is; it presumably
# matches the column name produced upstream - verify before "fixing" it.
cols = [
    # identifying fields
    "utility_name_ferc1_deprish",
    "report_year",
    "data_source",
    "utility_id_ferc1_deprish",
    "plant_part_name",
    "plant_id_pudl",
    # depreciation values
    "plant_balance_w_common",
    "book_reserve_w_common",
    "unaccrued_balance_w_common",
    "depreciation_annual_epxns_w_common",
    "depreciation_annual_rate",
    # record linkage
    "record_id_ferc1",
    "record_id_eia_deprish",
    "plant_name_eia_deprish",
    "plant_part_deprish",
    # capex
    "capex_total_ferc1_deprish",
    "capex_annual_addt_ferc1_deprish",
]

# Row filter: utility_id_pudl_deprish 90 or 97, report years 2018 through 2020.
duke_rows = scaled_df.utility_id_pudl_deprish.isin([90,97])
modernization_years = scaled_df.report_year.isin([2018, 2019, 2020])

# De-duplicate on every export column except the FERC1 record id.
dedupe_on = [col for col in cols if col != 'record_id_ferc1']
sort_order = [
    'utility_name_ferc1_deprish',
    'plant_part_name',
    'plant_id_pudl',
    'report_year',
    'data_source',
]

# organize_cols presumably moves `cols` to the front of the frame - confirm
# against pudl.helpers.
duke_out = pudl.helpers.organize_cols(
    scaled_df[duke_rows & modernization_years], cols
)
duke_out = duke_out.drop_duplicates(dedupe_on)
duke_out = duke_out.sort_values(sort_order)

# Write the result next to the notebook, without the index column.
duke_out.to_csv('duke_for_modernization.csv', index=False)