# Depreciation to FERC 1 Connection

This stage in the connection process is very much under construction.

In [None]:
# IPython magics: load the autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import sqlalchemy as sa
import logging
import sys

import pudl_rmi
from pudl_rmi.connect_deprish_to_ferc1 import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Send log records to stdout, printing only the bare message text.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger: our handler becomes its only handler and the
# threshold is set to INFO.
logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Show every column of a DataFrame, but cap the number of displayed rows.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 60)

## Make the output via rmi_out

In [None]:
# Connect to the PUDL database named in the default workspace settings.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

# Keyword options handed to the PUDL output object.
pudl_tabl_options = {
    'freq': 'AS',
    'fill_fuel_cost': True,
    'roll_fuel_cost': True,
    'fill_net_gen': True,
}
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, **pudl_tabl_options)

# RMI output coordinator built on top of the PUDL output object.
rmi_out = pudl_rmi.coordinate.Output(pudl_out)

In [None]:
# Pull two tables from the RMI output object: the plant part list (ppl) and
# the depreciation data (d).
ppl = rmi_out.get_plant_part_list()
d = rmi_out.get_deprish()

In [None]:
# Fetch the depreciation-to-FERC1 output with every clobber flag off
# (presumably this reuses previously generated outputs -- TODO confirm).
scaled_df = rmi_out.get_deprish_to_ferc1(
    clobber=False,
    clobber_de=False,
    clobber_fe=False
)
# Also fetch the depreciation-to-EIA output and (again) the depreciation data.
de = rmi_out.get_deprish_to_eia()
d = rmi_out.get_deprish(clobber=False)

In [None]:
# Depreciation records for PUDL utilities 90 and 97 that have a negative
# book reserve and a 2018 report date.
is_target_utility = d.utility_id_pudl.isin([90, 97])
has_negative_reserve = d.book_reserve_w_common < 0
is_2018 = d.report_date.dt.year == 2018
d[is_target_utility & has_negative_reserve & is_2018]

In [None]:
# Same check on the scaled output: PUDL utilities 90 and 97 (depreciation
# side), negative book reserve, report year 2018.
is_target_utility = scaled_df.utility_id_pudl_deprish.isin([90, 97])
has_negative_reserve = scaled_df.book_reserve_w_common < 0
is_2018 = scaled_df.report_year == 2018
scaled_df[is_target_utility & has_negative_reserve & is_2018]

## Remap Duke Utilities

In [None]:
# template = pd.read_csv(pudl_rmi.inputs_dir / 'capex template.csv')
# Read the operation projection input file, which is ISO-8859-1 encoded.
ops_pro_path = pudl_rmi.inputs_dir / 'operation_projection.csv'
ops_pro = pd.read_csv(ops_pro_path, encoding="ISO-8859-1")

In [None]:
# Unique plant-level rows from the operation projection ...
plant_cols = ['Unique_ID', 'Utility', 'Asset Status', 'Resource Type', 'Resource/Plant']
plants_e = ops_pro[plant_cols].drop_duplicates()

# ... restricted to existing assets that belong to Duke.
is_existing = plants_e['Asset Status'] == 'Existing'
is_duke = plants_e['Utility'] == 'Duke'
plants_e = plants_e[is_existing & is_duke]

In [None]:
# The plant name is whatever precedes the first ":" in 'Resource/Plant'.
# plants_e was produced by boolean filtering, so writing a new column into it
# through .loc is the chained-assignment pattern pandas warns about (the
# warning is hidden here because all warnings are silenced at import time).
# assign() returns a new frame with the column added instead.
plants_e = plants_e.assign(
    plant_name_eia=plants_e['Resource/Plant'].str.split(":").str[0]
)

In [None]:
# Attach the EIA utility name and plant ID to each plant by matching on the
# plant name; plants without a match keep their row (left merge).
plants_eia = pudl_out.plants_eia860()
eia_plant_lookup = (
    plants_eia[['plant_name_eia', 'utility_name_eia', 'plant_id_eia']]
    .drop_duplicates()
)
plants_e = plants_e.merge(eia_plant_lookup, on=['plant_name_eia'], how='left')

In [None]:
# Long-form Duke utility names and the shorter names that replace them.
duke_rename = {
    "Duke Energy Carolinas, LLC": "Duke Energy Carolinas",
    "Duke Energy Progress - (NC)": "Duke Energy Progress",
}

In [None]:
# Apply the duke_rename mapping; no column is specified, so replace() runs
# over the whole frame.
plants_e = plants_e.replace(duke_rename)

In [None]:
# Write one row per plant name to 'duke ids.csv'. The path is relative, so the
# file lands in the current working directory.
# NOTE(review): the other exports in this notebook go to pudl_rmi.outputs_dir
# -- confirm this location is intended.
plants_e.drop_duplicates(subset=['plant_name_eia']).to_csv('duke ids.csv')

In [None]:
# Rows with a null utility_name_eia, one row per plant name.
plants_e[plants_e.utility_name_eia.isnull()].drop_duplicates(subset=['plant_name_eia'])

In [None]:
# Number of distinct plant names under each EIA utility name.
plants_e.groupby(['utility_name_eia'])[['plant_name_eia']].nunique()

In [None]:
# Plants table from the PUDL output object, bound to a short name.
# NOTE(review): `p` is not referenced again in the visible cells.
p = pudl_out.plants_eia860()

In [None]:
# Which EIA utility IDs appear on plant parts whose name contains "Sherwood"?
# (optional extra filter, currently off: ppl.fuel_type_code_pudl == 'hydro')
sherwood_mask = ppl.plant_name_eia.str.contains('Sherwood')
ppl[sherwood_mask].utility_id_eia.unique()

In [None]:
# `utils` is not assigned in any visible cell of this notebook, so on a fresh
# kernel this lookup fails with a NameError. Load the EIA 860 utilities table
# from the PUDL output object before using it.
utils = pudl_out.utils_eia860()

# Names of the EIA utilities behind the IDs of interest.
utility_ids_to_check = [163, 5416, 14276, 14277, 59762, 63565, 63621]
utils[utils.utility_id_eia.isin(utility_ids_to_check)].utility_name_eia.unique()

## Make capex template for Duke

In [None]:
# Column name in the scaled output -> header used in the capex template.
# The insertion order is also the column order of the exported template.
col_to_rename = {
    "auto_id": "Unique_ID",
    "utility_name_ferc1_deprish": "Utility",
    "scenario": "Scenario",
    "operational_status": "Asset Status",
    "plant_part_name": "Plants",  # alternative: plant_name_new_deprish
    "generator_id": "Unit",
    "resource_type": "Resource Type",  # TODO: generate this column
    "state": "State (Physical Location)",
    "fraction_owned": "Ownership Percentage (%)",
    "capacity_mw": "Net Capacity (MW)",
    "capacity_factor_eia": "Capacity Factor (%)",
    "net_generation_mwh": "Net Generation in 2019 (MWh)",
    "co2_mass_tons": "CO2 Emission (tons)",
    "total_fuel_cost": "Fuel Cost ($)",
    "variable_om": "Non-Fuel Variable O&M Costs ($)",
    "fixed_om": "Fixed O&M Costs ($)",
    "opex_nonfuel": "Total O&M Cost",
    "installation_year_eia": "Commission Year",
    "remaining_life_avg": "Current Remaining Accounting Life (Yrs from 2019)",
    "plant_balance_w_common": "Gross Plant Balance/Original Cost as of Dec 2019 ($)",
    "book_reserve_w_common": "Book Reserve/Accumulated Depreciation as of Dec 2019 ($)",
    "unaccrued_balance_w_common": "Current Net Plant Balance ($)",
    "depreciation_annual_epxns_w_common": "Annual Depreciation Expense ($)",
    "depreciation_annual_rate": "Depreciation Rate (%)",
    "net_removal_rate": "Decommissioning Cost ($)",
    "line_id": "Record ID Depreciation",
    "record_id_eia_deprish": "Record ID EIA (MUL)",
    "record_id_ferc1": "Record ID FERC 1",
}

# technology_description value -> resource type label.
# Resource types with no technology description mapped yet (would be keyed on
# pd.NA): Transmission, Distribution, EE, DR, Battery.
tech_descrpt_to_resource_type = {
    "Conventional Steam Coal": "Coal",
    "Natural Gas Fired Combined Cycle": "NaturalGasCC",
    "Natural Gas Fired Combustion Turbine": "NaturalGasCT",
    "Natural Gas Steam Turbine": "NaturalGasCT",  # uncertain mapping
    "Geothermal": "Geothermal",
    "Onshore Wind Turbine": "LandbasedWind",
    "Conventional Hydroelectric": "Hydropower",
    "Solar Photovoltaic": "UtilityPV",
    "Nuclear": "Nuclear",
    "Offshore Wind Turbine": "OffshoreWind",  # this one is a guess
    "Solar Thermal with Energy Storage": "SolarPlusBattery",
}

In [None]:
def make_auto_id(scaled_df):
    """
    Index the records by a sequential, "E"-prefixed ID for the RMI models.

    The incoming index is discarded and replaced by a fresh 0-based one, so
    the numbering never depends on the index the frame arrived with. Each
    position is shifted by one (IDs start at 1, not 0) and prefixed with "E",
    which stands for existing plants, to form the ``auto_id`` column. That
    column becomes the index of the returned frame; the frame passed in is
    left untouched.
    """
    renumbered = scaled_df.reset_index(drop=True)
    renumbered['auto_id'] = "E" + (renumbered.index + 1).map(str)
    return renumbered.set_index('auto_id')

# Columns that exist on both sides of the connection (as <col>_deprish and
# <col>_ferc1). Take the depreciation value and fall back to the FERC 1 value
# where the depreciation one is missing.
cols_to_choose = [
    'plant_id_eia',
    'utility_id_eia',
    'plant_name_eia',
    'generator_id',
    'fraction_owned',
    'technology_description',
]
for col in cols_to_choose:
    deprish_values = scaled_df[f'{col}_deprish']
    ferc1_values = scaled_df[f'{col}_ferc1']
    scaled_df.loc[:, col] = deprish_values.fillna(ferc1_values)

# Not currently applied -- add the utility name:
# scaled_df = scaled_df.merge(
#     pudl_out.utils_eia860()[['utility_id_eia', 'utility_name_eia']].drop_duplicates(),
#     on=['utility_id_eia'],
#     validate='m:1',
#     how='left'
# )

# Add the plant's state; the merge asserts at most one state row per plant.
plant_states = pudl_out.plants_eia860()[['plant_id_eia', 'state']].drop_duplicates()
scaled_df = scaled_df.merge(
    plant_states,
    on=['plant_id_eia'],
    how='left',
    validate='m:1',
)
scaled_df = make_auto_id(scaled_df)

# Template columns with no data source yet are created empty.
for placeholder_col in ['scenario', 'variable_om', 'fixed_om', 'co2_mass_tons']:
    scaled_df.loc[:, placeholder_col] = pd.NA

# Translate technology descriptions into resource types; descriptions that are
# not in the mapping are carried over unchanged by replace().
scaled_df.loc[:, 'resource_type'] = scaled_df.technology_description.replace(
    tech_descrpt_to_resource_type
)

In [None]:
# Turn the index back into a column, switch to the template headers, and keep
# only the template columns in template order.
renamed_all = scaled_df.reset_index().rename(columns=col_to_rename)
model_input_all = renamed_all[list(col_to_rename.values())]

In [None]:
duke_ids = [97, 90]
# duke_de = de[(de.utility_id_pudl.isin(duke_ids)) & (de.report_year == 2018)]

# 2018 records that belong to a Duke utility on either side of the connection.
on_deprish_side = scaled_df.utility_id_pudl_deprish.isin(duke_ids)
on_ferc1_side = scaled_df.utility_id_pudl_ferc1.isin(duke_ids)
is_2018 = scaled_df.report_year == 2018
duke_s = scaled_df[(on_deprish_side | on_ferc1_side) & is_2018]

# Renumber the Duke subset on its own, then shape it like the template.
duke_s = make_auto_id(duke_s)
renamed_duke = duke_s.reset_index().rename(columns=col_to_rename)
model_input_duke = renamed_duke[list(col_to_rename.values())]

# Export both templates to the outputs directory.
duke_template_path = pudl_rmi.outputs_dir / "capex template duke.csv"
model_input_duke.to_csv(duke_template_path, index=False)
all_template_path = pudl_rmi.outputs_dir / "capex template all.csv"
model_input_all.to_csv(all_template_path, index=False)

In [None]:
# Display the Duke template table.
model_input_duke

In [None]:
# Write the all-utility template to the outputs directory (the same file name
# is also written by the previous export cell).
model_input_all.to_csv(pudl_rmi.outputs_dir / "capex template all.csv", index=False)
# Write the untrimmed Duke records as well.
# NOTE(review): index=False leaves duke_s's auto_id index out of this file --
# confirm that is intended.
duke_s.to_csv(pudl_rmi.outputs_dir / "deprish_ferc_eia_duke.csv", index=False)

In [None]:
# Display the Duke template table (same expression as an earlier cell).
model_input_duke

## Make the output directly

In [None]:
# Collect the three inputs. Only the FERC1-to-EIA connection is requested with
# clobber=True; the depreciation-to-EIA connection is requested with
# clobber=False.
plant_parts_eia = rmi_out.get_plant_part_list()
deprish_eia = rmi_out.get_deprish_to_eia(clobber=False)
ferc1_to_eia = rmi_out.get_ferc1_to_eia(clobber=True)
inputs = InputsManager(
    plant_parts_eia=plant_parts_eia,
    deprish_eia=deprish_eia,
    ferc1_to_eia=ferc1_to_eia,
)

# Run the pipeline by hand: inputs -> matcher -> scaler -> scaled output.
matcher = MatchMaker(inputs)
scaler = Scaler(matcher)
scaled_df = scaler.scale()

In [None]:
# Inspect the operational_status column of the scaled output.
scaled_df.operational_status

## AhhhhhHHHHH