In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import sqlalchemy as sa
import logging
import pathlib
from copy import deepcopy
import sys

from fuzzywuzzy import fuzz, process
from pudl_rmi.deprish import *
from pudl_rmi import connect_deprish_to_eia

In [None]:
# Configure the root logger: a single stdout handler that prints only the
# message text, with INFO as the threshold.
logger = logging.getLogger()
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt='%(message)s')
handler.setFormatter(formatter)
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Lift pandas' display limits on column count and column width.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [None]:
# Fetch the default PUDL workspace settings; the database URLs are read from
# the "ferc1_db" and "pudl_db" entries below.
pudl_settings = pudl.workspace.setup.get_defaults()

# SQLAlchemy engines for the FERC Form 1 and PUDL databases.
ferc1_engine = sa.create_engine(pudl_settings["ferc1_db"])
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

# PUDL output object consumed by make_common_assn_labeling() in the manual
# labeling section. No other cell assigns it, so without this line a fresh
# "Restart & Run All" fails with a NameError on `pudl_out`.
# NOTE(review): annual frequency ('AS') is an assumption -- confirm the
# PudlTabl settings that the labeling helper expects.
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS')

### Auto Generate Common Associations

In [None]:
# Depreciation workbook lives in the `inputs` folder beside the working directory.
file_path_deprish = pathlib.Path.cwd().parent.joinpath('inputs', 'depreciation_rmi.xlsx')

In [None]:
# Generate the default common associations (going by the helper's name) from
# the depreciation workbook at `file_path_deprish`.
common_assn = make_default_common_assn(file_path_deprish)

In [None]:
# Save the generated associations to outputs/common_assn.csv, leaving out the index.
path_common_assn = pathlib.Path.cwd().parent.joinpath('outputs', 'common_assn.csv')
common_assn.to_csv(path_common_assn, index=False)

### Process Depreciation Data

In [None]:
# Re-declare the workbook path so this section can be run on its own.
file_path_deprish = pathlib.Path.cwd().parent.joinpath('inputs', 'depreciation_rmi.xlsx')

# Pull the studies out of the workbook's first sheet (sheet_name=0).
extract_df = Extractor(file_path=file_path_deprish, sheet_name=0).execute()

# Hand the extracted data to a Transformer, which performs the processing.
transformer = Transformer(extract_df=extract_df)

In [None]:
# the transformer has 4 main steps that are encapsulated in execute
#   1. tidying w/ early_tidy()
#   2. reshaping w/ reshape() - this is where the common plant allocation is happening
#   3. filling-in w/ fill_in()
#   4. aggregating by plant w/ agg_to_idx()
# Note: two warnings will be thrown when you run this right now.
# These are the remaining issues in the common plant allocation process
# that need to be ironed out.
transform_df = transformer.execute(clobber=True)

In [None]:
# Write the transformer output to outputs/deprish_w_common.csv, leaving out the index.
path_common_deprish_c = pathlib.Path.cwd().parent.joinpath('outputs', 'deprish_w_common.csv')
transform_df.to_csv(path_common_deprish_c, index=False)

### Manual Common Labeling

In [None]:
# Heads-up: this step is slow, because the EIA data used for labeling has to be
# pulled and aggregated. It needs `pudl_out` to already exist in the kernel.
common_labeling = make_common_assn_labeling(
    pudl_out,
    file_path_deprish,
    transformer=transformer,
)

In [None]:
# Pass the aggregated, reshaped and raw depreciation tables to save_to_workbook,
# targeting outputs/deprish_cleaned.xlsx; the dict keys label each table.
path_common_dc = pathlib.Path.cwd().parent.joinpath('outputs', 'deprish_cleaned.xlsx')
connect_deprish_to_eia.save_to_workbook(
    path_common_dc,
    sheets_df_dict={
        'deprish_cleaned': transform_df,
        'deprish_cleaned_w_ferc_acct': transformer.reshaped_df,
        'deprish_raw': extract_df,
    },
)

In [None]:
# if you want to read directly from the stored CSV
#path_common_labeling= pathlib.Path().cwd().parent/'outputs' /'common_labeling.csv'
#common_labeling = pd.read_csv(path_common_labeling, index_col='line_id')

In [None]:
# Outer-join the auto-generated associations with the manual ones so rows from
# either side are kept; `indicator=True` adds a `_merge` column recording which
# side each row came from. The right-hand `line_id_main` picks up the `_m`
# suffix and is dropped afterwards.
# NOTE(review): `common_assn_manual` must already be loaded in the kernel --
# no cell visible above this one assigns it.
common_assn_override = common_assn.merge(
    common_assn_manual,
    how='outer',
    left_on=['line_id_common', 'line_id_main'],
    right_on=['line_id_main', 'line_id_main_asst'],
    suffixes=('', '_m'),
    indicator=True,
).drop(columns=['line_id_main_m'])