In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell executes (edits to library code are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import sqlalchemy as sa
import logging
import pathlib
from copy import deepcopy
import sys

from fuzzywuzzy import fuzz, process
from pudl_rmi.deprish import *
from pudl_rmi import connect_deprish_to_eia

In [3]:
# Send every log record at INFO level or above to the notebook's stdout,
# printing only the bare message text.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Replace (rather than append to) the root handlers so that re-running this
# cell does not duplicate every log line.
logger.handlers = [handler]

# Show every column and the full contents of each cell when displaying frames.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [4]:
# Look up the default PUDL workspace settings, then open one SQLAlchemy engine
# for each of the two database URLs stored under "ferc1_db" and "pudl_db".
# To work against a different database, pass its SQLAlchemy URL to
# sa.create_engine() instead of the workspace default.
pudl_settings = pudl.workspace.setup.get_defaults()
ferc1_engine, pudl_engine = (
    sa.create_engine(pudl_settings[db_key])
    for db_key in ("ferc1_db", "pudl_db")
)

### Auto Generate Common Associations

In [5]:
# Location of the depreciation workbook: <parent of the working dir>/inputs/.
file_path_deprish = pathlib.Path.cwd().parent / 'inputs' / 'depreciation_rmi.xlsx'

In [6]:
# Build the default (auto-generated) common associations from the depreciation
# workbook; the call logs the path it reads from.
common_assn = make_default_common_assn(file_path_deprish)

Reading the depreciation data from /Users/christinagosnell/code/rmi-ferc1-eia/inputs/depreciation_rmi.xlsx
# of reserve_rate over 1 (100%): 1. Higher #s here may indicate an issue with the original data or the fill_in method


In [7]:
# Persist the common associations to <parent of the working dir>/outputs/
# as a CSV, leaving the DataFrame index out of the file.
path_common_assn = pathlib.Path.cwd().parent / 'outputs' / 'common_assn.csv'
common_assn.to_csv(path_common_assn, index=False)

### Process Depreciation Data

In [133]:
# Path to the depreciation workbook (same location used when generating the
# common associations).
file_path_deprish = pathlib.Path.cwd().parent / 'inputs' / 'depreciation_rmi.xlsx'

# Extract: read the depreciation studies from the first sheet of the workbook.
extract_df = Extractor(file_path=file_path_deprish, sheet_name=0).execute()

# Transform: wrap the extracted frame in a Transformer; no processing happens
# in this cell beyond constructing the object.
transformer = Transformer(extract_df=extract_df)

Reading the depreciation data from /Users/christinagosnell/code/rmi-ferc1-eia/inputs/depreciation_rmi.xlsx


In [139]:
# The transformer has 4 main steps that are encapsulated in execute():
#   1. tidying w/ early_tidy()
#   2. reshaping w/ reshape() - this is where the common plant allocation happens
#   3. filling-in w/ fill_in()
#   4. aggregating by plant w/ agg_to_idx()
# Note: two warnings are currently thrown when you run this.
# These are the remaining issues in the common plant allocation process
# that need to be ironed out.
transform_df = transformer.execute(clobber=True)

# of reserve_rate over 1 (100%): 1. Higher #s here may indicate an issue with the original data or the fill_in method
overriding auto-generated common associations with 471 mannual associations
grabbed 1142 common records
grabbed 1970 common reocrds and 16020 atomic records. of total 16020
We are calculating the common portion for 12705 records w/ unaccrued_balance
We are calculating the common portion for 3315 records w/o plant_balance
The resulting plant_balance allocated is 99.49% of the original
aggregating to: ['report_date', 'plant_id_pudl', 'plant_part_name', 'utility_id_pudl', 'data_source', 'line_id', 'common', 'utility_name_ferc1']


In [147]:
# Run the common split/allocation step on its own, splitting on plant_balance,
# so the intermediate columns it produces can be inspected in the next cell.
test = transformer.split_allocate_common(split_col='plant_balance', common_suffix='_common')

overriding auto-generated common associations with 471 mannual associations
grabbed 1142 common records
grabbed 1970 common reocrds and 16020 atomic records. of total 16020
We are calculating the common portion for 12705 records w/ unaccrued_balance
We are calculating the common portion for 3315 records w/o plant_balance
The resulting plant_balance allocated is 99.49% of the original


In [148]:
# Show which columns the plant_balance allocation result carries.
test.columns

Index(['utility_id_ferc1', 'utility_id_pudl', 'utility_name_ferc1', 'state',
       'state_allocation', 'plant_id_pudl', 'plant_part_name', 'ferc_acct',
       'report_date', 'note', 'ferc_acct_full', 'plant_balance',
       'book_reserve', 'unaccrued_balance', 'reserve_rate',
       'survivor_curve_type', 'service_life_avg', 'net_salvage_rate',
       'net_salvage_rate_type_pct', 'net_salvage', 'net_removal_rate',
       'remaining_life_avg', 'retirement_date', 'depreciation_annual_epxns',
       'depreciation_annual_rate', 'depreciation_annual_rate_type_pct',
       'data_source', 'report_year', 'ferc_acct_main', 'ferc_acct_sub',
       'ferc_acct_merge', 'ferc_acct_name', 'line_id', 'plant_balance_common',
       'unaccrued_balance_sum', 'unaccrued_balance_ratio',
       'plant_balance_common_portion', 'plant_bal_count', 'plant_bal_any',
       'plant_balance_w_common', 'common', 'unaccrued_balance_ratio_check',
       'plant_balance_common_portion_check'],
      dtype='object')

In [149]:
# Same allocation step, but splitting on book_reserve instead of plant_balance.
# NOTE(review): as recorded in this cell's output, this call currently raises
# AttributeError ("'DataFrame' object has no attribute 'plant_balance_common'").
# Presumably split_allocate_common still refers to the plant_balance column
# regardless of split_col -- TODO confirm and fix in the library. Until then
# this cell stops a Restart & Run All before the export cells below.
test2 = transformer.split_allocate_common(split_col='book_reserve', common_suffix='_common')

overriding auto-generated common associations with 471 mannual associations
grabbed 1142 common records
grabbed 1970 common reocrds and 16020 atomic records. of total 16020
We are calculating the common portion for 12705 records w/ unaccrued_balance
We are calculating the common portion for 3315 records w/o book_reserve


AttributeError: 'DataFrame' object has no attribute 'plant_balance_common'

In [None]:
# Write the transformed depreciation table to
# <parent of the working dir>/outputs/deprish_w_common.csv, without the index.
path_common_deprish_c = pathlib.Path.cwd().parent / 'outputs' / 'deprish_w_common.csv'
transform_df.to_csv(path_common_deprish_c, index=False)

### Manual Common Labeling

In [None]:
# Note: pulling and aggregating EIA data for labeling takes a little bit of time...
# NOTE(review): `pudl_out` is not defined in any cell visible in this notebook,
# so this cell fails on a fresh kernel. Presumably it is a PUDL output object
# built from `pudl_engine` -- TODO define it explicitly before this cell.
common_labeling = make_common_assn_labeling(
    pudl_out, file_path_deprish, transformer=transformer)

In [145]:
# Save the cleaned, reshaped and raw depreciation tables as separate sheets of
# <parent of the working dir>/outputs/deprish_cleaned.xlsx.
# The 'common_labeling' sheet (common_labeling.reset_index()) is intentionally
# left out of this export.
path_common_dc = pathlib.Path.cwd().parent / 'outputs' / 'deprish_cleaned.xlsx'
connect_deprish_to_eia.save_to_workbook(
    path_common_dc,
    sheets_df_dict={
        'deprish_cleaned': transform_df,
        'deprish_cleaned_w_ferc_acct': transformer.reshaped_df,
        'deprish_raw': extract_df,
    },
)

Saving dataframe to /Users/christinagosnell/code/rmi-ferc1-eia/outputs/deprish_cleaned.xlsx
Removing deprish_cleaned from /Users/christinagosnell/code/rmi-ferc1-eia/outputs/deprish_cleaned.xlsx
Removing deprish_cleaned_w_ferc_acct from /Users/christinagosnell/code/rmi-ferc1-eia/outputs/deprish_cleaned.xlsx
Removing deprish_raw from /Users/christinagosnell/code/rmi-ferc1-eia/outputs/deprish_cleaned.xlsx


In [None]:
# If you want to read the labeling table directly from the stored CSV instead
# of regenerating it (note: pd.read_csv takes `index_col`, not `index`):
#path_common_labeling = pathlib.Path.cwd().parent/'outputs'/'common_labeling.csv'
#common_labeling = pd.read_csv(path_common_labeling, index_col='line_id')

In [None]:
# Outer-join the auto-generated associations with the manual ones so that rows
# present on either side are kept; `indicator=True` adds a `_merge` column
# recording which side each row came from. The left (line_id_common,
# line_id_main) pair is matched against the right (line_id_main,
# line_id_main_asst) pair. The right-hand `line_id_main` collides with the
# left-hand column of the same name, picks up the '_m' suffix, and is dropped.
# NOTE(review): `common_assn_manual` is not defined in any cell visible in this
# notebook -- TODO load it before this cell.
common_assn_override = common_assn.merge(
    common_assn_manual,
    how='outer',
    left_on=['line_id_common', 'line_id_main'],
    right_on=['line_id_main', 'line_id_main_asst'],
    suffixes=('', '_m'),
    indicator=True,
).drop(columns=['line_id_main_m'])