In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import sqlalchemy as sa
import logging
import pathlib
from copy import deepcopy
import sys

from fuzzywuzzy import fuzz, process
from pudl_rmi.deprish import *

In [3]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
logger.handlers = [handler]

pd.options.display.max_columns = None

In [4]:
pudl_settings = pudl.workspace.setup.get_defaults()
ferc1_engine = sa.create_engine(pudl_settings["ferc1_db"])
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

In [5]:
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS', fill_net_gen=False)

### Process Depreciation Data

In [7]:
file_path_deprish = pathlib.Path().cwd().parent/'inputs'/'depreciation_rmi.xlsx'

# get the studies from the spreadsheet
extract_df = Extractor(
    file_path=file_path_deprish,
    sheet_name=0).execute()

# create a transformer object to process the extracted data
transformer = Transformer(
    extract_df=extract_df
)

Reading the depreciation data from /Users/christinagosnell/code/rmi-ferc1-eia/inputs/depreciation_rmi.xlsx


In [8]:
# the transformer has 3 main steps that are encapsulated in execute
#   1. tidying w/ early_tidy()
#   2. reshaping w/ reshape() - this is where the common plant allocaiton is happening
#   3. filling-in w/ fill_in()
# Note: there are two warnings that will be thrown when this is run rn.
# there are the remaning edge cases in the common plant allocation process
# that need to be ironed out
transform_df = transformer.execute()

We are calculating the common portion for 6340 records w/ plant_balance
We are calculating the common portion for 305 records w/o plant_balance
The resulting plant_balance allocated is 106.00% of the original
# of reserve_rate over 1 (100%): 1 Higher #s here may indicate an issue with the original data or the fill_in method




In [9]:
transform_df.head()

Unnamed: 0,utility_id_ferc1,utility_id_pudl,utility_name_ferc1,state,state_allocation,plant_id_pudl,plant_part_name,ferc_acct,report_date,note,ferc_acct_full,plant_balance,book_reserve,unaccrued_balance,reserve_rate,survivor_curve_type,service_life_avg,net_salvage_rate,net_salvage_rate_type_pct,net_salvage,net_removal_rate,remaining_life_avg,retirement_date,depreciation_annual_epxns,depreciation_annual_rate,depreciation_annual_rate_type_pct,data_source,report_year,ferc_acct_sub,ferc_acct_name,line_id,line_id_common,plant_balance_common,count_common,count_main,plant_balance_sum,plant_balance_ratio,plant_balance_common_portion,plant_bal_count,plant_bal_any,plant_balance_w_common
0,166,302,Southwestern Public Service Company,TX,,250.0,harrington common facilities,,2018-12-31,,Land Rights,13705.0,6347.0,7357.0,0.463116,,,-0.0,True,-0.0,,22.0,,334.0,0.000244,True,PUC,2018.0,,,2018_250_harrington common facilities___302_PUC,,,,,580925900.0,2.4e-05,,,,13705.0
1,166,302,Southwestern Public Service Company,TX,,391.0,moore county – land,,2018-12-31,,Land Rights,463.0,463.0,0.0,1.0,,,-0.0,True,-0.0,,0.0,,,0.0,True,PUC,2018.0,,,2018_391_moore county – land___302_PUC,,,,,17627.0,0.026267,,,,463.0
2,166,302,Southwestern Public Service Company,TX,,1223.0,riverview,,2018-12-31,,Land and Water Rights,676.0,676.0,0.0,1.0,,,-0.0,True,-0.0,,0.0,,,0.0,True,PUC,2018.0,,,2018_1223_riverview___302_PUC,,,,,1921.0,0.3519,,,,676.0
3,166,302,Southwestern Public Service Company,TX,,,,,2018-12-31,,,151888969.0,12888790.0,139000180.0,0.084857,,,-0.0,True,-0.0,,71.78,,1936404.0,0.000127,True,PUC,2018.0,,,2018_<NA>_nan___302_PUC,,,,,3928665000.0,0.038662,,,,151888969.0
4,145,272,Public Service Company of Colorado,CO,,107.0,cherokee common,,2018-01-01,,312 Boiler Plant Equipment AQIR,17251728.0,16676667.5,,0.966666,,15.0,,,,,0.5,,1150121.0,0.066667,False,PUC,2018.0,,,2018_107_cherokee common___272_PUC,,,,,804832200.0,0.021435,,,,17251728.0


### Read and process Common Labels

In [10]:
file_path_deprish = pathlib.Path().cwd().parent/'inputs'/'depreciation_rmi.xlsx'

In [11]:
common_assn = make_default_common_assn(file_path_deprish)

Reading the depreciation data from /Users/christinagosnell/code/rmi-ferc1-eia/inputs/depreciation_rmi.xlsx


In [12]:
common_assn_wide = transform_common_assn_for_labeling(common_assn)

In [13]:
common_labeling = make_common_assn_for_labeling(common_assn, pudl_out, transformer)

In [14]:
path_common_assn=  pathlib.Path().cwd().parent/'outputs' /'common_assn.csv'
common_assn.to_csv(path_common_assn,index=False)

In [15]:
path_common_labeling=  pathlib.Path().cwd().parent/'outputs' /'common_labeling.csv'
common_labeling.to_csv(path_common_labeling)

### Process the Mannual Common Labeling

In [16]:
path_common_labeling= pathlib.Path().cwd().parent/'outputs' /'common_labeling.csv'
common_labeling = pd.read_csv(path_common_labeling)

In [36]:
common_assn_manual = (
    pd.DataFrame(
        common_labeling.filter(like='line_id')
        .set_index('line_id')
        .stack()
    )
    .reset_index()
    .drop(columns=['level_1'])
    .rename(columns={0: 'line_id_common',
                     'line_id': 'line_id_main'
                    })
)

In [38]:
common_assn_manual

Unnamed: 0,line_id_main,line_id_common
0,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 5___349_PUC
1,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 7___349_PUC
2,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 6___349_PUC
3,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 8___349_PUC
4,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 3___349_PUC
...,...,...
588,2020_405_naughton common_steam__246_PUC,2020_405_naughton unit 3_steam__246_PUC
589,2020_405_naughton common_steam__246_PUC,2020_405_naughton_steam__246_
590,2020_63_blundell geothermal common_steam__246_PUC,2020_63_blundell geothermal steam field_steam_...
591,2020_63_blundell geothermal common_steam__246_PUC,2020_63_blundell geothermal unit 1_steam__246_PUC


In [39]:
common_assn

Unnamed: 0,line_id_common,line_id_main
6630,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 5___349_PUC
6704,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 7___349_PUC
6600,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 6___349_PUC
6611,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 8___349_PUC
6683,2011_110_chesterfield common___349_PUC,2011_110_chesterfield unit 3___349_PUC
...,...,...
11198,2020_405_naughton common_steam__246_PUC,2020_405_naughton unit 1_steam__246_PUC
11185,2020_405_naughton common_steam__246_PUC,2020_405_naughton_steam__246_
11212,2020_63_blundell geothermal common_steam__246_PUC,2020_63_blundell geothermal unit 1_steam__246_PUC
11211,2020_63_blundell geothermal common_steam__246_PUC,2020_63_blundell geothermal steam field_steam_...


In [40]:
(
    pd.merge(
        common_assn,
        common_assn_manual,
        on=['line_id_common'],
        suffixes=('_a', '_m')
    )
)

Unnamed: 0,line_id_common,line_id_main_a,line_id_main_m
