# Depreciation to FERC 1 Connection

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import sqlalchemy as sa
import logging
import sys
import copy
import pathlib
import random
import warnings
from copy import deepcopy

# Put the parent directory on the import path before importing the local
# modules below.
# NOTE(review): presumably these three modules live one level above this
# notebook and are not installed packages -- confirm.
sys.path.append("../")
import connect_deprish_to_eia
import make_plant_parts_eia
import connect_deprish_to_ferc1

In [3]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
logger.handlers = [handler]
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

### Prepare the inputs

In [4]:
# Every input file is looked up in the directory above the current working
# directory; resolve that directory once and build the four paths from it.
parent_dir = pathlib.Path.cwd().parent
file_path_mul = parent_dir / 'master_unit_list.pkl.gz'
file_path_steam_ferc1 = parent_dir / 'steam_ferc1.pkl.gz'
file_path_ferc1_eia = parent_dir / 'ferc1_to_eia.pkl.gz'
file_path_deprish_eia = parent_dir / 'deprish_to_eia.pkl.gz'

In [5]:
%%time
inputs = connect_deprish_to_ferc1.InputsCompiler(
    file_path_mul=file_path_mul,
    file_path_steam_ferc1=file_path_steam_ferc1,
    file_path_ferc1_eia=file_path_ferc1_eia,
    file_path_deprish_eia=file_path_deprish_eia
)

Reading the master unit list from /Users/christinagosnell/code/rmi-ferc1-eia/master_unit_list.pkl.gz
CPU times: user 1.87 s, sys: 461 ms, total: 2.33 s
Wall time: 2.66 s


### Generate the options and connections!

In [6]:
# Build the matcher from the compiled inputs and run it. Running this cell
# logs a summary to the cell output: matched / not-connected counts, matches
# per match method, and a breakdown by connection level.
match_maker = connect_deprish_to_ferc1.MatchMaker(inputs)
matches_df = match_maker.match()

Matched plants:    120
Not connected:       58
Missing connections: 3
Matches for same_true:   112
Matches for same_diff_own:   7
Matches for same_quals:   168
Matches for one_ferc1_opt:   0
Portion of unique depreciation records:
    Matched:   87.98%
    No link:   4.65%
Connected:
same_quals       168
same_true        112
same_diff_own      7
Name: match_method, dtype: int64
Connection Levels:
smol        146
samezies    119
beeg         22
Name: level_deprish, dtype: int64


In [7]:
# Construct the Scaler from the matcher built above.
# NOTE(review): presumably the Scaler reads the matches held by match_maker
# (a later cell reads scaler.matches_df) -- confirm against the module.
scaler = connect_deprish_to_ferc1.Scaler(match_maker)

In [8]:
# Run the scaling step and keep its result; the following cell compares the
# length of this result with the length of scaler.matches_df.
scaled_df = scaler.scale()

In [9]:
# Sanity check: the scaled frame must have the same number of rows as the
# matches held by the scaler. Enforce it with an assert so a violation stops
# a Restart & Run All instead of merely displaying False.
row_counts_agree = len(scaled_df) == len(scaler.matches_df)
assert row_counts_agree, (
    f"scaled_df has {len(scaled_df)} rows but scaler.matches_df has "
    f"{len(scaler.matches_df)} rows"
)
row_counts_agree  # still shown as the cell output

True

In [10]:
# Small hand-built sample frame with two plants (ids 1 and 2).
# NOTE(review): judging by the record ids, each plant-level FERC1 record is
# paired with generator-level depreciation records (three for plant 1, two
# for plant 2) -- confirm this is the case being exercised.
same_smol_test = pd.DataFrame({
    "plant_id_pudl": [1] * 3 + [2] * 2,
    "record_id_eia_ferc1": ["1_plant"] * 3 + ["2_plant"] * 2,
    "record_id_eia_deprish": [
        "1_plant_gen_a", "1_plant_gen_b", "1_plant_gen_c",
        "2_plant_gen_a", "2_plant_gen_b",
    ],
    # data point to weight (same value on every row of a plant)
    "opex_nofuel": [250] * 3 + [75] * 2,
    "net_generation_mwh_ferc1": [62500] * 3 + [11250] * 2,
    # weight by option (missing for both plant-2 rows)
    "total_fuel_cost_deprish": [100, 200, 200, pd.NA, pd.NA],
    # weight by option
    "net_generation_mwh_deprish": [10000, 15000, 35000, 4500, 7500],
    # weight by option
    "capacity_mw_deprish": [50, 100, 100, 25, 45],
})

# NOTE(review): the two lists below look like the expected per-row results of
# splitting the plant-level values; nothing in this cell compares them with
# anything -- confirm where they are used.
# Plant 1: 250 split 20/40/40 %. Plant 2: 75 split in proportion to
# net_generation_mwh_deprish (4500:7500).
opex_nofuel_ferc1_deprish = [50.0, 100.0, 100.0, 28.125, 46.875]

# Plant 1: 62500 split 20/40/40 %. Plant 2: 11250 split in proportion to
# capacity_mw_deprish (25:45), truncated to three decimals.
net_generation_mwh_ferc1_ferc1_deprish = [
    12500, 25000, 25000, 4017.857, 7232.142,
]

# Display the sample frame as the cell output.
same_smol_test

Unnamed: 0,plant_id_pudl,record_id_eia_ferc1,record_id_eia_deprish,opex_nofuel,net_generation_mwh_ferc1,total_fuel_cost_deprish,net_generation_mwh_deprish,capacity_mw_deprish
0,1,1_plant,1_plant_gen_a,250,62500,100.0,10000,50
1,1,1_plant,1_plant_gen_b,250,62500,200.0,15000,100
2,1,1_plant,1_plant_gen_c,250,62500,200.0,35000,100
3,2,2_plant,2_plant_gen_a,75,11250,,4500,25
4,2,2_plant,2_plant_gen_b,75,11250,,7500,45
