# Depreciation to FERC 1 Connection

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import pudl
import pudl.constants as pc
import sqlalchemy as sa
import logging
import sys
import copy
import pathlib
import random
import warnings
from copy import deepcopy

sys.path.append("../")
import connect_deprish_to_eia
import make_plant_parts_eia
import connect_deprish_to_ferc1

In [None]:
# Route root-logger output at INFO and above to stdout as the bare message text.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)
logger = logging.getLogger()
# Replace (rather than append to) the handler list so that re-running this
# cell does not stack up duplicate handlers.
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Display every column, and up to 100 rows, when a dataframe is shown.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)

### Prepare the inputs

In [None]:
# All four input files sit in the parent of the current working directory.
input_dir = pathlib.Path.cwd().parent
file_path_mul = input_dir / 'master_unit_list.pkl.gz'
file_path_steam_ferc1 = input_dir / 'steam_ferc1.pkl.gz'
file_path_ferc1_eia = input_dir / 'ferc1_to_eia.pkl.gz'
file_path_deprish_eia = input_dir / 'deprish_to_eia.pkl.gz'

In [None]:
%%time
# Hand the four input file paths to InputsCompiler; the resulting `inputs`
# object is what the later cells read their tables from
# (e.g. `inputs.plant_parts_eia`).
inputs = connect_deprish_to_ferc1.InputsCompiler(
    file_path_mul=file_path_mul,
    file_path_steam_ferc1=file_path_steam_ferc1,
    file_path_ferc1_eia=file_path_ferc1_eia,
    file_path_deprish_eia=file_path_deprish_eia
)

### Generate the options and connections!

In [None]:
# Build a MatchMaker from the compiled inputs and run its match() step.
# NOTE(review): presumably matches_df pairs depreciation records with FERC1
# records (it carries record_id_eia_deprish / record_id_eia_ferc1 columns
# further down) -- confirm against MatchMaker.match.
match_maker = connect_deprish_to_ferc1.MatchMaker(inputs)
matches_df = match_maker.match()

In [None]:
# Build a Scaler from the match maker and run its scale() step.
scaler = connect_deprish_to_ferc1.Scaler(match_maker)
scaled_df = scaler.scale()
# Sanity check, shown as the cell output: scale() is expected to return as
# many rows as scaler.matches_df holds, so this should be true
len(scaled_df) == len(scaler.matches_df)

In [None]:
# Intermediate outputs of the scaler, kept for inspection in the cells below.
# NOTE(review): judging by the method names, one splits and the other
# aggregates the FERC1 data columns -- confirm against the Scaler class.
same_smol = scaler.split_ferc1_data_cols()
same_beeg = scaler.agg_ferc_data_cols()

# Columns to lead with when a table is passed through reorder_cols, in the
# order they should appear.
first_cols = [
    'plant_part_deprish',
    'plant_part_ferc1',
    'record_id_eia_deprish',
    'record_id_eia_ferc1',
    'plant_name',
    'plant_name_match',
    'plant_name_ferc1',
    'plant_name_new_ferc1',
    'fraction_owned_deprish',
    'fraction_owned_ferc1',
    'record_count_deprish',
    'record_count_ferc1',
    'record_count_matches_deprish',
]
def reorder_cols(df, first_cols):
    """Move the columns named in ``first_cols`` to the front of ``df``.

    Purely a display convenience so the key columns are easier to see; it
    may be removed later.

    Args:
        df: dataframe whose columns should be reordered.
        first_cols: column names to lead with, in the desired order. Names
            that are not columns of ``df`` are ignored.

    Returns:
        ``df`` selected down to the reordered column list: the ``first_cols``
        that are present, followed by every other column in its existing order.
    """
    leading = [col for col in first_cols if col in df.columns]
    trailing = [col for col in df.columns if col not in first_cols]
    return df[leading + trailing]
# Put the key columns in first_cols at the front of each table inspected
# below; each name is rebound to the reordered frame.
matches_df = reorder_cols(matches_df, first_cols)
same_smol = reorder_cols(same_smol, first_cols)
same_beeg = reorder_cols(same_beeg, first_cols)

In [None]:
# plant_id_pudl values that are left out of the listing below
bad_ids = [309, 660, 596, 414]
idk_ids = [105, 658, 646]
excluded_ids = bad_ids + idk_ids

# rows whose record_count_deprish exceeds record_count_matches_deprish
has_more_records_than_matches = (
    same_smol.record_count_deprish > same_smol.record_count_matches_deprish
)
is_not_excluded = ~same_smol.plant_id_pudl.isin(excluded_ids)
is_after_2015 = same_smol.report_date.dt.year > 2015

# To narrow the listing to a single plant, also AND in
#   (same_smol.plant_id_pudl == plant_id_pudl)
# and/or append .plant_id_pudl to show only the plant ids.
same_smol[
    has_more_records_than_matches & is_not_excluded & is_after_2015
].sort_values(by='record_id_eia_deprish')

In [None]:
# Plant and year under inspection; both are reused by the following cell.
plant_id_pudl = 381
year = 2017

df = matches_df
is_plant = df.plant_id_pudl == plant_id_pudl
# Optional narrowing while exploring:
#   AND the mask with (df.report_date.dt.year == year)
#   append .filter(like='capacity_mw') to the result
df[is_plant].set_index(['record_id_eia_deprish', 'record_id_eia_ferc1'])

In [None]:
# Rows of the plant-parts table for the chosen plant and year whose
# ownership is 'owned', indexed by plant part and EIA record id.
df = inputs.plant_parts_eia
is_selected = (
    (df.plant_id_pudl == plant_id_pudl)
    & (df.report_date.dt.year == year)
    & (df.ownership == 'owned')
)
# Optional narrowing while exploring:
#   AND the mask with (df.true_gran)
#   append .filter(like='capacity') to the result
df[is_selected].set_index(['plant_part', 'record_id_eia'])

In [None]:
# Sample test data: two plants, each with one plant-level FERC1 record
# matched to several generator-level depreciation records.
# NOTE(review): presumably meant for testing how FERC1 values get split
# across the matched depreciation records -- confirm the intended target.
same_smol_test = pd.DataFrame(
    {
        "plant_id_pudl": [1, 1, 1, 2, 2],
        "record_id_eia_ferc1": [
            "1_plant", "1_plant", "1_plant",
            "2_plant", "2_plant",
        ],
        "record_id_eia_deprish": [
            "1_plant_gen_a", "1_plant_gen_b", "1_plant_gen_c",
            "2_plant_gen_a", "2_plant_gen_b",
        ],
        # data point to weight
        "opex_nofuel": [250, 250, 250, 75, 75],
        "net_generation_mwh_ferc1": [62500, 62500, 62500, 11250, 11250],
        # the three *_deprish columns below are the weighting options
        "total_fuel_cost_deprish": [100, 200, 200, pd.NA, pd.NA],
        "net_generation_mwh_deprish": [10000, 15000, 35000, 4500, 7500],
        "capacity_mw_deprish": [50, 100, 100, 25, 45],
    }
)

# Expected per-record values after weighting.
# NOTE(review): plant 1 is consistent with weighting opex_nofuel by
# total_fuel_cost_deprish (100:200:200) and plant 2, where fuel cost is
# missing, by net_generation_mwh_deprish (4500:7500) -- confirm.
opex_nofuel_ferc1_deprish = [50.0, 100.0, 100.0, 28.125, 46.875]

# NOTE(review): plant 1 is again consistent with the fuel-cost weights;
# plant 2 is consistent with capacity_mw_deprish (25:45) -- confirm.
net_generation_mwh_ferc1_ferc1_deprish = [
    12500, 25000, 25000, 4017.857, 7232.142,
]

# last expression of the cell: display the sample frame
same_smol_test