In [None]:
import numpy as np
import pandas as pd
import plotly_express as px
import plotly.graph_objects as go
import os
import sys
import wquantiles as wq
from getpass import getpass
from pyra.date_utils import get_planning_year_dates_between

## EMTDB Connection

In [None]:
sys.path.append(r'K:\Valuation\_Analysts\Hemanth\Python Notebooks\Miscellaneous\Python Analyst Engine 2.0')

In [None]:
from util import EmtdbConnection
from emtdb_api import pull_lmp_data

In [None]:
# EMTDB credentials. The user ID can be overridden per analyst through the EMTDB_USER
# environment variable (it falls back to the original default), so the notebook does not
# need to be edited when someone else runs it. The password is always prompted for
# interactively so it is never stored in the notebook.
user = os.environ.get('EMTDB_USER', 'HXH07BP')
pw = getpass('Enter EMTDB pass:')

In [None]:
emtdb = EmtdbConnection(user, pw)

## Analyst Inputs

### Standard inputs

In [None]:
current_planning_year = '2025-2026' # Planning year of the annual auction. Make sure the format is always YYYY-YYYY

look_ahead_years = 4 # Number of planning years valued, including the current planning year

zone = 'DPL' # Depending on deal; used to filter the stage 1 resources and the zonal NSPL

# Reference list of zone, sink_ID and sink_name:
# AECO, 51291, AECO
# JCPL, 116472945, JCPL_RESID_AGG
# PSEG, 51301, PSEG
# RECO, 116472959, RECO_RESID_AGG
# PEPCO, 338268, PEPCO DC
# APS, 116472931, APS_RESID_AGG
# METED, 51296, METED
# PENELEC, 116472951, PENELEC_RESID_AGG
# NOTE(review): METED appears twice in this list (51296 above, 51295 below) - presumably one of the two rows belongs to a different zone; confirm against the Sink Node ID workbook
# METED, 51295, METED
# ATSI, 1258625176, FEOH
# UGI, 116472955, UGI_RESID_AGG
# DPL, 51293, DPL

sink_id = 51293 # read from K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Sink Node ID.xlsx

sink_name = 'DPL' # Sink name used to filter the Stage 1B / Stage 2 paths and the NSPL tab

# The round and period below are combined into long-term column names of the form '<round>_YR<n>_<period>'
most_recent_LT_auction = '2026-2029'
most_recent_LT_auction_round = '4' # This should be set as a string

selection_threshold = 1500 # Usually 1500; a path is selected only when its path value is strictly above this

percent_for_ARR = 1 # Multiplied by the zonal NSPL to get the total MW for ARR

percent_for_stage_1A = 0.6 # 60% of the total MW for ARR

### File paths

In [None]:
# All input locations used by the notebook. Folder paths are walked with os.listdir further down;
# workbook paths are read with pd.read_excel.

# PJM ARR/FTR market schedule workbook and the auction result folders
timeline_path = r'K:\Valuation\_Analysts\Hemanth\ARRs\Timeline\2025-ftr-arr-market-schedule.xlsx'
long_term_ftr_results_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Long-term FTR Auction Results'

annual_ftr_results_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Annual FTR Auction Results'

arr_selection_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\ARR Path Selection'
stage_1_resources_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Resources\2025-26\2025-2026-stage-1-resources-by-zone.xlsx'

# Zonal NSPL and subaccount (NEE) NSPL workbooks
zonal_nspl_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\PY 2526 ARR Forecast.xlsx'
nee_nspl_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Subaccount\Subaccounts PY25-26 with NSPL & Paths.xlsx'

# Stage 2 $/MW forecast workbooks for the two planning years after the current one
stage_2_dollar_per_MW_PY_26_27_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Betty\Forecast\Stage 2 MT\PY 2627 Stage 2 ARR - updated.xlsx'
stage_2_dollar_per_MW_PY_27_28_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Betty\Forecast\Stage 2 MT\PY 2728 Stage 2 ARR - updated.xlsx'

# Stage 1B results workbook
stage_1B_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\ARR Path Selection\PJM Stage 1B ARR Results 25-26.xlsx'

# Stage 2 round 1 and round 2 pulls
stage_2_path_r1 = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\ARR Path Selection\PJM S2R1 Pull 20250326 (awards).xlsx'
stage_2_path_r2 = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\ARR Path Selection\PJM S2R2 Pull 20250403.xlsx'

stage_1A_pull_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\ARR Path Selection\PJM 1A Pull 20250603.xlsx' # For stage 2

zonal_arr_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\PJM ARR Tracking.xlsx'

### Helper function

In [None]:
def next_planning_year(current_planning_year):
    """Return the planning year that follows ``current_planning_year``.

    The label is expected in 'YYYY-YYYY' form; both years are advanced by one,
    e.g. '2025-2026' -> '2026-2027'.
    """
    start_year = int(current_planning_year[:4])
    end_year = int(current_planning_year[5:])
    return f'{start_year + 1}-{end_year + 1}'

def planning_year_from_LT_auction(DF):
    """Return the planning year covered by one long-term auction row.

    ``DF`` is a row (anything indexable by column name) holding 'LT_Period'
    (e.g. '2026-2029') and 'Period Type' (e.g. 'YR1'). The last character of the
    period type is read as the year number within the auction, so YR1 of
    '2026-2029' maps to '2026-2027'.
    """
    first_year = int(DF['LT_Period'][:4])
    year_number = int(DF['Period Type'][-1])
    end_year = first_year + year_number
    return f'{end_year - 1}-{end_year}'
    
def planning_year_from_date(DF):
    """Return the planning year ('YYYY-YYYY') that the row's 'Date' falls in.

    Dates in June or later belong to the planning year starting that calendar
    year; dates in January-May belong to the one that started the year before.
    """
    calendar_year = int(DF['Date'].year)
    start_year = calendar_year if DF['Date'].month >= 6 else calendar_year - 1
    return f'{start_year}-{start_year + 1}'


# Labels of the three planning years after the current one, each derived from the previous label.
# These are used further down to build the '<year>_Selection' column names.
next_year = next_planning_year(current_planning_year)
next_to_next_year = next_planning_year(next_year)
next_to_next_to_next_year = next_planning_year(next_to_next_year)

In [None]:
def unflatten_columns(DF):
    """Return a copy of ``DF`` whose flat 'a_b_c' column labels become a MultiIndex.

    Every column label is split on '_' and each piece becomes one level of the
    new column index. The input frame is left untouched.
    """
    split_labels = [tuple(label.split('_')) for label in DF.columns]

    unflattened = DF.copy()
    unflattened.columns = pd.MultiIndex.from_tuples(split_labels)
    return unflattened

## Timeline

In [None]:
# Load the ARR/FTR market schedule workbook (first sheet) that feeds the two timeline charts below
df_timeline = pd.read_excel(timeline_path)

df_timeline

In [None]:

# Gantt-style view of the period each market covers, one panel per product.
# Rows with any missing value are left out of the chart.
fig = px.timeline(
    df_timeline.dropna(),
    x_start='Start Day',
    x_end='End Day',
    y='Market Name',
    title='ARR/FTR Auction Period',
    facet_col='Product',
)
fig.show()

In [None]:
# Same chart layout as the auction-period view, but showing the bidding window of each market.
# Rows with any missing value are left out of the chart.
fig = px.timeline(
    df_timeline.dropna(),
    x_start='Bidding Opening Day',
    x_end='Bidding Closing Day',
    y='Market Name',
    title='ARR/FTR Auction Dates',
    facet_col='Product',
)
fig.show()

## Stage 1 Resources 

In [None]:
# Read every sheet of the stage 1 resources workbook (two header rows each) and stack them into one frame
df_stage_1_resources = pd.concat(
    pd.read_excel(stage_1_resources_path, sheet_name=None, header=[0, 1]).values(),
    ignore_index=True,
)

# Drop rows with no values at all, then keep only the six columns used downstream (selected by position)
df_stage_1_resources = df_stage_1_resources.dropna(how='all', axis=0).iloc[:, [0, 1, 2, 5, 6, 10]]

# Replace the two-level Excel headers with flat names. The node ID column is named PNODEID
# because that is the key used when merging with the auction results.
df_stage_1_resources.columns = ['Zone', 'PNODEID', 'FTR Name', f'{current_planning_year}_Capacity MW', 'Retired', 'Rate_based']

df_stage_1_resources


## Annual FTR Auction Results

In [None]:
# Aggregate the nodal prices of every round of the current planning year's annual auction.
annual_results_dir = os.path.join(annual_ftr_results_path, current_planning_year)
annual_round_frames = [] # one frame per round workbook; concatenated once after the loop

for file_name in os.listdir(annual_results_dir):
    # Match on the file name only (a folder called '...round...' must not make every file qualify)
    # and skip the '~$' lock files Excel creates while a workbook is open.
    if 'round' not in file_name or file_name.startswith('~$'):
        continue

    file_path = os.path.join(annual_results_dir, file_name)
    round_num = file_path[-14] # file_path[-14] gets the round number from the file name
    if not round_num.isdigit():
        raise ValueError(f'Could not read the round number from {file_name!r} (found {round_num!r} at position -14)')

    print(f'Aggregating annual FTR results from {file_name}')
    df_annual_round = pd.read_excel(file_path, sheet_name='Obligation Nodal Prices RD ' + round_num)
    df_annual_round['Annual_Round'] = round_num
    annual_round_frames.append(df_annual_round)

# A single concat avoids re-copying the accumulated frame on every iteration; an empty frame is
# kept as the result when no round workbook was found, as before.
df_annual_ftr_results = pd.concat(annual_round_frames) if annual_round_frames else pd.DataFrame()
    

Checking where missing values are present and dropping rows and columns not needed

In [None]:
# The per-round frames were stacked with their own row numbers, so renumber the rows first
df_annual_ftr_results = df_annual_ftr_results.reset_index(drop=True)

# Show every row that has at least one missing value
df_annual_ftr_results.loc[df_annual_ftr_results.isna().any(axis=1)]

In [None]:
# Remove every row that has a missing value in any column
df_annual_ftr_results = df_annual_ftr_results.dropna()

In [None]:
# Dropping columns not needed. The selection is positional: the pivot in the next step relies on
# 'Node', 'PNODEID', 'LMP' and 'Annual_Round' being among the four columns kept here.
# NOTE(review): positions 0, 1, 3 and 8 presumably map to those columns in the current workbook layout - confirm if PJM changes the sheet format.

df_annual_ftr_results = df_annual_ftr_results.iloc[:, [0, 1, 3, 8]]

In [None]:
df_annual_ftr_results

In [None]:
# One row per node, one column per auction round
df_annual_ftr_results = df_annual_ftr_results.pivot(
    index=['Node', 'PNODEID'],
    columns='Annual_Round',
    values='LMP',
)

# While the node identifiers are still in the index, every column is a round, so the row mean
# is the average LMP across rounds. These are the average LMPs of the sources.
df_annual_ftr_results['Average_LMP'] = df_annual_ftr_results.mean(axis=1)

# Bring 'Node' and 'PNODEID' back as the two leading columns
df_annual_ftr_results = df_annual_ftr_results.reset_index()

df_annual_ftr_results

In [None]:
# df_annual_ftr_results.loc[
#     lambda DF: (DF.Node.isin(['RTEP B0287 SOURCE','RTEP B0328 SOURCE', 'DPL', 'PEPCO']))
# ].to_clipboard(index=False)


# Ad-hoc lookup: the clipboard is expected to hold a table with 'SourceName' and 'SinkName'
# columns. The annual results of every node named in either column are copied back to the clipboard.
# The clipboard is parsed once up front instead of twice inside the filter.
clipboard_paths = pd.read_clipboard()

df_annual_ftr_results.loc[
    lambda DF: DF.Node.isin(clipboard_paths.SourceName) | DF.Node.isin(clipboard_paths.SinkName)
].to_clipboard()

## Long-term FTR auction results

Iterating through different LT auction folders and the files in each folder

In [None]:
# Long-term auctions whose results are loaded (matched against the folder name)
lt_auctions_to_load = ('2025-2028', '2026-2029') # to be set by analyst - can be automated
lt_round_frames = [] # one frame per round workbook; concatenated once after the loop

for folder_name in os.listdir(long_term_ftr_results_path):
    folder_path = os.path.join(long_term_ftr_results_path, folder_name)

    # Only walk into real folders that belong to one of the requested auctions
    if not os.path.isdir(folder_path) or not any(period in folder_name for period in lt_auctions_to_load):
        continue

    for file_name in os.listdir(folder_path):
        # Match on the file name only and skip the '~$' lock files Excel creates while a workbook is open
        if 'round' not in file_name or file_name.startswith('~$'):
            continue

        file_path = os.path.join(folder_path, file_name)
        round_num = file_path[-6] # file_path[-6] gets the round number from the file name
        if not round_num.isdigit():
            raise ValueError(f'Could not read the round number from {file_name!r} (found {round_num!r} at position -6)')

        print(f'Aggregating long-term FTR results from {file_name}')
        df_lt_round = pd.read_excel(file_path, sheet_name='Obligation Nodal Prices RD ' + round_num)
        df_lt_round['LT_Round'] = round_num
        df_lt_round['LT_Period'] = folder_name
        lt_round_frames.append(df_lt_round)

# A single concat avoids re-copying the accumulated frame on every iteration; an empty frame is
# kept as the result when nothing matched, as before.
df_long_term_ftr_results = pd.concat(lt_round_frames) if lt_round_frames else pd.DataFrame()

In [None]:
# Keep only the columns needed for the pivot below (selected by position).
# NOTE(review): the pivot uses 'PNODEID', 'Node', 'LMP', 'Period Type', 'LT_Round' and 'LT_Period', so positions 0-3, 8 and 9 presumably map to those six columns - confirm if the sheet layout changes.
df_long_term_ftr_results = df_long_term_ftr_results.iloc[:, [0, 1, 2, 3, 8, 9]]

In [None]:
# Checking where NaNs exist and dropping them

# Rows with at least one missing value. NOTE: this expression is not the last statement of the
# cell, so its result is not displayed when the cell runs.
df_long_term_ftr_results[df_long_term_ftr_results.isna().any(axis=1)]

# Remove every row that has a missing value in any of the kept columns
df_long_term_ftr_results.dropna(axis=0, inplace=True)

In [None]:
df_long_term_ftr_results

In [None]:
# Reorganizing data: one row per (PNODEID, Node), one column per (round, period type, auction period)

df_long_term_ftr_results = pd.pivot_table(
    df_long_term_ftr_results,
    index=['PNODEID', 'Node'],
    columns=['LT_Round', 'Period Type', 'LT_Period'], # this is the convention for naming the column
    values='LMP',
)

# Collapse the three column levels into a single '<round>_<period type>_<auction period>' label
df_long_term_ftr_results.columns = list(map('_'.join, df_long_term_ftr_results.columns.to_flat_index()))

df_long_term_ftr_results

## ARR Valuation

In [None]:
# Keep only the resources of the analyst's zone that have positive capacity and are
# neither flagged as retired ('Y') nor rate-based.

df_stage_1_resources_filtered = df_stage_1_resources.loc[
    lambda DF: DF['Zone'].eq(zone)
    & DF[current_planning_year + '_Capacity MW'].gt(0)
    & DF['Retired'].ne('Y')
    & DF['Rate_based'].ne('Rate-based')
]

In [None]:
# There are some paths with duplicate PNODEIDs and FTR names with different capacities, so the
# capacities are summed per (zone, node, FTR name). The two flag columns are no longer needed.
df_stage_1_resources_filtered = (
    df_stage_1_resources_filtered
    .drop(columns=['Retired', 'Rate_based'])
    .groupby(['Zone', 'PNODEID', 'FTR Name'], as_index=False)
    .sum()
)

df_stage_1_resources_filtered

In [None]:
# Attach the annual auction prices of each source node (every resource must match at most one node row).
# The per-round columns 1-4 are dropped because only their average is used from here on, and
# 'FTR Name' is dropped because it should be the same as the node name.

df_arr_valuation = pd.merge(
    df_stage_1_resources_filtered,
    df_annual_ftr_results,
    how='left',
    on='PNODEID',
    validate='m:1',
).drop(columns=['1', '2', '3', '4', 'FTR Name'])

df_arr_valuation

In [None]:
# Attach the long-term auction prices of each source node, again requiring at most one
# long-term row per PNODEID.

df_arr_valuation = pd.merge(
    df_arr_valuation,
    df_long_term_ftr_results,
    how='left',
    on='PNODEID',
    validate='m:1',
)

df_arr_valuation

In [None]:
# Calculating path value of LT auctions: Sink congestion - source congestion
# The long-term prices of the sink node (the row whose PNODEID index level equals sink_id) are
# broadcast against every source row, and the result overwrites the source prices in place.
# NOTE(review): columns from position 5 onward are assumed to be exactly the long-term columns added by the preceding merge - confirm if the column layout changes.
# NOTE: because the prices are overwritten in place, re-running this cell without re-running the
# merge above applies the subtraction a second time.

df_arr_valuation.iloc[:, 5:] = df_long_term_ftr_results[df_long_term_ftr_results.index.get_level_values(0) == sink_id].values - df_arr_valuation.iloc[:, 5:] 

In [None]:
# Annual path value = sink LMP - source LMP, using the round-averaged annual prices

annual_sink_lmp = df_annual_ftr_results.loc[df_annual_ftr_results.PNODEID == sink_id, 'Average_LMP'].values
df_arr_valuation[current_planning_year + '_path_value'] = annual_sink_lmp - df_arr_valuation['Average_LMP']

# The source price itself is not needed once the path value has been computed
df_arr_valuation = df_arr_valuation.drop(columns='Average_LMP')

In [None]:
# Flagging the paths whose value is above the threshold (1 = selected, 0 = not selected).
# The current planning year uses the annual path value; each later year uses the YR<i> column of
# the most recent long-term auction round. The comparison is done on the whole column at once:
# a missing path value compares as False and therefore yields 0, exactly like the row-wise version,
# and an empty frame no longer breaks the column assignment.

temp_year = current_planning_year # just for the loop below

for i in range(look_ahead_years):
    if i == 0: # for current planning year
        value_column = temp_year + '_path_value'
    else:
        value_column = most_recent_LT_auction_round + '_' + 'YR' + str(i) + '_' + most_recent_LT_auction

    df_arr_valuation[temp_year + '_Selection'] = (df_arr_valuation[value_column] > selection_threshold).astype('int64')
    temp_year = next_planning_year(temp_year)

In [None]:
df_arr_valuation # This should have the all the annual and LT path values along with the respective selections

Manually setting specific path selection

In [None]:
# PEPCO - DC

# df_arr_valuation.loc[
#     lambda DF: DF.Node.isin(
#         [
#         'SMECO   13 KV   SMECO', 'CHALKPT 24 KV   CHLKG4',
#        'CHALKPT 24 KV   CHLKG3', 'CHALKPT 13 KV   CT6',
#        'CHALKPT 13 KV   CT4', 'CHALKPT 13 KV   CT3',
#        'CHALKPT 13 KV   CT5', 'CHALKPT 13 KV   CT2',
#        'CHALKPT 4 KV    CT1', 'MORGANTO13 KV   CT3',
#        'MORGANTO13 KV   CT4', 'MORGANTO13 KV   CT5',
#        'MORGANTO13 KV   CT6', 'KELSONRI18 KV   STCHA1CT'
#         ]
#     ),
# [next_year + '_Selection', next_to_next_year + '_Selection', next_to_next_to_next_year + '_Selection']
# ] = 0

# APS

# df_arr_valuation.loc[
#     (df_arr_valuation.Node == 'GREENGAP35 KV   G1'), [next_to_next_year + '_Selection', next_to_next_to_next_year + '_Selection']
# ] = 0

# PENELEC

# df_arr_valuation.loc[
#     (df_arr_valuation.Node == 'WARREN  13 KV   UNITCT'), [next_to_next_year + '_Selection', next_to_next_to_next_year + '_Selection']
# ] = 0

# df_arr_valuation.to_clipboard()

## Long-term decay

### Historical Data Aggregation

Aggregating annual FTR auction results

In [None]:
# Aggregate the nodal prices of every round of every planning-year folder of annual results.
annual_history_frames = [] # one frame per round workbook; concatenated once after the loop

for folder_name in os.listdir(annual_ftr_results_path):

    folder_path = os.path.join(annual_ftr_results_path, folder_name)

    if not os.path.isdir(folder_path):
        continue

    for file_name in os.listdir(folder_path):
        # Match on the file name only and skip the '~$' lock files Excel creates while a workbook is open
        if 'round' not in file_name or file_name.startswith('~$'):
            continue

        file_path = os.path.join(folder_path, file_name)
        print(f'Aggregating annual FTR results from {file_name}')

        # The round digit sits one position further right in the shorter '.xls' names
        if file_path[-14].isnumeric():
            round_num = file_path[-14] # for xlsx files
        else:
            round_num = file_path[-13] # for xls files

        df_annual_round = pd.read_excel(file_path, sheet_name='Obligation Nodal Prices RD ' + round_num)
        df_annual_round['Round'] = round_num
        df_annual_round['Year'] = folder_name # the folder name is used as the planning-year label
        annual_history_frames.append(df_annual_round)

# A single concat avoids re-copying the accumulated frame on every iteration; an empty frame is
# kept as the result when nothing matched, as before.
df_all_annual_ftr_results = pd.concat(annual_history_frames) if annual_history_frames else pd.DataFrame()

df_all_annual_ftr_results 

In [None]:
# One row per PNODEID, one column per planning year. pivot_table aggregates with the mean by
# default, so each cell is the average LMP over all rounds of that year.
# NOTE(review): the positional selection is assumed to include 'PNODEID', 'LMP' and 'Year' (the pivot needs them); rows with a missing value in any of the five kept columns are dropped first - confirm the positions against the stacked frame.
df_all_annual_ftr_results_pivoted = df_all_annual_ftr_results.iloc[:, [0, 1, 3, 6, 7]].dropna().pivot_table(
    index='PNODEID',
    columns='Year',
    values='LMP'
) # this should average across all rounds

In [None]:
df_all_annual_ftr_results_pivoted

Aggregating long-term FTR auction results

In [None]:
# Aggregate the nodal prices of every round of every long-term auction folder.
lt_history_frames = [] # one frame per round workbook; concatenated once after the loop

for folder_name in os.listdir(long_term_ftr_results_path):
    folder_path = os.path.join(long_term_ftr_results_path, folder_name)

    # Same guard as the annual aggregation: a stray file next to the auction folders must not
    # make os.listdir fail.
    if not os.path.isdir(folder_path):
        continue

    for file_name in os.listdir(folder_path):
        # Match on the file name only and skip the '~$' lock files Excel creates while a workbook is open
        if 'round' not in file_name or file_name.startswith('~$'):
            continue

        file_path = os.path.join(folder_path, file_name)

        # The round digit sits one position further right in the shorter '.xls' names
        if file_path[-6].isnumeric():
            round_num = file_path[-6] # for xlsx files
        else:
            round_num = file_path[-5] # for xls files

        print(f'Aggregating long-term FTR results from {file_name}')
        df_lt_round = pd.read_excel(file_path, sheet_name='Obligation Nodal Prices RD ' + round_num)
        df_lt_round['LT_Round'] = round_num
        df_lt_round['LT_Period'] = folder_name
        lt_history_frames.append(df_lt_round)

# A single concat avoids re-copying the accumulated frame on every iteration; an empty frame is
# kept as the result when nothing matched, as before.
df_all_long_term_ftr_results = pd.concat(lt_history_frames) if lt_history_frames else pd.DataFrame()

df_all_long_term_ftr_results

In [None]:
# Rows with period type YRALL are dropped; only the per-year rows are kept
df_all_long_term_ftr_results = df_all_long_term_ftr_results.loc[
    lambda DF: DF['Period Type'] != 'YRALL'
]

In [None]:
# Keep six columns by position and drop every row that has a missing value in any of them.
# NOTE(review): the later steps read 'PNODEID', 'LMP', 'Period Type', 'LT_Round' and 'LT_Period', so positions 0-3, 6 and 7 presumably cover those columns in the stacked frame - confirm.
df_all_long_term_ftr_results = df_all_long_term_ftr_results.iloc[:, [0, 1, 2, 3, 6, 7]].dropna()

In [None]:
# Add the planning year each row refers to, derived from the LT period and the period type
df_all_long_term_ftr_results = df_all_long_term_ftr_results.assign(
    Year=lambda DF: DF.apply(planning_year_from_LT_auction, axis=1)
)

df_all_long_term_ftr_results

In [None]:
# One row per PNODEID; columns are keyed by (planning year, period type, long-term round)
df_all_long_term_ftr_results_pivoted = pd.pivot_table(
    df_all_long_term_ftr_results,
    index='PNODEID',
    columns=['Year', 'Period Type', 'LT_Round'],
    values='LMP',
)

In [None]:
df_all_long_term_ftr_results_pivoted

### Long-term decay calculation for given sink and selected paths

In [None]:
# Annual path value for the given sink: sink congestion - source congestion.
# The sink's row of yearly prices is broadcast against the price row of every node.

annual_sink_prices = df_all_annual_ftr_results_pivoted.loc[df_all_annual_ftr_results_pivoted.index == sink_id].values
df_all_annual_ftr_results_path_values = annual_sink_prices - df_all_annual_ftr_results_pivoted

In [None]:
# Long-term path value for the given sink: sink congestion - source congestion.
# The sink's row of long-term prices is broadcast against the price row of every node.

long_term_sink_prices = df_all_long_term_ftr_results_pivoted.loc[df_all_long_term_ftr_results_pivoted.index == sink_id].values
df_all_long_term_ftr_results_path_values = long_term_sink_prices - df_all_long_term_ftr_results_pivoted

In [None]:
# Long-term decay calculation: (annual path value / long-term path value) - 1.
# Long-term path values of exactly 0 are replaced by NaN first so they do not produce a division by zero.
# NOTE(review): the annual frame has one column level ('Year') while the long-term frame has three ('Year', 'Period Type', 'LT_Round'); the division presumably relies on pandas aligning the two on the shared 'Year' level - confirm on the installed pandas version.

df_long_term_decay = df_all_annual_ftr_results_path_values.div(df_all_long_term_ftr_results_path_values.replace(0, np.nan)) - 1

In [None]:
# Excluding outliers - 2022-2023 had outliers due to Russia-Ukraine

# df_long_term_decay.drop(
#     '2022-2023', axis=1, level=0,
#     inplace=True
# )

In [None]:
# Averaging across the different years: columns are grouped by their second and third level
# (period type and long-term round) and the planning-year level is averaged out.
# `groupby(axis=1)` is deprecated in recent pandas releases, so the frame is transposed, grouped by
# rows and transposed back, which gives the same result.
df_long_term_decay = df_long_term_decay.T.groupby(
    level=[1, 2],
).mean().T

In [None]:
# Flattening MultiIndex to merge in the next step: each column label becomes a plain
# (period type, round) tuple, which is how the columns are addressed later on (e.g. ('YR1', '4')).
df_long_term_decay.columns = df_long_term_decay.columns.to_flat_index() # Flattening MultiIndex to merge in the next step

In [None]:
df_long_term_decay

In [None]:
# df_long_term_decay.loc[
#     lambda DF: DF.index.isin([1097732449, 50558, 50557, 50489, 50490]) 
# ].to_excel('PSEG_2025-2026_4_LTD_5_largest_paths_all_history.xlsx')

In [None]:
# (
# df_arr_valuation[['PNODEID', current_planning_year + '_Capacity MW', next_year + '_Selection']].loc[
#     lambda DF: DF['2026-2027_Selection'] == 1
# ].sort_values(
#     by=['2025-2026_Capacity MW'],
#     ascending=False
# )
# .iloc[[0, 3, 4, 5, 6], :].PNODEID.values
# .iloc[[i for i in range(24) if i not in [0, 3, 4, 5, 6]], :]
# )
# df_arr_valuation

In [None]:
# Merging df_long_term_decay with df_arr_valuation to get the decay for the relevant paths

# YR1 path value of the most recent long-term auction round; used as the dollar weight below
yr1_path_value_column = most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction

df_long_term_decay_filtered = df_arr_valuation[['PNODEID', current_planning_year + '_Capacity MW', next_year + '_Selection', yr1_path_value_column]].merge( # Note that we use next year's selection
    right=df_long_term_decay,
    on='PNODEID',
    how='left',
    validate='m:1'
)


# Sometimes they want to see what the LTD would be if we don't filter any paths by the threshold. In that case, just replace
# df_long_term_decay_filtered[next_year + '_Selection'] below by 1

# Usually, we just multiply by capacity, but as a different modeling approach, the path value is also multiplied in so that
# the weight is dollar-based instead

# It is called selected capacity on the next line but it is actually selected value.
# A path with no YR1 price is never selected (a missing value does not pass the threshold test), but
# capacity * 0 * NaN is still NaN; such weights are set to 0 so that a single missing price cannot
# turn the weighted quantiles and the weighted average into NaN.
df_long_term_decay_filtered[next_year + '_Selected_Capacity'] = (
    df_long_term_decay_filtered[current_planning_year + '_Capacity MW']
    * df_long_term_decay_filtered[next_year + '_Selection']
    * df_long_term_decay_filtered[yr1_path_value_column].astype('float64')
).fillna(0.0)

In [None]:
# The identifier, capacity, selection flag and YR1 path value have already been folded into the
# '_Selected_Capacity' weight in the previous step, so only the decay columns and the weight are kept.

columns_folded_into_weight = [
    'PNODEID',
    current_planning_year + '_Capacity MW',
    next_year + '_Selection',
    most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction,
]
df_long_term_decay_filtered = df_long_term_decay_filtered.drop(columns=columns_folded_into_weight)

In [None]:
# df_all_annual_ftr_results_pivoted

df_long_term_decay_filtered

In [None]:
# df_long_term_decay_filtered[('YR1', '4')].to_clipboard(index=False, header=False)

Calculating the weighted quantiles of the long-term decay, using the `_Selected_Capacity` column (treated as a frequency) as the weight

In [None]:
percentiles = [k / 10 for k in range(11)] # Percentiles from 0% to 100% in steps of 10%

# The decay columns (everything except the weight) and the weights do not change inside the loops,
# so they are extracted once instead of being rebuilt for every (percentile, column) pair.
ltd_decay_values = np.array(df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity'))
ltd_weights = df_long_term_decay_filtered[next_year + '_Selected_Capacity']

num_columns_ltd = ltd_decay_values.shape[1] # All the ('YR_', '') columns in df_long_term_decay_filtered

np_ltd = np.zeros((len(percentiles), num_columns_ltd)) # Creating numpy array to store results

# Row = percentile, column = decay column; each cell is the weighted quantile of that decay column
for percentile in range(len(percentiles)):
    for column in range(num_columns_ltd):
        np_ltd[percentile, column] = wq.quantile(
            ltd_decay_values[:, column],
            ltd_weights,
            percentiles[percentile]
        )

In [None]:
# Wrap the quantile matrix in a frame: rows are labelled by percentile and columns carry the
# labels of the decay columns (everything except the weight column).
df_long_term_decay_summary_stats = pd.DataFrame(
    np_ltd,
    index=percentiles,
    columns=df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity').columns,
)

df_long_term_decay_summary_stats

In [None]:
# Creating a P table for LTD - original approach

# df_long_term_decay_summary_stats = df_long_term_decay_filtered.drop(columns=current_planning_year + '_Selected_Capacity').describe(
#     percentiles=[(i + 1) / 10 for i in range(9)],
# )

# df_long_term_decay_summary_stats

In [None]:
# Adding the weighted average of each decay column. The weights come from the '_Selected_Capacity'
# column (now a value weight rather than a pure capacity weight) and are normalised to sum to 1.

selected_weight_values = df_long_term_decay_filtered[next_year + '_Selected_Capacity'].values
capacity_weights = selected_weight_values / sum(selected_weight_values)

decay_columns_only = df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity')
capacity_weighted_average = decay_columns_only.mul(capacity_weights, axis=0).sum(axis=0).values

df_long_term_decay_summary_stats.loc['Value Weighted Average'] =  capacity_weighted_average.tolist()

df_long_term_decay_summary_stats

In [None]:
# Stack the standard describe() statistics of the decay columns on top of the weighted
# percentile table and write both to one Excel file named after the zone, planning year and look-ahead.

unweighted_ltd_stats = df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity').describe()

pd.concat([unweighted_ltd_stats, df_long_term_decay_summary_stats]).to_excel(
    f'{zone}_{current_planning_year}_{look_ahead_years}_LTD.xlsx'
)

In [None]:
# Original approach

# df_long_term_decay_summary_stats.to_excel(f'{zone}_{current_planning_year}_{look_ahead_years}_LTD.xlsx')

In [None]:
# wq.quantile(
#     np.array([-0.5, 0.05, -0.2, 0.15]),
#     np.array([5, 10, 15, 20]),
#     0.5
# )

## Stage 1B

In [None]:
# df_stage_1B = pd.concat([pd.read_excel(stage_1B_path, sheet_name='ARR'), pd.read_excel(stage_1B_path, sheet_name='IARR')]) # Told not to include IARR

# Only the 'ARR' sheet is read, and only its first six columns are needed
df_stage_1B = pd.read_excel(stage_1B_path, sheet_name='ARR').iloc[:, :6]

df_stage_1B

Merging with nodal congestions from annual and long-term auctions

In [None]:
# Four merges in sequence, all matched on the node name:
#   1. annual prices of the sink   -> 'Annual Sink LMP'
#   2. annual prices of the source -> 'Annual Source LMP'
#   3. long-term prices of the sink
#   4. long-term prices of the source
# The long-term columns have identical names in merges 3 and 4, so pandas disambiguates them with
# its default suffixes: '_x' for the sink (merged first) and '_y' for the source.
# No `how` is passed, so these are inner merges: a path whose sink or source name has no match is dropped.
df_stage_1B = df_stage_1B.merge(
    right=df_annual_ftr_results,
    left_on='ns1:SinkName',
    right_on='Node'
).drop(columns=['1', '2', '3', '4', 'Node', 'PNODEID']).rename(columns={'Average_LMP': 'Annual Sink LMP'}).merge(
    right=df_annual_ftr_results,
    left_on='ns1:SourceName',
    right_on='Node'
).drop(columns=['1', '2', '3', '4', 'Node', 'PNODEID']).rename(columns={'Average_LMP': 'Annual Source LMP'}).merge(
    right=df_long_term_ftr_results,
    left_on='ns1:SinkName', # x - Sink
    right_on='Node'
).merge(
    right=df_long_term_ftr_results,
    left_on='ns1:SourceName', # y - Source
    right_on='Node'
)

In [None]:
# Path value per planning year = sink price - source price. The current year uses the annual
# prices; later years use the YR<i> columns of the most recent long-term round, where the '_x'
# suffix is the sink and '_y' the source.
temp_year = current_planning_year # just for the loop below

for i in range(look_ahead_years):
    if i == 0: # for current planning year
        sink_prices = df_stage_1B['Annual Sink LMP']
        source_prices = df_stage_1B['Annual Source LMP']
    else:
        lt_column = most_recent_LT_auction_round + '_' + 'YR' + str(i) + '_' + most_recent_LT_auction
        sink_prices = df_stage_1B[lt_column + '_x']
        source_prices = df_stage_1B[lt_column + '_y']

    df_stage_1B[temp_year + '_path_value'] = sink_prices - source_prices
    temp_year = next_planning_year(temp_year)

# Keep only the paths that sink at the analyst's sink
df_stage_1B = df_stage_1B.loc[df_stage_1B['ns1:SinkName'] == sink_name]

df_stage_1B

In [None]:
# Manually filtering out paths with very low path values - this is not standard for every deal

# df_stage_1B.loc[(df_stage_1B[current_planning_year + '_path_value'] <= 20) & (~df_stage_1B['ns1:ParticipantName'].isin(['FUNEPL', 'FPLG40'])), 'ns1:ClearedMW'] = 0

## Stage 2

In [None]:
# Stage 2 paths of both rounds stacked into one frame: round 1 comes from the first six columns
# of the 'PATHS' sheet, round 2 from the first sheet of its workbook.
stage_2_round_1 = pd.read_excel(stage_2_path_r1, sheet_name='PATHS').iloc[:, :6]
stage_2_round_2 = pd.read_excel(stage_2_path_r2)

df_stage_2 = pd.concat([stage_2_round_1, stage_2_round_2])

Merging with source and sink congestions from annual auction results

In [None]:
# Look up the round-averaged annual price of the sink first and of the source second.
# Each lookup matches on the node name, requires at most one price row per name, discards the
# helper columns brought in by the merge and keeps the average under its own column name.
for path_end_column, lmp_column in [('ns1:SinkName', 'Sink_LMP'), ('ns1:SourceName', 'Source_LMP')]:
    df_stage_2 = (
        df_stage_2
        .merge(right=df_annual_ftr_results, left_on=path_end_column, right_on='Node', validate='m:1')
        .drop(columns=['Node','PNODEID', '1', '2', '3', '4'])
        .rename(columns={'Average_LMP': lmp_column})
    )

In [None]:
# Path value = sink price - source price; total value = path value x cleared MW
df_stage_2 = df_stage_2.assign(
    Path_Value=lambda DF: DF['Sink_LMP'] - DF['Source_LMP']
).assign(
    Total_Value=lambda DF: DF['Path_Value'] * DF['ns1:ClearedMW']
)

df_stage_2

In [None]:
# The 'NSPL' sheet of the stage 1A pull links each participant (subaccount) to a sink
df_subaccounts_sinks = pd.read_excel(stage_1A_pull_path, sheet_name='NSPL')

# Participants whose sink is the analyst's sink
subaccounts = df_subaccounts_sinks.loc[
    df_subaccounts_sinks['sinkName'] == sink_name, 'participantName'
].unique()

# Stage 2 paths held by those participants (this is how stage 2 is filtered by sink)
df_stage_2_filtered = df_stage_2.loc[df_stage_2['ns1:ParticipantName'].isin(subaccounts)]

df_stage_2_filtered

The analysis below was only needed for UGI, which had no Stage 1A or Stage 1B paths; it is not part of the standard workflow.

In [None]:
# df_stage_2_filtered.merge(
#     right=df_long_term_ftr_results[[most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction]].reset_index(),
#     left_on='ns1:SinkName',
#     right_on='Node'
# ).rename(
#     columns={
#         most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction: 'Sink_LT_LMP'
#     }
# ).merge(
#     right=df_long_term_ftr_results[[most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction]].reset_index(),
#     left_on='ns1:SourceName',
#     right_on='Node'
# ).rename(
#     columns={
#         most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction: 'Source_LT_LMP'
#     }
# ).drop(
#     columns=['PNODEID_x', 'Node_x', 'PNODEID_y', 'Node_y']
# ).assign(
#     LT_path_value=lambda DF: DF.Sink_LT_LMP - DF.Source_LT_LMP
# )

In [None]:
df_stage_2_filtered['Total_Value'].sum()

## Congestion settles for Stage 1A

In [None]:
# df_arr_valuation_pseg_congestion = df_arr_valuation.loc[
#     lambda DF: DF[next_year + '_Selection'] == 1
# ].sort_values(
#     by='2025-2026_Capacity MW',
#     ascending=False
# ).iloc[:7, :]

# df_arr_valuation_pseg_congestion

In [None]:
# Pulling congestion LMPs for the source nodes (and sink) of the paths selected in stage 1A

# Normally, we do it for all the selections. For PSEG, we will just look at specific paths. To revert, replace df_arr_valuation_pseg_congestion with df_arr_valuation or vice versa

# Selected capacity per source node. df_arr_valuation can hold several rows for the same PNODEID
# (one per FTR name), so the capacity is summed per node; this keeps the node list and the capacity
# vector the same length and in the same order (first appearance), which the positional multiply
# in the pivot step below relies on.
selected_capacity_by_node = df_arr_valuation.loc[
    df_arr_valuation[next_year + '_Selection'] == 1
].groupby('PNODEID', sort=False)[current_planning_year + '_Capacity MW'].sum()

# Appending sink ID to selected sources
selected_nodes = np.append(selected_capacity_by_node.index.to_numpy().astype(int), sink_id)

selected_capacity = selected_capacity_by_node.values

congestion_frames = [] # one frame per node; concatenated once after the loop

for pnode_id in selected_nodes:
    congestion_frames.append(
        pull_lmp_data(
            emtdb=emtdb,
            pnode_id=pnode_id,
            da_or_rt='DA',
            start_dt='2019-06-01',
            end_dt=pd.Timestamp.today().date() - pd.offsets.MonthEnd(), # last month end before today
            price_data_type='CONGESTION'
        ).reset_index().assign(
            PNODEID=pnode_id # tag the rows with the node they belong to
        )
    )

# selected_nodes always contains at least the sink, so there is always something to concatenate
df_stage_1A_congestion_settles = pd.concat(congestion_frames, axis=0)

In [None]:
# Convert the 'Date' column to pandas datetimes so that `.dt` accessors and `.month` / `.year`
# work on it below. The whole column is converted in one vectorised call instead of one
# pd.to_datetime call per row.
df_stage_1A_congestion_settles = df_stage_1A_congestion_settles.assign(
    Date=lambda DF: pd.to_datetime(DF.Date)
)

df_stage_1A_congestion_settles

In [None]:
# Monthly congestion settlement of all selected paths, by planning year. Steps of the chain:
#   1. pivot to one row per (Date, Hour) and one column per node
#   2. subtract the sink's price from every column (source - sink) and drop the sink column
#   3. order the columns like selected_nodes (without the sink) and multiply column by column with
#      the negated capacities, i.e. (sink - source) x capacity; this multiply is positional, so
#      selected_capacity must be in the same order as selected_nodes[:-1]
#   4. add the planning year and the calendar month of each row, and sum all paths into 'All_Paths'
#   5. total 'All_Paths' per month and planning year, with the months ordered June to May
df_stage_1A_congestion_settles_pivoted = df_stage_1A_congestion_settles.pivot(
    index=['Date', 'Hour'],
    columns='PNODEID',
    values='Price'
).pipe(
    lambda DF: DF.sub(DF[sink_id], axis=0) # Here we're doing source - sink and then multiplying by the negative of the capacity in the next step
).drop(
    columns=sink_id
).reindex(columns=selected_nodes[:-1]).multiply(-selected_capacity).reset_index().assign( 
    Planning_year=lambda DF: DF.apply(planning_year_from_date, axis=1) # Helper column planning year
).assign(
    Month=lambda DF: DF.Date.dt.month # Helper column month
).assign(
    All_Paths=lambda DF: DF[selected_nodes[:-1]].sum(axis=1)
).drop(columns=selected_nodes[:-1]).pivot_table(
    index='Month',
    columns='Planning_year',
    values='All_Paths',
    aggfunc='sum'
).reindex([6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5])

df_stage_1A_congestion_settles_pivoted

In [None]:
# Heatmap of the monthly settlement totals: planning years on the x axis, months (as text
# labels, in June-to-May order) on the y axis.
congestion_heatmap = go.Heatmap(
    z=df_stage_1A_congestion_settles_pivoted,
    x=df_stage_1A_congestion_settles_pivoted.columns,
    y=df_stage_1A_congestion_settles_pivoted.index.astype(str),
    colorscale='Jet',
)

fig = go.Figure(data=congestion_heatmap)

fig.show()

## Outputs

### Reading zonal NSPL and NEE NSPL

In [None]:
# Zonal NSPL: rows of the forecast tab whose ZONE equals the analyst's zone, third column.
# NOTE: `.values` returns a NumPy array (one entry per matching row), not a scalar, so everything
# derived from it below (pmi_share, stage_1B_MW, ...) is an array as well.
zonal_NSPL_for_ARR = pd.read_excel(zonal_nspl_path, sheet_name='Stage 1A MW - Update HH 12-10').pipe( # This tab was created by me by copying the existing Stage 1A MW tab and updating it for PJM's latest values 
    lambda DF: DF[DF.ZONE==zone]
).iloc[:, 2].values # extracting zonal NSPL corresponding to zone


# NEE NSPL: total network service peak load over all rows of the 'NSPL' tab that sink at sink_name
NEE_NSPL_for_ARR = pd.read_excel(nee_nspl_path, sheet_name='NSPL').pipe(
    lambda DF: DF[DF.sinkName == sink_name]
)['ns1:NetworkServicePeakLoad'].sum() # summing up NEE NSPL corresponding to sink

# Share of the zonal NSPL that belongs to NEE; used below to scale NEE-level figures up to the zone
pmi_share = NEE_NSPL_for_ARR / zonal_NSPL_for_ARR

clear_rate = 1 # Set this to 1 by default; multiplies every stage 2 $/MW value read below

def _read_stage_2_dollars_per_mw(workbook_path, sheet_name, column_letter):
    """Read one stage 2 $/MW value from a forecast summary sheet and scale it by ``clear_rate``.

    The first seven rows of the sheet are skipped, the next row is taken as the header and the
    single row after that is read, so the value comes from row 9 of ``column_letter``.
    """
    summary_cell = pd.read_excel(workbook_path, usecols=column_letter, skiprows=7, sheet_name=sheet_name, nrows=1)
    return summary_cell.iloc[0, 0] * clear_rate


# Base case
stage_2_dollars_per_MW_PY_2026_2027_base = _read_stage_2_dollars_per_mw(stage_2_dollar_per_MW_PY_26_27_path, 'PY 2627 Summary', 'M') # Cell M9
stage_2_dollars_per_MW_PY_2027_2028_base = _read_stage_2_dollars_per_mw(stage_2_dollar_per_MW_PY_27_28_path, 'PY 2728 Summary', 'M') # Cell M9

# Good case
stage_2_dollars_per_MW_PY_2026_2027_good = _read_stage_2_dollars_per_mw(stage_2_dollar_per_MW_PY_26_27_path, 'PY 2627 Summary', 'N') # Cell N9
stage_2_dollars_per_MW_PY_2027_2028_good = _read_stage_2_dollars_per_mw(stage_2_dollar_per_MW_PY_27_28_path, 'PY 2728 Summary', 'N') # Cell N9

# Bad case: the base case scaled by 25/28
stage_2_dollars_per_MW_PY_2026_2027_bad = stage_2_dollars_per_MW_PY_2026_2027_base * 25 / 28
stage_2_dollars_per_MW_PY_2027_2028_bad = stage_2_dollars_per_MW_PY_2027_2028_base * 25 / 28


# Stage 1B MW scaled up from NEE to the zone. This is what it is normally, but if stage 1B is negative
# it should be set to 0 - that override is not applied here and has to be done by hand.
stage_1B_MW = df_stage_1B['ns1:ClearedMW'].sum() / pmi_share

In [None]:
zonal_NSPL_for_ARR, NEE_NSPL_for_ARR, pmi_share, stage_1B_MW

In [None]:
# MW available for ARR = analyst-set share of the zonal NSPL;
# MW available for stage 1A = analyst-set share of that ARR total.
Total_MW_for_ARR = percent_for_ARR * zonal_NSPL_for_ARR
Total_MW_for_stage_1A = percent_for_stage_1A * Total_MW_for_ARR

In [None]:
Total_MW_for_ARR, Total_MW_for_stage_1A

### Helper function for doing the base, good and bad cases

In [None]:
def arr_valuation_output(annual_decay: list, stage_2_dollars_per_MW: list):
    """Build the ARR valuation table for one scenario (base, good or bad).

    Steps through ``look_ahead_years`` planning years, starting at
    ``current_planning_year`` and advancing with ``next_planning_year``.

    Current planning year:
      * path values come from the ``<year>_path_value`` column,
      * the adjustment ratio is 1,
      * stage 1B and stage 2 dollars are each divided by ``pmi_share`` and by 2
        (NOTE(review): the reason for the factor of 2 is not documented here),
      * stage 2 MW is reported as 0 because stage 2 comes from a pool of dollars.

    Later planning years (index ``i``):
      * path values come from the long-term auction column
        ``<most_recent_LT_auction_round>_YR<i>_<most_recent_LT_auction>``,
      * the adjustment ratio is ``min(1, Total_MW_for_stage_1A / annual path MW)``,
      * stage 2 MW = ``Total_MW_for_ARR`` - adjusted path MW - ``stage_1B_MW``,
        valued at ``stage_2_dollars_per_MW[i]``.

    Reads these notebook-level names: ``df_arr_valuation``, ``df_stage_1B``,
    ``df_stage_2_filtered``, ``pmi_share``, ``stage_1B_MW``, ``Total_MW_for_ARR``,
    ``Total_MW_for_stage_1A``, ``current_planning_year``, ``look_ahead_years``,
    ``most_recent_LT_auction`` and ``most_recent_LT_auction_round``.

    Parameters
    ----------
    annual_decay : list
        One decay rate per planning year; stage 1 dollars of year ``i`` are
        scaled by ``(1 + annual_decay[i]) ** i``.
    stage_2_dollars_per_MW : list
        One stage 2 $/MW value per planning year. Entry 0 is reported in the
        output but not used in the current-year calculation.

    Returns
    -------
    pandas.DataFrame
        One row per metric and one column per planning year.
    """
    # Row label -> per-year values, in the order the rows appear in the output.
    # The two input lists are reported as rows unchanged.
    metric_rows = {
        'annual_path_MWs': [],
        'raw_stage_1_dollars': [],
        'adj_ratios': [],
        'annual_decay': annual_decay,
        'stage_1_dollars': [],
        'stage_1B_dollars': [],
        'MW_for_stage_2': [],
        'stage_2_dollars_per_MW': stage_2_dollars_per_MW,
        'stage_2_dollars': [],
        'total_dollars': [],
    }
    planning_years = []
    planning_year = current_planning_year

    for year_idx in range(look_ahead_years):
        is_current_year = planning_year == current_planning_year

        # Capacity always comes from the current planning year's column
        capacity_MW = df_arr_valuation[current_planning_year + '_Capacity MW']
        selection = df_arr_valuation[planning_year + '_Selection']
        path_MW = round((capacity_MW * selection).sum())

        if is_current_year:
            adj_ratio = 1
            path_value = df_arr_valuation[planning_year + '_path_value']
        else:
            # Cap at 1. For zones with no stage 1A MW, use adj_ratio = 1 here instead.
            adj_ratio = min(1, Total_MW_for_stage_1A / path_MW)
            path_value = df_arr_valuation[
                most_recent_LT_auction_round + '_YR' + str(year_idx) + '_' + most_recent_LT_auction
            ]

        raw_stage_1 = (capacity_MW * path_value * selection).sum()
        stage_1 = raw_stage_1 * adj_ratio * (1 + annual_decay[year_idx]) ** year_idx

        stage_1B = (df_stage_1B[planning_year + '_path_value'] * df_stage_1B['ns1:ClearedMW']).sum() / pmi_share

        if is_current_year:
            stage_1B = stage_1B / 2
            stage_2 = df_stage_2_filtered['Total_Value'].sum() / pmi_share / 2
            stage_2_MW = 0  # not needed for the current planning year since we get the pool of dollars
        else:
            stage_2_MW = Total_MW_for_ARR - path_MW * adj_ratio - stage_1B_MW
            stage_2 = stage_2_MW * stage_2_dollars_per_MW[year_idx]

        planning_years.append(planning_year)
        metric_rows['annual_path_MWs'].append(path_MW)
        metric_rows['raw_stage_1_dollars'].append(raw_stage_1)
        metric_rows['adj_ratios'].append(adj_ratio)
        metric_rows['stage_1_dollars'].append(stage_1)
        metric_rows['stage_1B_dollars'].append(stage_1B)
        metric_rows['MW_for_stage_2'].append(stage_2_MW)
        metric_rows['stage_2_dollars'].append(stage_2)
        metric_rows['total_dollars'].append(stage_1 + stage_1B + stage_2)

        planning_year = next_planning_year(planning_year)

    return pd.DataFrame(
        list(metric_rows.values()),
        columns=planning_years,
        index=list(metric_rows),
    )

### Analyst to set good, base and bad long-term decay (LTD)

In [None]:
# Long-term decay presets per zone. Keep exactly one zone's three values uncommented.

# AECO
# good_decay = -0.22
# base_decay = -0.27
# bad_decay = -0.31

# JCPL
# good_decay = -0.11
# base_decay = -0.14
# bad_decay = -0.18

# PSEG
# good_decay = -0.06
# base_decay = -0.14
# bad_decay = -0.24

# RECO
# good_decay = 0
# base_decay = 0
# bad_decay = 0

# UGI
# good_decay = -0.15
# base_decay = -0.11
# bad_decay = -0.13

# DPL
# NOTE(review): for AECO, JCPL and PSEG the good case is the least negative decay and the
# bad case the most negative. The DPL values below (and the UGI preset above) do not follow
# that ordering. arr_valuation_output scales stage 1 dollars by (1 + decay) ** year, so a
# more negative decay yields fewer dollars -- confirm these values are intended.
good_decay = -0.16
base_decay = -0.15
bad_decay = -0.13

# Year 0 (the current planning year) carries no decay; every later year uses the scenario rate.
annual_decay_good = [0 if year_idx == 0 else good_decay for year_idx in range(look_ahead_years)]
annual_decay_base = [0 if year_idx == 0 else base_decay for year_idx in range(look_ahead_years)]
annual_decay_bad = [0 if year_idx == 0 else bad_decay for year_idx in range(look_ahead_years)]


def _stage_2_dollars_per_MW_by_year(py_2026_2027, py_2027_2028):
    """Return one stage 2 $/MW value per planning year, `look_ahead_years` entries long.

    Entry 0 (current planning year) is 0, entry 1 is PY 26/27 and every later
    entry reuses the PY 27/28 value. With look_ahead_years = 4 this is
    [0, PY26/27, PY27/28, PY27/28]. The list is truncated or padded so its
    length always matches the per-year lists arr_valuation_output builds.
    NOTE(review): reusing PY 27/28 beyond the fourth year extends the existing
    convention -- confirm before running with look_ahead_years > 4.
    """
    known_years = [0, py_2026_2027, py_2027_2028]
    padding = [py_2027_2028] * max(0, look_ahead_years - len(known_years))
    return (known_years + padding)[:look_ahead_years]


stage_2_dollars_per_MW_good = _stage_2_dollars_per_MW_by_year(
    stage_2_dollars_per_MW_PY_2026_2027_good, stage_2_dollars_per_MW_PY_2027_2028_good)
stage_2_dollars_per_MW_base = _stage_2_dollars_per_MW_by_year(
    stage_2_dollars_per_MW_PY_2026_2027_base, stage_2_dollars_per_MW_PY_2027_2028_base)
stage_2_dollars_per_MW_bad = _stage_2_dollars_per_MW_by_year(
    stage_2_dollars_per_MW_PY_2026_2027_bad, stage_2_dollars_per_MW_PY_2027_2028_bad)

### ARR Valuation Tables

In [None]:
# One workbook per zone / planning year / look-ahead horizon, written to the working directory
valuation_workbook_name = f'{zone}_{current_planning_year}_{look_ahead_years}_valuation.xlsx'

# (sheet name, decay list, stage 2 $/MW list) -- sheets are written in this order
scenario_sheet_inputs = (
    ('Base', annual_decay_base, stage_2_dollars_per_MW_base),
    ('Good', annual_decay_good, stage_2_dollars_per_MW_good),
    ('Bad', annual_decay_bad, stage_2_dollars_per_MW_bad),
)

with pd.ExcelWriter(valuation_workbook_name, engine='openpyxl') as writer:
    df_arr_valuation.to_excel(writer, sheet_name='Paths')

    for scenario_sheet, scenario_decay, scenario_dollars_per_MW in scenario_sheet_inputs:
        arr_valuation_output(scenario_decay, scenario_dollars_per_MW).to_excel(writer, sheet_name=scenario_sheet)

    # describe() of the filtered long-term decay table (without the selected-capacity column),
    # stacked on top of the separately computed summary statistics
    long_term_decay_sheet = pd.concat([
        df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity').describe(),
        df_long_term_decay_summary_stats,
    ])
    long_term_decay_sheet.to_excel(writer, sheet_name='LTD')

    df_stage_1A_congestion_settles_pivoted.to_excel(writer, sheet_name='Congestion Settles')


The cell below is only for manually copying the outputs to the clipboard so they can be pasted into Excel.

In [None]:
# arr_valuation_output(annual_decay_base, stage_2_dollars_per_MW_base).to_clipboard()
# arr_valuation_output(annual_decay_good, stage_2_dollars_per_MW_good).to_clipboard()
# arr_valuation_output(annual_decay_bad, stage_2_dollars_per_MW_bad).to_clipboard()
# pd.concat([df_long_term_decay_filtered.drop(columns=next_year + '_Selected_Capacity').describe(), df_long_term_decay_summary_stats]).to_clipboard()
# df_stage_1A_congestion_settles_pivoted.to_clipboard()

### Zonal ARR chart

In [None]:
# Base-case valuation table, computed once and reused for all three planning-year overrides
valuation_base = arr_valuation_output(annual_decay_base, stage_2_dollars_per_MW_base)

# Row for `zone` from the 'Final Results' sheet, with the PY26/27, PY27/28 and PY28/29 entries
# replaced by this notebook's base-case total_dollars for the matching planning years
zonal_arrs_base = (
    pd.read_excel(zonal_arr_path, sheet_name='Final Results', skiprows=4)
    .iloc[:22, 2:23]
    .rename(columns={'Unnamed: 2': 'Zone'})
    .set_index('Zone')
    .loc[zone, :]
    .pipe(lambda series: series.where(series.index != 'PY26/27', valuation_base.loc['total_dollars', next_year]))
    .pipe(lambda series: series.where(series.index != 'PY27/28', valuation_base.loc['total_dollars', next_to_next_year]))
    .pipe(lambda series: series.where(series.index != 'PY28/29', valuation_base.loc['total_dollars', next_to_next_to_next_year]))
)

In [None]:
# Good-case valuation table, computed once and reused for all three planning-year overrides
valuation_good = arr_valuation_output(annual_decay_good, stage_2_dollars_per_MW_good)

# Row for `zone` from the 'Final Results' sheet, with the PY26/27, PY27/28 and PY28/29 entries
# replaced by this notebook's good-case total_dollars for the matching planning years
zonal_arrs_good = (
    pd.read_excel(zonal_arr_path, sheet_name='Final Results', skiprows=4)
    .iloc[:22, 2:23]
    .rename(columns={'Unnamed: 2': 'Zone'})
    .set_index('Zone')
    .loc[zone, :]
    .pipe(lambda series: series.where(series.index != 'PY26/27', valuation_good.loc['total_dollars', next_year]))
    .pipe(lambda series: series.where(series.index != 'PY27/28', valuation_good.loc['total_dollars', next_to_next_year]))
    .pipe(lambda series: series.where(series.index != 'PY28/29', valuation_good.loc['total_dollars', next_to_next_to_next_year]))
)

In [None]:
# Bad-case valuation table, computed once and reused for all three planning-year overrides
valuation_bad = arr_valuation_output(annual_decay_bad, stage_2_dollars_per_MW_bad)

# Row for `zone` from the 'Final Results' sheet, with the PY26/27, PY27/28 and PY28/29 entries
# replaced by this notebook's bad-case total_dollars for the matching planning years
zonal_arrs_bad = (
    pd.read_excel(zonal_arr_path, sheet_name='Final Results', skiprows=4)
    .iloc[:22, 2:23]
    .rename(columns={'Unnamed: 2': 'Zone'})
    .set_index('Zone')
    .loc[zone, :]
    .pipe(lambda series: series.where(series.index != 'PY26/27', valuation_bad.loc['total_dollars', next_year]))
    .pipe(lambda series: series.where(series.index != 'PY27/28', valuation_bad.loc['total_dollars', next_to_next_year]))
    .pipe(lambda series: series.where(series.index != 'PY28/29', valuation_bad.loc['total_dollars', next_to_next_to_next_year]))
)

In [None]:
fig = go.Figure()

# One line per scenario, added in the order base, bad, good
scenario_series_by_name = (
    ('base', zonal_arrs_base),
    ('bad', zonal_arrs_bad),
    ('good', zonal_arrs_good),
)

for scenario_name, scenario_series in scenario_series_by_name:
    fig.add_trace(
        go.Scatter(
            x=scenario_series.index,
            y=scenario_series.values,
            mode='lines',
            name=scenario_name,
        )
    )

fig.update_layout(title=f'{zone} ARR Valuation')

fig.show()

In [None]:
# Save image

# fig.write_image(f'{zone}_{current_planning_year}_{look_ahead_years}.png')

## Sink Selection

Here we compare the aggregated path dollars obtained when using the residual aggregate, the load aggregate, or the zone as the sink, and choose the sink that yields the highest total dollars.

In [None]:
# Flat names for the six columns kept from the workbook (which has a two-row header)
stage_1_resource_columns = [
    'Zone', 'PNODEID', 'FTR Name', current_planning_year + '_Capacity MW', 'Retired', 'Rate_based'
]

df_stage_1_resources_for_sink_selection = (
    pd.concat(
        pd.read_excel(
            stage_1_resources_path,
            sheet_name=None,  # read all sheets and combine them into a single frame
            header=[0, 1],
        ).values(),
        ignore_index=True,
    )
    .dropna(how='all', axis=0)  # drop rows with no values
    .iloc[:, [0, 1, 2, 5, 6, 10]]
    .set_axis(stage_1_resource_columns, axis=1)
    # Both filters in one mask built from the same frame. Chaining df[mask1][mask2] with
    # mask2 taken from the unfiltered frame makes pandas reindex the mask and warn.
    .loc[lambda DF: (DF.Retired != 'Y') & (DF.Rate_based != 'Rate-based')]
    .dropna(subset='PNODEID')
    .assign(Zone=lambda DF: DF.Zone.str.strip())
)

df_stage_1_resources_for_sink_selection

In [None]:
# Since PJM posted the retired resources and the QRRs for PY 26-27 we account for that below

# PJM's posted PY 26-27 workbook of retired resources and initial qualified replacement
# resources (QRRs). All sheets are loaded; the unpacking below requires exactly two sheets,
# taken in workbook order as (retired resources, QRRs).
py_26_27_retired_and_qrr_sheets = pd.read_excel(
    r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Resources\2026-27\2026-2027-stage-1-retired-and-initial-qualified-replacement-resources-by-zone.xlsx',
    sheet_name=None,
    skiprows=1,
)

py_26_27_retired_resources, py_26_27_qrrs = py_26_27_retired_and_qrr_sheets.values()

py_26_27_retired_resources = (
    py_26_27_retired_resources
    .drop(columns='Historical Unit Name')
    .rename(columns={'Pnode ID': 'PNODEID'})
)

py_26_27_retired_resources  # These resources should be removed for sink selection

In [None]:
# Align the node-id column name with the other frames and give every QRR a proxy capacity of 1 MW
py_26_27_qrrs = (
    py_26_27_qrrs
    .rename(columns={'Pnode ID': 'PNODEID'})
    .assign(**{current_planning_year + '_Capacity MW': 1})
)

py_26_27_qrrs  # These resources will be appended and given a proxy capacity of 1 MW

In [None]:
sink_node_workbook_path = r'K:\Valuation\MODELS\VQSWAP\VQSwap Update Info\PJM\ARRs\ARR Path Selection\2025\Hemanth\Data\Sink Node ID.xlsx'

# Keep only rows that have at least one of the three candidate sink nodes
df_sink_node = pd.read_excel(sink_node_workbook_path).dropna(
    how='all',
    subset=['Zone_Node', 'Resid_Agg_Node', 'Load_Agg_Node'],
)

df_sink_node

In [None]:
# Year-1 column of the most recent long-term auction round, used as the LMP for the source
# and for each of the three candidate sinks
lt_yr1_lmp_column = most_recent_LT_auction_round + '_' + 'YR1' + '_' + most_recent_LT_auction
lt_yr1_lmps = df_long_term_ftr_results[[lt_yr1_lmp_column]]

(
    df_stage_1_resources_for_sink_selection
    .loc[
        lambda DF: ~DF.PNODEID.isin(py_26_27_retired_resources.PNODEID.values)  # filtering out resources slated to be retired
    ]
    .pipe(
        lambda DF: pd.concat([DF, py_26_27_qrrs], ignore_index=True)  # adding QRRs with proxy capacity of 1
    )
    .reset_index(drop=True)
    .merge(
        df_sink_node,
        on='Zone',
        how='left'
    )
    .merge(  # Merging with source LMP
        right=lt_yr1_lmps,
        how='left',
        on='PNODEID',
        validate='m:1',
    )
    .rename(columns={lt_yr1_lmp_column: 'Source_LMP'})
    .merge(  # Merging with Zone LMP
        right=lt_yr1_lmps,
        how='left',
        left_on='Zone_Node',
        right_on='PNODEID',
        validate='m:1',
    )
    .rename(columns={lt_yr1_lmp_column: 'Zone_Node_LMP'})
    .merge(  # Merging with Residual Aggregate LMP
        right=lt_yr1_lmps,
        how='left',
        left_on='Resid_Agg_Node',
        right_on='PNODEID',
        validate='m:1',
    )
    .rename(columns={lt_yr1_lmp_column: 'Resid_Agg_Node_LMP'})
    .merge(  # Merging with Load Aggregate LMP
        right=lt_yr1_lmps,
        how='left',
        left_on='Load_Agg_Node',
        right_on='PNODEID',
        validate='m:1',
    )
    .rename(columns={lt_yr1_lmp_column: 'Load_Agg_Node_LMP'})
    .assign(
        # Path value per candidate sink = sink LMP - source LMP
        Zone_Node_Path_Value=lambda DF: DF.Zone_Node_LMP - DF.Source_LMP,
        Resid_Agg_Node_Path_Value=lambda DF: DF.Resid_Agg_Node_LMP - DF.Source_LMP,
        Load_Agg_Node_Path_Value=lambda DF: DF.Load_Agg_Node_LMP - DF.Source_LMP,
        # Dollars = path value x capacity; paths with a non-positive value contribute 0
        Zone_Node_Dollars=lambda DF: (DF.Zone_Node_Path_Value > 0) * DF.Zone_Node_Path_Value * DF[current_planning_year + '_Capacity MW'],
        Resid_Agg_Node_Dollars=lambda DF: (DF.Resid_Agg_Node_Path_Value > 0) * DF.Resid_Agg_Node_Path_Value * DF[current_planning_year + '_Capacity MW'],
        Load_Agg_Node_Dollars=lambda DF: (DF.Load_Agg_Node_Path_Value > 0) * DF.Load_Agg_Node_Path_Value * DF[current_planning_year + '_Capacity MW'],
    )
    .pivot_table(  # Summing up dollars based on different sinks
        index='Sink_Zone',
        values=['Zone_Node_Dollars', 'Resid_Agg_Node_Dollars', 'Load_Agg_Node_Dollars'],
        aggfunc='sum'
    )
    .loc[
        lambda DF: (DF != 0).any(axis=1)  # Dropping zones where all dollars are 0
    ]
    .assign(
        # Choose the sink with the largest dollars. removesuffix drops exactly the
        # '_Dollars' suffix; rstrip('_Dollars') would strip any trailing run of those
        # characters and could eat into a sink name.
        Sink_Selection=lambda DF: DF[
            ['Load_Agg_Node_Dollars', 'Resid_Agg_Node_Dollars', 'Zone_Node_Dollars']
        ].idxmax(axis=1).str.removesuffix('_Dollars')
    )
    .to_excel('ARR sink selection PY 26-27 after PJM update.xlsx')
)


## MSRS

In [None]:
folder_path = r'K:\Valuation\Wholesale for Retail\_Full Requirements\PJM\MD-PEPCO\2026-01\ARRs\msrs_reports_recon\APS'

# Collect the per-file frames and concatenate once at the end, rather than growing a
# DataFrame with pd.concat on every loop iteration.
arr_report_frames = []
nits_report_frames = []

for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    report = pd.read_csv(file_path, skiprows=4)

    # Files with 'ARR' in the name are ARR reports; every other file in the folder is
    # treated as a NITS report (NOTE(review): assumes the folder holds only these two kinds).
    if 'ARR' in file_name:
        arr_report_frames.append(report)
    else:
        nits_report_frames.append(report)

# pd.concat raises on an empty list, so fall back to an empty frame when no file matched
df_arr = pd.concat(arr_report_frames) if arr_report_frames else pd.DataFrame()
df_nits = pd.concat(nits_report_frames) if nits_report_frames else pd.DataFrame()

df_arr

In [None]:
# Display the combined non-ARR (NITS) reports loaded from folder_path
df_nits

In [None]:
# Denominator for the peak share. Presumably the zonal peak load in MW -- confirm.
# 6765.9 was the alternative value noted next to it.
peak_share_denominator_MW = 8937.6

# Total ARR target credit per date
arr_target_credit_by_date = (
    df_arr[['Date', 'ARR Target Credit ($)']]
    .groupby('Date')
    .sum()
    .reset_index()
)

(
    arr_target_credit_by_date
    .merge(df_nits[['Date', 'Daily Peak Load (MW)']], on='Date')
    .assign(
        # Share of the zone represented by the daily peak load
        Zonal_peak_share=lambda DF: DF['Daily Peak Load (MW)'] / peak_share_denominator_MW,
        # ARR credit divided by the share, giving the zone-level dollars
        Zonal_ARR_dollars=lambda DF: DF['ARR Target Credit ($)'] / DF['Zonal_peak_share'],
    )
    .to_clipboard()
)

# .pipe(
#     lambda DF: DF.Zonal_ARR_dollars.sum()
# )