In [1]:
import pandas as pd

# Read one Excel workbook per year. The order of the paths fixes the row order
# of the concatenated frame built below.
data_2022, data_2023, data_2024 = [
    pd.read_excel(workbook_path)
    for workbook_path in (
        "Wholesale_Pricing_Data/ice_electric-2022final.xlsx",
        "Wholesale_Pricing_Data/ice_electric-2023final.xlsx",
        "Wholesale_Pricing_Data/ice_electric-2024final.xlsx",
    )
]

# Stack the yearly frames row-wise and renumber the index from 0.
combined_df = pd.concat(
    [data_2022, data_2023, data_2024],
    axis=0,
    ignore_index=True,
)

In [3]:
# Lookup from the 'Price hub' label to the market operator label used for
# grouping and joining further down.
# NOTE(review): 'Palo Verde Peak' is assigned to CAISO here; Palo Verde is
# commonly treated as a Southwest hub, so confirm this grouping is intended.
hub_to_iso = {
    "Indiana Hub RT Peak": "MISO",
    "Mid C Peak": "Non-ISO (Mid-Columbia)",
    "NP15 EZ Gen DA LMP Peak": "CAISO",
    "Nepool MH DA LMP Peak": "ISO-NE",
    "PJM WH Real Time Peak": "PJM",
    "Palo Verde Peak": "CAISO",
    "SP15 EZ Gen DA LMP Peak": "CAISO",
}

# Hubs that are not keys of the lookup receive NaN in the new 'ISO' column.
combined_df["ISO"] = combined_df["Price hub"].map(hub_to_iso)

In [5]:
# Working name for the combined wholesale price frame. This binds the same
# object as `combined_df`; it is not a copy.
eia_pricing_data = combined_df

# LMP records read from the combined CSV export (one row per timestamp and
# location, judging by the preview below).
iso_hourly_data = pd.read_csv("combined_ne_2023_lmp_data.csv")

In [7]:
# Preview the first rows of the LMP frame to check its columns and value formats.
iso_hourly_data.head()

Unnamed: 0,timestamp_utc,iso,Location Name,Location Type,LMP,MCC,MLC
0,2023-01-01 00:00:00+00:00,ISO-NE,UN.FRNKLNSQ13.810CC,Node,31.28,0.0,0.0
1,2023-01-01 00:00:00+00:00,NYISO,GENESE,Node,21.14,-1.63,-0.42
2,2023-01-01 00:00:00+00:00,NYISO,DUNWOD,Node,33.22,-11.82,1.47
3,2023-01-01 00:00:00+00:00,NYISO,CENTRL,Node,21.88,-2.0,-0.04
4,2023-01-01 00:00:00+00:00,NYISO,CAPITL,Node,37.7,-16.8,0.98


In [9]:
# List the distinct 'iso' labels present; these are the values the later join
# matches against the 'ISO' labels derived from the price hubs.
iso_hourly_data['iso'].unique()

array(['ISO-NE', 'NYISO', 'PJM'], dtype=object)

In [11]:
# Preview the combined wholesale price frame, including the mapped 'ISO' column.
eia_pricing_data.head()

Unnamed: 0,Price hub,Trade date,Delivery start date,Delivery \nend date,High price $/MWh,Low price $/MWh,Wtd avg price $/MWh,Change,Daily volume MWh,Number of trades,Number of counterparties,ISO
0,Indiana Hub RT Peak,2022-01-04 00:00:00,2022-01-05,2022-01-05,50.0,50.0,50.0,-11.0,800,1,2,MISO
1,Indiana Hub RT Peak,2022-01-05 00:00:00,2022-01-06,2022-01-06,70.0,68.0,69.0,19.0,4800,6,6,MISO
2,Indiana Hub RT Peak,2022-01-06 00:00:00,2022-01-07,2022-01-07,81.5,81.5,81.5,12.5,1600,2,3,MISO
3,Indiana Hub RT Peak,2022-01-07 00:00:00,2022-01-10,2022-01-10,75.0,75.0,75.0,-6.5,800,1,2,MISO
4,Indiana Hub RT Peak,2022-01-19 00:00:00,2022-01-20,2022-01-20,85.0,80.0,83.93,8.93,5600,7,8,MISO


## Aggregate EIA data into a daily volume-weighted average price per ISO

In [13]:
# Parse the three date columns. Unparseable entries become NaT (errors='coerce')
# so this sanity check cannot fail on a malformed cell.
trade_dates, delivery_start_dates, delivery_end_dates = [
    pd.to_datetime(eia_pricing_data[date_col], format='mixed', errors='coerce')
    for date_col in ('Trade date', 'Delivery start date', 'Delivery \nend date')
]

# Report which calendar years occur in each parsed column.
for _label, _parsed in (
    ("Trade date years:", trade_dates),
    ("Delivery start date years:", delivery_start_dates),
    ("Delivery end date years:", delivery_end_dates),
):
    print(_label, sorted(_parsed.dropna().dt.year.unique()))

Trade date years: [2021, 2022, 2023, 2024]
Delivery start date years: [2021, 2022, 2023, 2024]
Delivery end date years: [2021, 2022, 2023, 2024]


In [15]:
# Build one volume-weighted average price per (Trade date, ISO):
#     sum(price * volume) / sum(volume)
# Work on a copy so the combined source frame is left untouched.
eia_daily = eia_pricing_data.copy()

# Reduce the trade timestamp to a plain calendar date so it can serve as a
# daily grouping / join key.
eia_daily['Trade date'] = pd.to_datetime(eia_daily['Trade date'], format='mixed').dt.date

# Numerator term for each row: price times traded volume.
eia_daily['weighted_price'] = eia_daily['Wtd avg price $/MWh'] * eia_daily['Daily volume MWh']

# Denominator term for each row. `sum` skips NaN, so a row whose price*volume
# product is missing must not contribute its volume either; otherwise the
# average is biased toward zero, and a group with no usable price would come
# out as 0.0 instead of NaN.
eia_daily['priced_volume'] = eia_daily['Daily volume MWh'].where(eia_daily['weighted_price'].notna())

eia_daily_summary = (
    eia_daily
    .groupby(['Trade date', 'ISO'])  # rows with a missing ISO are dropped by groupby
    .agg(
        price_volume_sum=('weighted_price', 'sum'),  # sum of (P × V)
        total_volume=('priced_volume', 'sum'),       # sum of (V) over priced rows
    )
    # 0 / 0 yields NaN, which marks groups with no usable price/volume data.
    .assign(**{'Wtd avg price $/MWh': lambda g: g['price_volume_sum'] / g['total_volume']})
    .reset_index()
    .loc[:, ['Trade date', 'ISO', 'Wtd avg price $/MWh']]
)

In [17]:
# Preview the daily per-ISO volume-weighted prices produced by the aggregation.
eia_daily_summary.head()

Unnamed: 0,Trade date,ISO,Wtd avg price $/MWh
0,2021-12-29,CAISO,61.560769
1,2021-12-29,ISO-NE,44.33
2,2021-12-29,Non-ISO (Mid-Columbia),65.93
3,2021-12-29,PJM,31.25
4,2021-12-30,CAISO,63.198462


## Left join ISO data with EIA data

In [19]:
# Calendar date of each LMP record, used as the join key below.
# `utc=True` keeps the parse well-defined even if the raw strings carry mixed
# UTC offsets.
# NOTE(review): this is the UTC calendar date, not the market-local date; rows
# in the first UTC hours of a day may belong to the previous local day.
# Confirm which convention the join should follow.
iso_hourly_data['date'] = pd.to_datetime(iso_hourly_data['timestamp_utc'], utc=True).dt.date

# Attach the daily EIA price to every LMP row of the same date and ISO.
# - Left join: LMP rows without a matching EIA price are kept with NaN.
# - The right-hand keys are renamed so the join uses shared column names and
#   leaves no duplicate key columns to drop afterwards.
# - validate='many_to_one' raises if the EIA summary ever holds more than one
#   row per (date, ISO), which would otherwise silently duplicate LMP rows.
# NOTE(review): prices are matched on the EIA *trade* date; the preview of the
# EIA frame shows delivery on a later date than the trade, so confirm whether
# the delivery date is the intended key.
merged_df = iso_hourly_data.merge(
    eia_daily_summary.rename(columns={'Trade date': 'date', 'ISO': 'iso'}),
    how='left',
    on=['date', 'iso'],
    validate='many_to_one',
)

In [21]:
# Inspect the joined rows for PJM only (pandas truncates the rendered output).
merged_df.loc[merged_df['iso'].eq('PJM')]

Unnamed: 0,timestamp_utc,iso,Location Name,Location Type,LMP,MCC,MLC,date,Wtd avg price $/MWh
9691,2023-01-01 05:00:00+00:00,PJM,AEP-DAYTON HUB,Node,24.512404,0.097991,0.204413,2023-01-01,
9692,2023-01-01 05:00:00+00:00,PJM,OHIO HUB,Node,24.587087,0.104729,0.272358,2023-01-01,
9693,2023-01-01 05:00:00+00:00,PJM,BUCKEYE - AEP,Node,24.575430,0.072463,0.292967,2023-01-01,
9694,2023-01-01 05:00:00+00:00,PJM,BUCKEYE - DPL,Node,25.050972,0.070000,0.770972,2023-01-01,
9695,2023-01-01 05:00:00+00:00,PJM,DAY,Node,25.247434,0.070000,0.967434,2023-01-01,
...,...,...,...,...,...,...,...,...,...
19649829,2024-01-01 04:00:00+00:00,PJM,LOUDOUN,Node,20.770000,0.340000,0.650000,2024-01-01,
19649830,2024-01-01 04:00:00+00:00,PJM,MORRISVILLE,Node,20.630000,0.330000,0.520000,2024-01-01,
19649831,2024-01-01 04:00:00+00:00,PJM,OX,Node,20.770000,0.350000,0.640000,2024-01-01,
19649832,2024-01-01 04:00:00+00:00,PJM,POSSUM POINT,Node,20.760000,0.360000,0.620000,2024-01-01,


In [23]:
# Share of joined rows with no EIA price attached. Despite the name this is a
# fraction in [0, 1], not a percentage: the mean of a boolean mask equals the
# count of True values divided by the number of rows.
perc_na = merged_df['Wtd avg price $/MWh'].isna().mean()
perc_na

0.5926957958016338