# In [None]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Quarterly CSV inputs for 2022, 2023, and 2024, built from one directory
# and a list of quarter labels so the naming pattern is stated only once.
# NOTE(review): 2023Q4 is not in the list even though the data is described
# as covering 2022-2024 -- confirm whether that quarter is intentionally
# omitted or the file is simply missing.
DATA_DIR = '/Users/aashrithasankineni/Downloads/california_electricity_prices'
QUARTER_LABELS = (
    '2022Q1', '2022Q2', '2022Q3', '2022Q4',
    '2023Q1', '2023Q2', '2023Q3',
    '2024Q1', '2024Q2', '2024Q3', '2024Q4',
)
files = [
    f'{DATA_DIR}/caiso_lmp_rt_5min_interfaces_{quarter}.csv'
    for quarter in QUARTER_LABELS
]

# Load every quarterly CSV (the first three lines of each file are skipped)
# and stack the frames into one table with a fresh 0..n-1 row index.
quarterly_frames = [pd.read_csv(csv_path, skiprows=3) for csv_path in files]
data_combined = pd.concat(quarterly_frames, ignore_index=True)

# Convert the interval-ending timestamp column to datetimes, store the result
# under 'Datetime (UTC)', and promote that column to the frame's index.
interval_end_times = pd.to_datetime(data_combined['UTC Timestamp (Interval Ending)'])
data_combined['Datetime (UTC)'] = interval_end_times
data_combined = data_combined.set_index('Datetime (UTC)')

# Select the electricity price series to model.
# NOTE(review): this line originally carried a "replace with the correct
# column name" placeholder -- confirm 'PALOVRDE_ASR-APND LMP' is the intended
# column before relying on the fitted model.
electricity_prices = data_combined['PALOVRDE_ASR-APND LMP']

# Resample to hourly averages.
# An explicit Hour offset is used instead of the 'H' string alias: the
# uppercase alias is deprecated since pandas 2.2 (FutureWarning), while the
# offset object yields the same hourly bins on every pandas version.
# NOTE(review): the index comes from an "Interval Ending" timestamp column,
# and hourly resampling defaults to left-closed, left-labelled bins, so a
# sample stamped exactly on the hour is averaged into the hour that *starts*
# at that stamp. Confirm this alignment is what is wanted.
hourly_prices = electricity_prices.resample(pd.offsets.Hour()).mean()

# Configure a SARIMA(1, 1, 1) x (1, 1, 1, 24) model on the hourly series.
# The seasonal period of 24 steps corresponds to a daily cycle in hourly data.
seasonal_order = (1, 1, 1, 24)
sarima_options = {
    'order': (1, 1, 1),                # non-seasonal (p, d, q)
    'seasonal_order': seasonal_order,  # seasonal (P, D, Q, s)
    # Stationarity / invertibility constraints on the estimated
    # parameters are both switched off.
    'enforce_stationarity': False,
    'enforce_invertibility': False,
}
sarima_model = SARIMAX(hourly_prices, **sarima_options)

# Estimate the parameters; disp=False keeps the optimizer's convergence
# output off the console.
sarima_fit = sarima_model.fit(disp=False)

# Report completion and print the fitted model's summary table.
print("SARIMA Model Training Complete.")
print(sarima_fit.summary())

# Persist the fitted results object to disk so it can be reloaded later
# without refitting.
model_output_path = '/Users/aashrithasankineni/Downloads/california_electricity_prices/sarima_model_2022_2024.pkl'
sarima_fit.save(model_output_path)
