In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pmdarima as pm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import warnings
import joblib

# Read the complete CSV into a DataFrame; the following cells derive the model
# sample from `data`.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs unmodified where this exact location exists.
file_path = "/Users/alonclausen/Desktop/Studie/Kandidat/Seminar/SEMINARFIN/fulldata.csv"
data = pd.read_csv(filepath_or_buffer=file_path)



In [2]:
# Index the frame by the parsed 'from' timestamps.
# The guard keeps this cell re-runnable: after the first execution 'from' is
# already the index, so accessing data['from'] again would raise KeyError.
if 'from' in data.columns:
    data['from'] = pd.to_datetime(data['from'])
    data = data.set_index('from')

# Rebuild the index so that it carries an explicit frequency whenever one can be
# inferred. pd.infer_freq raises ValueError for fewer than three timestamps and
# returns None for an irregular index; in both situations the index is simply
# left without a frequency instead of failing here.
inferred_freq = pd.infer_freq(data.index) if len(data.index) >= 3 else None
data.index = pd.DatetimeIndex(data.index.values, freq=inferred_freq)

# Restrict the sample to the window from April 1, 2016 to August 31, 2024.
# NOTE(review): the string bounds are compared as midnight timestamps, so of the
# final day only the 2024-08-31 00:00 observation is kept - confirm this is the
# intended cut-off.
start_date = '2016-04-01'
end_date = '2024-08-31'
in_window = (data.index >= start_date) & (data.index <= end_date)
data_filtered = data[in_window]

# Series of spot prices that is passed to the model in the next cell.
spot_price_filtered = data_filtered['SpotPriceDKK']

In [None]:
# Specify and estimate the seasonal ARIMA model:
# ARIMA(2,0,0) x (2,0,1) with a seasonal period of 24 observations and a
# constant term (trend='c').
sarima_model = SARIMAX(
    endog=spot_price_filtered,
    order=(2, 0, 0),
    seasonal_order=(2, 0, 1, 24),
    trend='c',
)
# disp=False keeps the optimiser from printing its convergence log.
sarima_results = sarima_model.fit(disp=False)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [None]:
# Rolling 24-step-ahead forecast, evaluated inside the observed sample.
#
# Why this is not a plain loop over sarima_results.get_forecast(steps=24):
# that call always forecasts from the end of the fitted sample, and the results
# object never changes between iterations, so every chunk would contain the same
# 24 values. Labelling those chunks with timestamps after the last observation
# also leaves nothing to compare against when the forecast index is later looked
# up in `spot_price_filtered`.
#
# Instead the forecast origin is moved through the sample one chunk at a time:
#   1. forecast the next `forecast_steps` observations from the current state,
#   2. label them with the timestamps of the observations they predict,
#   3. feed the actual observations of that chunk into the filter (`extend`)
#      so the next forecast is conditioned on them.
# The parameters stay fixed at the full-sample estimates in `sarima_results`,
# i.e. this is an in-sample evaluation of the day-ahead forecasts.
forecast_steps = 24

# Plain arrays are passed to statsmodels so the procedure does not depend on the
# date index carrying a frequency; timestamps are re-attached from the series.
observed_values = spot_price_filtered.to_numpy(dtype=float)
n_obs = len(observed_values)

if n_obs < 2 * forecast_steps:
    raise ValueError(
        f"Need at least {2 * forecast_steps} observations for a rolling forecast "
        f"(one warm-up chunk plus one forecast chunk); got {n_obs}."
    )

# Warm-up: run the filter with the fitted parameters over the first chunk only,
# so the first forecast is conditioned on exactly those observations.
rolling_results = sarima_results.apply(observed_values[:forecast_steps])

rolling_forecast = []
# Only complete chunks are forecast; a shorter remainder at the end is ignored.
for origin in range(forecast_steps, n_obs - forecast_steps + 1, forecast_steps):
    chunk_end = origin + forecast_steps
    target_index = spot_price_filtered.index[origin:chunk_end]

    # Forecast the chunk from information up to (not including) `origin`.
    predicted = np.asarray(rolling_results.forecast(steps=forecast_steps), dtype=float)
    rolling_forecast.append(
        pd.Series(predicted, index=target_index, name='predicted_mean')
    )

    # Condition on the realised values of this chunk. `extend` continues from the
    # last predicted state and processes only the new observations, so each
    # iteration costs time proportional to the chunk length. Errors are not
    # swallowed: a failed update would invalidate every later forecast.
    rolling_results = rolling_results.extend(observed_values[origin:chunk_end])

# Combine all forecasts into a single series
rolling_forecast = pd.concat(rolling_forecast)


In [None]:
# Look up the observed prices on the forecast timestamps and forward-fill gaps.
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases; the result is the same.
observed_data = spot_price_filtered.reindex(rolling_forecast.index).ffill()

# Plot observed data and rolling forecast on one explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(observed_data, label='Observed Data', color='blue')
ax.plot(rolling_forecast, label='Rolling Forecast', linestyle='--', color='red')
ax.legend()
ax.set_title("Rolling SARIMA Forecast vs. Observed Data")
ax.set_xlabel("Time")
ax.set_ylabel("Spot Price DKK")
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Root mean squared error between the observed prices and the rolling forecast.
# The square root is taken explicitly because the `squared=False` argument of
# mean_squared_error is deprecated and rejected by newer scikit-learn releases.
rmse = np.sqrt(mean_squared_error(observed_data, rolling_forecast))
print(f'RMSE for the forecasted period: {rmse}')
