In [4]:
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error

# Define directories
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, "../datasets/finalized")
MODEL_DIR = os.path.join(BASE_DIR, "../models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Load dataset
df_hourly = pd.read_csv(os.path.join(DATA_DIR, "finalized_hourly_data.csv"), parse_dates=["Start date"], low_memory=False)
df_hourly.set_index("Start date", inplace=True)

# Feature Engineering (Must match training)
df_hourly["Avg_Price_EUR_MWh"] = df_hourly[
    ["Germany/Luxembourg [/MWh] Original resolutions", "Belgium [/MWh] Original resolutions", "France [/MWh] Original resolutions"]
].mean(axis=1)

df_hourly["Rolling_Mean_24"] = df_hourly["Avg_Price_EUR_MWh"].rolling(24).mean()
df_hourly["Price_Change_1"] = df_hourly["Avg_Price_EUR_MWh"].pct_change() * 100
df_hourly["Lag_1"] = df_hourly["Avg_Price_EUR_MWh"].shift(1)
df_hourly.fillna(0, inplace=True)

features = ["Rolling_Mean_24", "Price_Change_1", "Lag_1"]
X = df_hourly[features]
y = np.where(
    df_hourly["Avg_Price_EUR_MWh"].pct_change().fillna(0) > 0.05, 1,
    np.where(df_hourly["Avg_Price_EUR_MWh"].pct_change().fillna(0) < -0.05, 2, 0)
)

# Load models & scaler
lgb_classifier = joblib.load(os.path.join(MODEL_DIR, "lgb_price_model.pkl"))
xgb_classifier = joblib.load(os.path.join(MODEL_DIR, "xgb_price_model.pkl"))
scaler = joblib.load(os.path.join(MODEL_DIR, "scaler.pkl"))

# Preprocess data
X_scaled = scaler.transform(X)

# Predictions
y_pred_lgb = lgb_classifier.predict(X_scaled)
y_pred_xgb = xgb_classifier.predict(X_scaled)

# Ensemble method
y_pred_ensemble = np.round((y_pred_lgb + y_pred_xgb) / 2)

# Evaluation Metrics
def evaluate_predictions(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    
    actual_volatility = np.std(y_true)
    predicted_volatility = np.std(y_pred)
    volatility_capture = 1 - abs(actual_volatility - predicted_volatility) / actual_volatility if actual_volatility != 0 else 0
    
    extreme_moves = (np.abs(y_true) > 0.15).sum()
    extreme_correct = ((np.abs(y_true) > 0.15) & (np.abs(y_pred) > 0.15)).sum()
    extreme_accuracy = extreme_correct / extreme_moves if extreme_moves > 0 else 0

    print(f"üìä Accuracy: {acc:.4f}")
    print(f"üìâ Mean Absolute Error: {mae:.4f}")
    print(f"üìà Root Mean Squared Error: {rmse:.4f}")
    print(f"‚ö° Volatility Capture Score: {volatility_capture:.4f}")
    print(f"üîç Extreme Price Movement Accuracy: {extreme_accuracy:.4f}")

# Run Evaluation
evaluate_predictions(y, y_pred_ensemble)

print("‚úÖ Model evaluation completed!")


  df_hourly["Price_Change_1"] = df_hourly["Avg_Price_EUR_MWh"].pct_change() * 100


üìä Accuracy: 0.9992
üìâ Mean Absolute Error: 0.0012
üìà Root Mean Squared Error: 0.0422
‚ö° Volatility Capture Score: 0.9993
üîç Extreme Price Movement Accuracy: 0.9977
‚úÖ Model evaluation completed!
