In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Read the preprocessed FD001 training windows and their targets from disk.
X, y = (
    np.load(npy_path)
    for npy_path in (
        "../data/processed/X_train_fd001.npy",
        "../data/processed/y_train_fd001.npy",
    )
)

print("X shape:", np.shape(X))   # (samples, seq_len, num_features)
print("y shape:", np.shape(y))   # (samples,)

# Classic ML models need 2-D input: keep only the final timestep of every
# window, which drops the sequence axis and leaves (samples, num_features).
X_flat = X[:, -1]
print("X_flat shape:", np.shape(X_flat))

# Hold out 20% of the samples for validation; the seed is fixed so the split
# is reproducible.
# NOTE(review): this is a purely random split over windows — if several windows
# come from the same source unit they may land on both sides; confirm intended.
X_train, X_val, y_train, y_val = train_test_split(
    X_flat,
    y,
    test_size=0.2,
    random_state=42,
)


X shape: (17631, 30, 24)
y shape: (17631,)
X_flat shape: (17631, 24)


In [4]:
from sklearn.linear_model import LinearRegression

# Baseline 1: ordinary least squares on the last-timestep features.
# fit() returns the estimator itself, so construction and training are chained.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_val)  # predictions on the held-out validation split


In [5]:
from sklearn.ensemble import RandomForestRegressor

# Baseline 2: random forest with 100 trees; seeded for reproducible results.
# fit() returns the estimator itself, so construction and training are chained.
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_rf = rf.predict(X_val)  # predictions on the held-out validation split


In [9]:
from xgboost import XGBRegressor

# Baseline 3: gradient-boosted trees (200 rounds, depth 6, learning rate 0.05),
# seeded for reproducible results.
xgb = XGBRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_val)  # predictions on the held-out validation split


In [10]:
import math

def evaluate_model(y_true, y_pred, name):
    """Score one set of predictions, print a one-line summary, and return the scores.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, passed together with ``y_true`` to each metric.
        name: Label printed at the start of the summary line.

    Returns:
        The tuple ``(rmse, mae, r2)``.
    """
    mse = mean_squared_error(y_true, y_pred)
    scores = (
        math.sqrt(mse),                        # RMSE = square root of the MSE
        mean_absolute_error(y_true, y_pred),   # MAE
        r2_score(y_true, y_pred),              # R²
    )
    rmse, mae, r2 = scores
    print(f"{name} → RMSE: {rmse:.2f}, MAE: {mae:.2f}, R²: {r2:.3f}")
    return scores

# Evaluate every baseline on the validation split. Each entry maps a short
# model key to its (rmse, mae, r2) tuple; entries are evaluated top to bottom,
# so the summary lines print in this order.
results = {
    'LR': evaluate_model(y_val, y_pred_lr, "Linear Regression"),
    'RF': evaluate_model(y_val, y_pred_rf, "Random Forest"),
    'XGB': evaluate_model(y_val, y_pred_xgb, "XGBoost"),
}


Linear Regression → RMSE: 38.84, MAE: 29.19, R²: 0.596
Random Forest → RMSE: 36.96, MAE: 26.01, R²: 0.634
XGBoost → RMSE: 36.68, MAE: 25.75, R²: 0.640


In [11]:
import pandas as pd
import os

# Collect results into DataFrame.
# The table is built from `results` (the (rmse, mae, r2) tuples returned by
# evaluate_model) instead of hand-copied numbers, so the saved CSV always
# reflects the models that were actually evaluated in this run.
model_labels = {
    "LR": "Linear Regression",
    "RF": "Random Forest",
    "XGB": "XGBoost",
}

# Round to the same precision as the printed summary (2 decimals for the
# errors, 3 for R²). Unknown keys fall back to the key itself as the row label.
results_dict = {
    model_labels.get(key, key): [
        round(float(rmse), 2),
        round(float(mae), 2),
        round(float(r2), 3),
    ]
    for key, (rmse, mae, r2) in results.items()
}

df_results = pd.DataFrame.from_dict(
    results_dict,
    orient='index',
    columns=['RMSE', 'MAE', 'R2']
)

# Make sure results folder exists
os.makedirs("../results", exist_ok=True)

# Save as CSV (the model names are written as the unnamed index column)
df_results.to_csv("../results/baseline_fd001.csv")

print("✅ Results saved to ../results/baseline_fd001.csv")
df_results


✅ Results saved to ../results/baseline_fd001.csv


Unnamed: 0,RMSE,MAE,R2
Linear Regression,38.84,29.19,0.596
Random Forest,36.96,26.01,0.634
XGBoost,36.68,25.75,0.64
