# Predictive Thermal Model Training

This notebook trains and compares multiple regression models for CPU temperature prediction using physics-aware features.

**Status:** INCOMPLETE



In [None]:

# Imports and setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

import joblib
import os
import time
import warnings
warnings.filterwarnings('ignore')


## Load Data

In [None]:

# Relative path of the processed dataset read by this notebook.
DATA_PATH = 'processed_data/thermal_processed.csv'

# Stop right away with a readable message when the file is not where we expect it.
data_file_present = os.path.exists(DATA_PATH)
assert data_file_present, "Processed data not found"

df = pd.read_csv(DATA_PATH)

# Report the size of the loaded frame, then preview its first rows.
n_rows, n_cols = df.shape
print(f"Loaded {n_rows} samples, {n_cols} columns")
df.head()


## Feature Preparation

In [None]:

# Columns kept out of the model inputs; this includes the target `cpu_temp`.
exclude_cols = ['timestamp', 'unix_time', 'cpu_temp']

# Every other column of df, in its original order, is used as a feature.
is_feature = ~df.columns.isin(exclude_cols)
feature_cols = df.columns[is_feature].tolist()

# Target first, then the feature matrix.
y = df['cpu_temp']
X = df[feature_cols]

n_features = len(feature_cols)
print(f"Features: {n_features}")


## Train-Test Split (Temporal)

In [None]:

# Positional split without shuffling: the leading rows are used for training and
# the trailing `test_size` fraction of rows is held out for testing.
test_size = 0.2
train_fraction = 1 - test_size
split_idx = int(len(X) * train_fraction)

train_rows = slice(None, split_idx)
test_rows = slice(split_idx, None)

X_train = X.iloc[train_rows]
y_train = y.iloc[train_rows]
X_test = X.iloc[test_rows]
y_test = y.iloc[test_rows]

# Scaling statistics are learned from the training rows only and then reused
# unchanged for the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

n_train, n_test = len(X_train), len(X_test)
print(f"Train: {n_train}, Test: {n_test}")


## Model Initialization

In [None]:

# Candidate regressors, keyed by the display name used in the results.
# Every estimator that accepts a seed here is given random_state=42 so that a
# fresh "Restart & Run All" reproduces the same metrics and the same winner.
models = {
    'Ridge Regression': Ridge(alpha=1.0),
    'Lasso Regression': Lasso(alpha=0.1, max_iter=10000),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'Extra Trees': ExtraTreesRegressor(n_estimators=100, random_state=42, n_jobs=-1),
    # The MLP uses random weight initialisation and batch shuffling; without a
    # seed its scores change from run to run.
    'Neural Network': MLPRegressor(hidden_layer_sizes=(100,50,25), max_iter=500, random_state=42),
    'SVR (RBF)': SVR(kernel='rbf', C=10)
}


## Train and Evaluate Models

In [None]:

# Models named here are fitted and evaluated on the standardized features;
# all other models use the unscaled feature frames.
scaled_model_names = {'Ridge Regression', 'Lasso Regression', 'Neural Network', 'SVR (RBF)'}

# name -> (rmse, mae, r2, train_time_seconds)
results = {}

for name, model in models.items():
    if name in scaled_model_names:
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        train_features, test_features = X_train, X_test

    # Time only the fit call, so the value reported as training time does not
    # include prediction or metric computation. perf_counter is a monotonic,
    # high-resolution clock intended for measuring elapsed intervals.
    start = time.perf_counter()
    model.fit(train_features, y_train)
    train_time = time.perf_counter() - start

    y_pred = model.predict(test_features)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[name] = (rmse, mae, r2, train_time)
    print(f"{name:20s} RMSE={rmse:.3f}  MAE={mae:.3f}  R2={r2:.3f}")


## Results Summary

In [None]:

# One row per model, one column per entry of the result tuples.
metric_columns = ['RMSE','MAE','R2','TrainTime']
results_unsorted = pd.DataFrame.from_dict(results, orient='index', columns=metric_columns)

# Lowest RMSE first, so the top row is the best-scoring model.
results_df = results_unsorted.sort_values(by='RMSE')

results_df


## Save Best Model

In [None]:

# The first index label of results_df names the model to persist.
best_model_name = results_df.index[0]
best_model = models[best_model_name]

# NOTE(review): the scaler is written out regardless of which model won; whether it
# must be applied at inference time depends on the winning model - confirm that
# downstream code handles this.
artifacts = {
    'models/best_thermal_model.pkl': best_model,
    'models/feature_scaler.pkl': scaler,
}

os.makedirs('models', exist_ok=True)
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print(f"Saved best model: {best_model_name}")
