In [13]:
import optuna
import optuna.visualization as vis
import numpy as np
import xgboost as xgb
import lightgbm as lgb
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.tree import DecisionTreeRegressor
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from utils import time_operation



In [2]:
import os
import warnings
import logging
import optuna
import tensorflow as tf

# --- 1. SYSTEM & C++ LEVEL SILENCE ---
# Directs NVIDIA compiler to be quiet and silences TF C++ logs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_CPP_VMODULE'] = 'nvptx_compiler=0'
os.environ['TF_XLA_FLAGS'] = '--tf_xla_cpu_global_jit'

# --- 2. PYTHON LEVEL SILENCE ---
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
# This silences the "All configs were filtered out" XLA autotune spam
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# --- 3. OPTUNA VERBOSITY ---
# Set to INFO to see trial results, WARNING to see nothing but errors
optuna.logging.set_verbosity(optuna.logging.INFO) 

# --- 4. LIGHTGBM / XGBOOST INTERNAL SILENCE ---
# Note: In your objective function, ensure you set:
# lgb.LGBMRegressor(..., verbose=-1)
# xgb.XGBRegressor(..., verbosity=0)

print("üîá Environment Silenced. GPU is ready for clean tuning.")


üîá Environment Silenced. GPU is ready for clean tuning.


In [3]:
# Verify GPU is still active in this session
print("GPU Available:", tf.config.list_physical_devices('GPU'))


GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
# Define paths for your 10-page report
data_path = Path().cwd().parent / "data" / "electricity_prediction.csv"
results_path = Path().cwd().parent / "results"
results_path.mkdir(parents=True, exist_ok=True)


In [5]:
try:
    # Load data with header=None as the file lacks headers
    df = pd.read_csv(data_path, header=None)
except FileNotFoundError:
    print("Error: Data file not found. Please check the path.")
else:
    # Rename columns
    column_names = [f'Hour_{i}' for i in range(1, 7)] + ['Target']
    df.columns = column_names
    print("Data Loaded Successfully.")

df.info()


Data Loaded Successfully.
<class 'pandas.DataFrame'>
RangeIndex: 139571 entries, 0 to 139570
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Hour_1  139571 non-null  float64
 1   Hour_2  139571 non-null  float64
 2   Hour_3  139571 non-null  float64
 3   Hour_4  139571 non-null  float64
 4   Hour_5  139571 non-null  float64
 5   Hour_6  139571 non-null  float64
 6   Target  139571 non-null  float64
dtypes: float64(7)
memory usage: 7.5 MB


## --- STEP 1: LOAD & DECORATED PREPROCESSING ---

In [6]:
@time_operation
def prepare_data(df):
    """
    Combines feature engineering and scaling.
    Recorded time will include the generation of 13 features.
    """
    X = df.copy()
    series = X['Target']
    
    # 1. Cyclical Features
    hour_series = X.index % 24
    X['Hour_Sin'] = np.sin(2 * np.pi * hour_series / 24)
    X['Hour_Cos'] = np.cos(2 * np.pi * hour_series / 24)
    
    # 2. Lags & Rolling Stats
    X['Lag_24'] = series.shift(24)
    X['Lag_168'] = series.shift(168)
    X['Rolling_Mean_6'] = series.shift(1).rolling(window=6).mean()
    X['Rolling_Std_24'] = series.shift(1).rolling(window=24).std()
    
    # 3. Clean and Split
    X = X.dropna()
    y = X['Target']
    X = X.drop(columns=['Target'])
    
    return X, y

# Execute and record time
(X_eng, y_eng), prep_time = prepare_data(df)
print(f"Data prepared in {prep_time:.2f} ms. Shape: {X_eng.shape}")


Data prepared in 52.12 ms. Shape: (139403, 12)


## Step 2: The Chronological Split
First, we must partition the data. Since your ACF plot showed high temporal dependency, we keep the sequences intact.

In [24]:
# --- STEP 2: CHRONOLOGICAL SPLIT ---

# 70% Train, 15% Val, 15% Test
train_size = int(len(X_eng) * 0.70)
val_size = int(len(X_eng) * 0.15)

X_train, y_train = X_eng.iloc[:train_size], y_eng.iloc[:train_size]
X_val, y_val = X_eng.iloc[train_size:train_size+val_size], y_eng.iloc[train_size:train_size+val_size]
X_test, y_test = X_eng.iloc[train_size+val_size:], y_eng.iloc[train_size+val_size:]

# Scaling - Critical for SVR, NN, and Linear Regression
scaler_X = StandardScaler()
scaler_y = StandardScaler() # Scaling target helps Neural Networks converge faster

X_train_scaled = scaler_X.fit_transform(X_train)
X_val_scaled = scaler_X.transform(X_val)
X_test_scaled = scaler_X.transform(X_test)

# Reshape y for the scaler
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).flatten()

print(f"Split complete. Training on {len(X_train)} samples.")


Split complete. Training on 97582 samples.


## save the sets train/validation/test

In [26]:
from pathlib import Path
import joblib

# 1. Setup the data directory
data_path = Path().cwd().parent / "data"
data_path.mkdir(parents=True, exist_ok=True)

# 2. Package all sets (including the unscaled ones for reference)
data_bundle = {
    "X_train_scaled": X_train_scaled,
    "X_val_scaled": X_val_scaled,
    "X_test_scaled": X_test_scaled,
    "y_train_scaled": y_train_scaled,
    "y_val_scaled": y_val_scaled,
    "y_test_scaled": y_test_scaled
}

# 3. Save to the data folder
joblib.dump(data_bundle, data_path / "electricity_data_split.pkl")
print(f"‚úÖ Data bundle saved to: {data_path / 'electricity_data_split.pkl'}")


‚úÖ Data bundle saved to: /home/bhavik/Dropbox/edu/smu/winter/data_mining/a4_regression_ts/data/electricity_data_split.pkl


## Step 3: Performance & Time Tracking Registry

In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

results_registry = {}

def calculate_metrics(y_true, y_pred, model_name, duration_ms):
    """
    Calculates MAPE, RMSE, and MAE. 
    Stores results in the global registry for the 10-page report.
    """
    # MAPE calculation
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    # RMSE calculation (requires sklearn.metrics.mean_squared_error)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    # MAE calculation (requires sklearn.metrics.mean_absolute_error)
    mae = mean_absolute_error(y_true, y_pred)
    
    # Store in registry
    results_registry[model_name] = {
        "MAPE": mape,
        "RMSE": rmse,
        "MAE": mae,
        "Time_ms": duration_ms
    }
    
    print(f"--- {model_name} Results ---")
    print(f"MAPE: {mape:.2f}% | RMSE: {rmse:.2f} | Time: {duration_ms:.2f} ms\n")


In [9]:
def objective(trial):
    # 1. SELECT THE ARCHITECTURE
    model_type = trial.suggest_categorical('model_type', [
        'LinearRegression', 'HoltWinters', 'ARIMA', 'SVR', 
        'RegressionTree', 'XGBoost', 'LightGBM', 
        'NN_1_Layer', 'NN_3_Layer'
    ])
    
    # --- MODEL IMPLEMENTATIONS ---
    if model_type == 'LinearRegression':
        model = LinearRegression().fit(X_train_scaled, y_train_scaled)
        preds = model.predict(X_val_scaled)

    elif model_type == 'HoltWinters':
        model = ExponentialSmoothing(y_train_scaled, trend='add', seasonal='add', seasonal_periods=24).fit()
        preds = model.forecast(len(y_val_scaled))

    elif model_type == 'ARIMA':
        p, d, q = trial.suggest_int('p', 0, 3), trial.suggest_int('d', 0, 1), trial.suggest_int('q', 0, 3)
        try:
            model = ARIMA(y_train_scaled, order=(p, d, q)).fit()
            preds = model.forecast(steps=len(y_val_scaled))
        except: return 1.0 # High error for non-convergent trials

    elif model_type == 'SVR':
        kernel = trial.suggest_categorical('svr_kernel', ['linear', 'rbf'])
        idx = np.random.choice(len(X_train_scaled), int(len(X_train_scaled)*0.1), replace=False)
        model = SVR(kernel=kernel, C=trial.suggest_float('SVR_C', 0.1, 10.0)).fit(X_train_scaled[idx], y_train_scaled[idx])
        preds = model.predict(X_val_scaled)

    elif model_type == 'RegressionTree':
        model = DecisionTreeRegressor(max_depth=trial.suggest_int('dt_depth', 3, 20)).fit(X_train_scaled, y_train_scaled)
        preds = model.predict(X_val_scaled)

    elif model_type == 'XGBoost':
        model = xgb.XGBRegressor(tree_method='hist', device='cuda', n_estimators=500).fit(X_train_scaled, y_train_scaled)
        preds = model.predict(X_val_scaled)

    elif model_type == 'LightGBM':
        model = lgb.LGBMRegressor(device='gpu', n_estimators=500, verbose=-1).fit(X_train_scaled, y_train_scaled)
        preds = model.predict(X_val_scaled)

    elif 'NN' in model_type:
        num_layers = 1 if '1_Layer' in model_type else 3
        model = tf.keras.Sequential([tf.keras.layers.Input(shape=(X_train_scaled.shape[1],))])
        for i in range(num_layers):
            units = trial.suggest_int(f'u{i}_{model_type}', 32, 256)
            model.add(tf.keras.layers.Dense(units, activation='relu'))
        model.add(tf.keras.layers.Dense(1))
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train_scaled, y_train_scaled, epochs=30, batch_size=1024, verbose=0)
        preds = model.predict(X_val_scaled).flatten()

    return mean_absolute_percentage_error(y_val_scaled, preds)

# --- EXECUTE THE NAMED STUDY ---
study_name = "Electricity_Consumption_Tuning"
study = optuna.create_study(study_name=study_name, direction='minimize')
study.optimize(objective, n_trials=100)


[32m[I 2026-02-08 17:19:49,405][0m A new study created in memory with name: Electricity_Consumption_Tuning[0m
I0000 00:00:1770585589.575484   36665 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6223 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
2026-02-08 17:19:52.818119: I external/local_xla/xla/service/service.cc:163] XLA service 0x774f68004c60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2026-02-08 17:19:52.818173: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
2026-02-08 17:19:52.896217: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2026-02-08 17:19:53.373539: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded c

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 6ms/step


[32m[I 2026-02-08 17:20:28,128][0m Trial 0 finished with value: 1.1823490650630526 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 204, 'u1_NN_3_Layer': 61, 'u2_NN_3_Layer': 114}. Best is trial 0 with value: 1.1823490650630526.[0m
[32m[I 2026-02-08 17:20:38,239][0m Trial 1 finished with value: 1.1700158843582469 and parameters: {'model_type': 'LightGBM'}. Best is trial 1 with value: 1.1700158843582469.[0m
[32m[I 2026-02-08 17:20:47,180][0m Trial 2 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:20:56,303][0m Trial 3 finished with value: 1.2254853762029063 and parameters: {'model_type': 'SVR', 'svr_kernel': 'rbf', 'SVR_C': 8.571718610202538}. Best is trial 2 with value: 0.9999905973940331.[0m


2026-02-08 17:21:02.019651: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because non

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 5ms/step


[32m[I 2026-02-08 17:21:31,867][0m Trial 4 finished with value: 1.1999721796113332 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 195, 'u1_NN_3_Layer': 129, 'u2_NN_3_Layer': 122}. Best is trial 2 with value: 0.9999905973940331.[0m
2026-02-08 17:21:33.368692: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2026-02-08 17:21:33.368746: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2026-02-08 17:21:37.193597: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All config

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m4s[0m 5ms/step


[32m[I 2026-02-08 17:21:57,425][0m Trial 5 finished with value: 1.1710368987361477 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 194, 'u1_NN_3_Layer': 76, 'u2_NN_3_Layer': 128}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:22:27,046][0m Trial 6 finished with value: 1.1954904308134586 and parameters: {'model_type': 'SVR', 'svr_kernel': 'linear', 'SVR_C': 6.900028377110187}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:22:28,047][0m Trial 7 finished with value: 1.2014676505974566 and parameters: {'model_type': 'XGBoost'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:22:28,071][0m Trial 8 finished with value: 1.2805548816082095 and parameters: {'model_type': 'LinearRegression'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:22:28,096][0m Trial 9 finished with value: 1.2805548816082095 and parameters: {'model_type': 'LinearRegression'}. Best is trial 2 wi

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m4s[0m 5ms/step


[32m[I 2026-02-08 17:23:25,594][0m Trial 14 finished with value: 1.1975900662220684 and parameters: {'model_type': 'NN_1_Layer', 'u0_NN_1_Layer': 163}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:23:25,900][0m Trial 15 finished with value: 1.526196010440881 and parameters: {'model_type': 'RegressionTree', 'dt_depth': 4}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:23:58,710][0m Trial 16 finished with value: 14.754727048241548 and parameters: {'model_type': 'HoltWinters'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:25:50,437][0m Trial 17 finished with value: 4.415340467882024 and parameters: {'model_type': 'ARIMA', 'p': 3, 'd': 0, 'q': 3}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:26:03,662][0m Trial 18 finished with value: 1.002423087364464 and parameters: {'model_type': 'ARIMA', 'p': 1, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[3

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 4ms/step


[32m[I 2026-02-08 17:28:02,223][0m Trial 26 finished with value: 1.2478644982852893 and parameters: {'model_type': 'NN_1_Layer', 'u0_NN_1_Layer': 37}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:28:16,545][0m Trial 27 finished with value: 1.010558474206348 and parameters: {'model_type': 'ARIMA', 'p': 1, 'd': 0, 'q': 1}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:28:25,739][0m Trial 28 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:29:40,195][0m Trial 29 finished with value: 1.0153069377415398 and parameters: {'model_type': 'ARIMA', 'p': 3, 'd': 0, 'q': 2}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:29:44,913][0m Trial 30 finished with value: 10.677154054789584 and parameters: {'model_type': 'ARIMA', 'p': 2, 'd': 1, 'q': 1}. Best is trial 2 with value: 0.999990

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m5s[0m 5ms/step


[32m[I 2026-02-08 17:30:59,117][0m Trial 35 finished with value: 1.1784105768039124 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 35, 'u1_NN_3_Layer': 248, 'u2_NN_3_Layer': 212}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:31:08,152][0m Trial 36 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:31:08,960][0m Trial 37 finished with value: 1.2014676505974566 and parameters: {'model_type': 'XGBoost'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:31:09,029][0m Trial 38 finished with value: 1.2805548816082095 and parameters: {'model_type': 'LinearRegression'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:31:22,637][0m Trial 39 finished with value: 1.1961911616524128 and parameters: {'model_type': 'SVR', 'svr_kernel': 'linear', 'SVR_C': 2.0825899646584003}.

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m7s[0m 6ms/step


[32m[I 2026-02-08 17:31:58,526][0m Trial 40 finished with value: 1.207315166513519 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 78, 'u1_NN_3_Layer': 248, 'u2_NN_3_Layer': 33}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:32:07,872][0m Trial 41 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:32:16,971][0m Trial 42 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:32:29,602][0m Trial 43 finished with value: 1.002423087364464 and parameters: {'model_type': 'ARIMA', 'p': 1, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m


[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m4s[0m 5ms/step


[32m[I 2026-02-08 17:32:54,406][0m Trial 44 finished with value: 1.2469750774612243 and parameters: {'model_type': 'NN_1_Layer', 'u0_NN_1_Layer': 246}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:32:54,943][0m Trial 45 finished with value: 1.290845637835094 and parameters: {'model_type': 'RegressionTree', 'dt_depth': 14}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:33:38,574][0m Trial 46 finished with value: 14.754727048241548 and parameters: {'model_type': 'HoltWinters'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:33:48,348][0m Trial 47 finished with value: 0.9999905973940331 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 0}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-02-08 17:33:48,363][0m Trial 48 finished with value: 1.2805548816082095 and parameters: {'model_type': 'LinearRegression'}. Best is trial 2 with value: 0.9999905973940331.[0m
[32m[I 2026-

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m4s[0m 5ms/step


[32m[I 2026-02-08 17:37:21,885][0m Trial 66 finished with value: 1.1888768763227173 and parameters: {'model_type': 'NN_1_Layer', 'u0_NN_1_Layer': 64}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:37:59,365][0m Trial 67 finished with value: 14.754727048241548 and parameters: {'model_type': 'HoltWinters'}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:38:14,147][0m Trial 68 finished with value: 0.9999401814718625 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 2}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:38:22,203][0m Trial 69 finished with value: 1.238630578454687 and parameters: {'model_type': 'SVR', 'svr_kernel': 'rbf', 'SVR_C': 4.245903258404162}. Best is trial 56 with value: 0.9999401814718625.[0m


2026-02-08 17:38:29.373317: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the h

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m5s[0m 4ms/step


[32m[I 2026-02-08 17:38:59,222][0m Trial 70 finished with value: 1.1623891249673788 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 117, 'u1_NN_3_Layer': 173, 'u2_NN_3_Layer': 247}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:39:13,683][0m Trial 71 finished with value: 0.9999401814718625 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 2}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:39:28,653][0m Trial 72 finished with value: 0.9999401814718625 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 2}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:39:43,607][0m Trial 73 finished with value: 0.9999401814718625 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 2}. Best is trial 56 with value: 0.9999401814718625.[0m
[32m[I 2026-02-08 17:39:58,202][0m Trial 74 finished with value: 0.9999401814718625 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd'

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m4s[0m 5ms/step


[32m[I 2026-02-08 17:46:15,627][0m Trial 94 finished with value: 1.1814655170080242 and parameters: {'model_type': 'NN_1_Layer', 'u0_NN_1_Layer': 251}. Best is trial 77 with value: 0.999921814554208.[0m
[32m[I 2026-02-08 17:46:33,908][0m Trial 95 finished with value: 0.999921814554208 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 3}. Best is trial 77 with value: 0.999921814554208.[0m
2026-02-08 17:46:35.543427: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2026-02-08 17:46:35.543456: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working a

[1m654/654[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m5s[0m 5ms/step


[32m[I 2026-02-08 17:47:11,027][0m Trial 96 finished with value: 1.196926866961114 and parameters: {'model_type': 'NN_3_Layer', 'u0_NN_3_Layer': 249, 'u1_NN_3_Layer': 172, 'u2_NN_3_Layer': 40}. Best is trial 77 with value: 0.999921814554208.[0m
[32m[I 2026-02-08 17:47:28,951][0m Trial 97 finished with value: 0.999921814554208 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 3}. Best is trial 77 with value: 0.999921814554208.[0m
[32m[I 2026-02-08 17:48:10,444][0m Trial 98 finished with value: 14.754727048241548 and parameters: {'model_type': 'HoltWinters'}. Best is trial 77 with value: 0.999921814554208.[0m
[32m[I 2026-02-08 17:48:29,558][0m Trial 99 finished with value: 0.999921814554208 and parameters: {'model_type': 'ARIMA', 'p': 0, 'd': 0, 'q': 3}. Best is trial 77 with value: 0.999921814554208.[0m


In [19]:
from pathlib import Path
import joblib

# 1. Define and Create Results Folder
# Using Path objects makes directory creation more robust
# 2. Export human-readable results (MAPE, parameters, etc.)
# Optuna dataframes can be saved directly using path objects
df_trials = study.trials_dataframe()
csv_file = results_path / "optuna_trials_final.csv"
df_trials.to_csv(csv_file, index=False)

# 3. Save the actual Optuna Study Object
# This is your "insurance policy" for the 10-page report
pickle_file = results_path / "electricity_study.pkl"
joblib.dump(study, pickle_file)

print(f"‚úÖ 100 trials saved to {csv_file}")
print(f"üì¶ Study object serialized to {pickle_file}")


‚úÖ 100 trials saved to /home/bhavik/Dropbox/edu/smu/winter/data_mining/a4_regression_ts/results/optuna_trials_final.csv
üì¶ Study object serialized to /home/bhavik/Dropbox/edu/smu/winter/data_mining/a4_regression_ts/results/electricity_study.pkl
