In [29]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

import mlflow
import mlflow.sklearn
import mlflow.pytorch
from mlflow.models.signature import infer_signature

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

import xgboost as xgb
import lightgbm as lgb
import optuna

# Fix the seeds of NumPy's global RNG and of PyTorch's default generator for reproducibility.
# torch.manual_seed returns the Generator object, which the notebook echoes as this cell's output.
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x1168b13b0>

In [30]:
# Load the pre-split train/test arrays from the working directory (adjust file paths as needed).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, and unpickling can run
# arbitrary code -- only load .npy files that come from a trusted source.
X_train = np.load('X_train.npy', allow_pickle=True)
y_train = np.load('y_train.npy', allow_pickle=True)
X_test = np.load('X_test.npy', allow_pickle=True)
y_test = np.load('y_test.npy', allow_pickle=True)

print(f'X_train shape: {X_train.shape}')  # Expected: (num_races_train, max_riders, num_features)
print(f'y_train shape: {y_train.shape}')  # Expected: (num_races_train, max_riders)
print(f'X_test shape: {X_test.shape}')    # Expected: (num_races_test, max_riders, num_features)
print(f'y_test shape: {y_test.shape}')    # Expected: (num_races_test, max_riders)

# Flatten the data for scikit-learn models: collapse the (race, rider) axes into one sample
# axis so that every rider slot becomes its own row.
X_train_flat = X_train.reshape(-1, X_train.shape[2])    # Shape: (num_races_train * max_riders, num_features)
X_test_flat = X_test.reshape(-1, X_test.shape[2])       # Shape: (num_races_test * max_riders, num_features)

# Flatten the targets the same way so that row i of X_*_flat lines up with y_*_flat[i]
y_train_flat = y_train.flatten()  # Shape: (num_races_train * max_riders,)
y_test_flat = y_test.flatten()    # Shape: (num_races_test * max_riders,)

# Keep only the rows whose target is strictly positive.
# NOTE(review): presumably non-positive targets mark padded rider slots (races are padded to
# max_riders) -- confirm against the code that produced the .npy files.
valid_indices_train = y_train_flat > 0
valid_indices_test = y_test_flat > 0

# Apply the same boolean mask to features and targets so they stay aligned
X_train_flat = X_train_flat[valid_indices_train]
y_train_flat = y_train_flat[valid_indices_train]

X_test_flat = X_test_flat[valid_indices_test]
y_test_flat = y_test_flat[valid_indices_test]

# Feature scaling is currently disabled; uncomment to standardise the features
# (the scaler is fitted on the training rows only and then applied to the test rows).
# scaler = StandardScaler()
# X_train_flat = scaler.fit_transform(X_train_flat)
# X_test_flat = scaler.transform(X_test_flat)

X_train shape: (2034, 207, 227)
y_train shape: (2034, 207)
X_test shape: (153, 207, 227)
y_test shape: (153, 207)


In [31]:
# Point MLflow at a local file-based tracking store (./mlruns, relative to the working
# directory) and select the experiment that all runs started below are logged under.
mlflow.set_tracking_uri("file:./mlruns")
mlflow.set_experiment("Race_Prediction_Experiment_II")

<Experiment: artifact_location='file:///Users/feliks/Documents/Faks/Diplomska/App/mlruns/586264776644289656', creation_time=1732972027382, experiment_id='586264776644289656', last_update_time=1732972027382, lifecycle_stage='active', name='Race_Prediction_Experiment_II', tags={}>

In [32]:
def train_and_evaluate_model(model_class, param_grid, model_name, X_train, y_train, X_test, y_test):
    """Train one model per hyperparameter combination and log each as an MLflow run.

    Args:
        model_class: Estimator *class* (not an instance) with a scikit-learn style
            ``fit``/``predict`` interface; it is instantiated as ``model_class(**params)``.
        param_grid: Mapping of parameter name -> list of candidate values. The full
            Cartesian product is evaluated; an empty mapping yields exactly one run
            with default parameters.
        model_name: Human-readable name used in run names, logged params and printed output.
        X_train, y_train: Training features and targets.
        X_test, y_test: Held-out features and targets used for every reported metric.

    Returns:
        None. Results are logged to the active MLflow experiment and printed.

    NOTE(review): all metrics are computed on the test split, so choosing the best
    run by these numbers tunes hyperparameters on test data.
    """
    from itertools import product

    # Expand the grid into one kwargs dict per combination (Cartesian product).
    param_names = list(param_grid)
    param_combinations = [
        dict(zip(param_names, combination))
        for combination in product(*(param_grid[name] for name in param_names))
    ]

    # A handful of training rows is enough both as the logged input example and for
    # schema inference; predicting on the whole training set for every combination
    # (as done previously) is needlessly expensive, e.g. for KNN or SVR.
    input_example = X_train[:5]

    for idx, params in enumerate(param_combinations):
        # Fit and score before opening the run, so a failing fit leaves no run behind.
        model = model_class(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        mape = mean_absolute_percentage_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        smape = symmetric_mean_absolute_percentage_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        with mlflow.start_run(run_name=f"{model_name} - Run {idx+1}"):
            # Log parameters
            mlflow.log_param("model_class", model_name)
            mlflow.log_params(params)

            # Log metrics
            mlflow.log_metrics({
                "test_mse": mse,
                "test_mae": mae,
                "test_r2": r2,
                "test_mape": mape,
                "test_rmse": rmse,
                "test_smape": smape,
            })

            # Log the fitted model together with its input/output schema
            signature = infer_signature(input_example, model.predict(input_example))
            mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="model",
                input_example=input_example,
                signature=signature
            )

            # Print results
            print(f"\n{model_name} Run {idx+1} parameters: {params}")
            for label, value in (
                ("MSE", mse),
                ("MAE", mae),
                ("R^2 Score", r2),
                ("MAPE", mape),
                ("RMSE", rmse),
                ("sMAPE", smape),
            ):
                print(f"{model_name} Run {idx+1} Test {label}: {value:.4f}")

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Entries whose true value has magnitude of at most 1e-8 are left out so the
    division never hits a (near-)zero denominator. If no entry remains,
    ``np.inf`` is returned.
    """
    tolerance = 1e-8  # true values at or below this magnitude are skipped
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    usable = np.abs(actual) > tolerance
    if not usable.any():
        # Nothing to average over
        return np.inf

    actual_kept = actual[usable]
    relative_error = np.abs((actual_kept - predicted[usable]) / actual_kept)
    return np.mean(relative_error) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (sMAPE), in percent.

    Each entry contributes ``|y_pred - y_true| / ((|y_true| + |y_pred|) / 2)``.
    Entries whose denominator is zero (true value and prediction both zero) are
    excluded from the mean.

    Returns:
        The sMAPE in percent. ``0.0`` when the inputs are non-empty but every
        denominator is zero (all predictions match all-zero targets exactly);
        ``nan`` for empty input.
    """
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    diff = np.abs(y_pred - y_true)

    # Avoid division by zero
    mask = denominator != 0
    if not mask.any():
        # Averaging an empty selection would emit a RuntimeWarning and yield nan.
        # A zero denominator means prediction and target are both 0, i.e. no error.
        return 0.0 if mask.size else np.nan

    return np.mean(diff[mask] / denominator[mask]) * 100

### Linear Regression

In [22]:
# Ordinary least squares has nothing to tune: the empty grid produces a single run.
linear_reg = LinearRegression
param_grid_lr = dict()

train_and_evaluate_model(
    model_class=linear_reg, param_grid=param_grid_lr, model_name="Linear Regression",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)

(459, 2) (459, 2)

Linear Regression Run 1 parameters: {}
Linear Regression Run 1 Test MSE: 0.4521
Linear Regression Run 1 Test MAE: 0.6713
Linear Regression Run 1 Test R^2 Score: 0.0000
Linear Regression Run 1 Test MAPE: 67.1267
Linear Regression Run 1 Test RMSE: 0.6724
Linear Regression Run 1 Test sMAPE: 101.3003
Linear Regression Run 1 Test Log Loss: 0.0000
Linear Regression Run 1 Test Brier Loss: 0.0000


### Ridge Regression

In [23]:
# Sweep the L2 regularisation strength; one MLflow run is logged per alpha.
ridge_reg = Ridge
param_grid_ridge = dict(alpha=[0.1, 0.9, 1.0, 1.5, 2.0, 10.0])

train_and_evaluate_model(
    model_class=ridge_reg, param_grid=param_grid_ridge, model_name="Ridge Regression",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)

(459, 2) (459, 2)

Ridge Regression Run 1 parameters: {'alpha': 0.1}
Ridge Regression Run 1 Test MSE: 0.4522
Ridge Regression Run 1 Test MAE: 0.6713
Ridge Regression Run 1 Test R^2 Score: 0.0000
Ridge Regression Run 1 Test MAPE: 67.1273
Ridge Regression Run 1 Test RMSE: 0.6724
Ridge Regression Run 1 Test sMAPE: 101.3007
Ridge Regression Run 1 Test Log Loss: 0.0000
Ridge Regression Run 1 Test Brier Loss: 0.0000
(459, 2) (459, 2)

Ridge Regression Run 2 parameters: {'alpha': 0.9}
Ridge Regression Run 2 Test MSE: 0.4522
Ridge Regression Run 2 Test MAE: 0.6714
Ridge Regression Run 2 Test R^2 Score: 0.0000
Ridge Regression Run 2 Test MAPE: 67.1364
Ridge Regression Run 2 Test RMSE: 0.6725
Ridge Regression Run 2 Test sMAPE: 101.3137
Ridge Regression Run 2 Test Log Loss: 0.0000
Ridge Regression Run 2 Test Brier Loss: 0.0000
(459, 2) (459, 2)

Ridge Regression Run 3 parameters: {'alpha': 1.0}
Ridge Regression Run 3 Test MSE: 0.4522
Ridge Regression Run 3 Test MAE: 0.6714
Ridge Regression Run 3 

### Lasso Regression

In [7]:
# Sweep the L1 regularisation strength; max_iter is a single-value "grid" entry so that
# every run uses the same raised iteration cap.
lasso_reg = Lasso
param_grid_lasso = dict(
    alpha=[0.0001, 0.001, 0.01, 0.1, 1.0, 1.5, 2.0, 10.0],
    max_iter=[10000],
)

train_and_evaluate_model(
    model_class=lasso_reg, param_grid=param_grid_lasso, model_name="Lasso Regression",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)


Lasso Regression Run 1 parameters: {'alpha': 0.0001, 'max_iter': 10000}
Lasso Regression Run 1 Test MSE: 0.0563
Lasso Regression Run 1 Test MAE: 0.2112
Lasso Regression Run 1 Test R^2 Score: 0.0410
Lasso Regression Run 1 Test MAPE: 111.9876
Lasso Regression Run 1 Test RMSE: 0.2372
Lasso Regression Run 1 Test sMAPE: 67.8755

Lasso Regression Run 2 parameters: {'alpha': 0.001, 'max_iter': 10000}
Lasso Regression Run 2 Test MSE: 0.0572
Lasso Regression Run 2 Test MAE: 0.2150
Lasso Regression Run 2 Test R^2 Score: 0.0260
Lasso Regression Run 2 Test MAPE: 115.0582
Lasso Regression Run 2 Test RMSE: 0.2391
Lasso Regression Run 2 Test sMAPE: 68.8978

Lasso Regression Run 3 parameters: {'alpha': 0.01, 'max_iter': 10000}
Lasso Regression Run 3 Test MSE: 0.0583
Lasso Regression Run 3 Test MAE: 0.2199
Lasso Regression Run 3 Test R^2 Score: 0.0060
Lasso Regression Run 3 Test MAPE: 118.7917
Lasso Regression Run 3 Test RMSE: 0.2415
Lasso Regression Run 3 Test sMAPE: 70.2226

Lasso Regression Run 4 p

### Decision Tree

In [8]:
# 4 depths x 3 split thresholds = 12 runs; max_depth=None lets the tree grow unrestricted.
decision_tree_reg = DecisionTreeRegressor
param_grid_dtree_reg = dict(
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
)

train_and_evaluate_model(
    model_class=decision_tree_reg, param_grid=param_grid_dtree_reg, model_name="Decision Tree Regressor",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)


Decision Tree Regressor Run 1 parameters: {'max_depth': None, 'min_samples_split': 2}
Decision Tree Regressor Run 1 Test MSE: 0.1108
Decision Tree Regressor Run 1 Test MAE: 0.2423
Decision Tree Regressor Run 1 Test R^2 Score: -0.8878
Decision Tree Regressor Run 1 Test MAPE: 127.2581
Decision Tree Regressor Run 1 Test RMSE: 0.3328
Decision Tree Regressor Run 1 Test sMAPE: 73.8828

Decision Tree Regressor Run 2 parameters: {'max_depth': None, 'min_samples_split': 5}
Decision Tree Regressor Run 2 Test MSE: 0.1087
Decision Tree Regressor Run 2 Test MAE: 0.2436
Decision Tree Regressor Run 2 Test R^2 Score: -0.8524
Decision Tree Regressor Run 2 Test MAPE: 128.2310
Decision Tree Regressor Run 2 Test RMSE: 0.3297
Decision Tree Regressor Run 2 Test sMAPE: 73.6349

Decision Tree Regressor Run 3 parameters: {'max_depth': None, 'min_samples_split': 10}
Decision Tree Regressor Run 3 Test MSE: 0.0919
Decision Tree Regressor Run 3 Test MAE: 0.2307
Decision Tree Regressor Run 3 Test R^2 Score: -0.565

### SVR

In [None]:
# 3 x 2 x 3 = 18 SVR configurations, each fitted on the full flattened training set.
svr_model_class = SVR
param_grid_svr = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
    epsilon=[0.01, 0.1, 1],
)

train_and_evaluate_model(
    model_class=svr_model_class, param_grid=param_grid_svr, model_name="Support Vector Regressor",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)

### Random Forest

In [None]:
# Pass the estimator class, not an instance: train_and_evaluate_model builds a fresh
# model per hyperparameter combination via model_class(**params). (The previous call
# used a non-existent `model=` keyword and raised TypeError.)
random_forest_reg = RandomForestRegressor
param_grid_rf_reg = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5]
}

train_and_evaluate_model(
    model_class=random_forest_reg,
    param_grid=param_grid_rf_reg,
    model_name="Random Forest Regressor",
    X_train=X_train_flat,
    y_train=y_train_flat,
    X_test=X_test_flat,
    y_test=y_test_flat
)

### Gradient Boosting

In [None]:
# Pass the estimator class, not an instance: train_and_evaluate_model builds a fresh
# model per hyperparameter combination via model_class(**params). (The previous call
# used a non-existent `model=` keyword and raised TypeError.)
gb_regressor = GradientBoostingRegressor
param_grid_gb_reg = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5]
}

train_and_evaluate_model(
    model_class=gb_regressor,
    param_grid=param_grid_gb_reg,
    model_name="Gradient Boosting Regressor",
    X_train=X_train_flat,
    y_train=y_train_flat,
    X_test=X_test_flat,
    y_test=y_test_flat
)

Fitting 3 folds for each of 8 candidates, totalling 24 fits


### XGBoost

In [9]:
# 2 x 2 x 2 = 8 boosted-tree configurations; the objective is pinned to squared error
# through a single-value grid entry.
xgboost_reg = xgb.XGBRegressor
param_grid_xgb_reg = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1],
    max_depth=[3, 5],
    objective=['reg:squarederror'],
)

train_and_evaluate_model(
    model_class=xgboost_reg, param_grid=param_grid_xgb_reg, model_name="XGBoost Regressor",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)


XGBoost Regressor Run 1 parameters: {'n_estimators': 100, 'learning_rate': 0.01, 'max_depth': 3, 'objective': 'reg:squarederror'}
XGBoost Regressor Run 1 Test MSE: 0.0572
XGBoost Regressor Run 1 Test MAE: 0.2148
XGBoost Regressor Run 1 Test R^2 Score: 0.0259
XGBoost Regressor Run 1 Test MAPE: 113.9762
XGBoost Regressor Run 1 Test RMSE: 0.2391
XGBoost Regressor Run 1 Test sMAPE: 68.9682

XGBoost Regressor Run 2 parameters: {'n_estimators': 100, 'learning_rate': 0.01, 'max_depth': 5, 'objective': 'reg:squarederror'}
XGBoost Regressor Run 2 Test MSE: 0.0571
XGBoost Regressor Run 2 Test MAE: 0.2140
XGBoost Regressor Run 2 Test R^2 Score: 0.0275
XGBoost Regressor Run 2 Test MAPE: 113.7460
XGBoost Regressor Run 2 Test RMSE: 0.2389
XGBoost Regressor Run 2 Test sMAPE: 68.6829

XGBoost Regressor Run 3 parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3, 'objective': 'reg:squarederror'}
XGBoost Regressor Run 3 Test MSE: 0.0560
XGBoost Regressor Run 3 Test MAE: 0.2047
XGBoost 

### LightGBM

In [None]:
# Pass the estimator class, not an instance: train_and_evaluate_model builds a fresh
# model per hyperparameter combination via model_class(**params). (The previous call
# used a non-existent `model=` keyword and raised TypeError.)
lgbm_reg = lgb.LGBMRegressor
param_grid_lgbm_reg = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1],
    'num_leaves': [31, 63]
}

train_and_evaluate_model(
    model_class=lgbm_reg,
    param_grid=param_grid_lgbm_reg,
    model_name="LightGBM Regressor",
    X_train=X_train_flat,
    y_train=y_train_flat,
    X_test=X_test_flat,
    y_test=y_test_flat
)

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.251038 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3654
[LightGBM] [Info] Number of data points in the train set: 219147, number of used features: 145
[LightGBM] [Info] Start training from score 0.006051
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.086654 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3654
[LightGBM] [Info] Number of data points in the train set: 219147, number of used features: 145
[LightGBM] [Info] Start training from score 0.006051
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.202939 seconds.
You can set `force_c

### KNN

In [26]:
from sklearn.neighbors import KNeighborsRegressor

# 3 neighbourhood sizes x 2 weighting schemes = 6 runs.
knn_model = KNeighborsRegressor
param_grid_knn = dict(
    n_neighbors=[3, 5, 7],
    weights=['uniform', 'distance'],
)

train_and_evaluate_model(
    model_class=knn_model, param_grid=param_grid_knn, model_name="K-Nearest Neighbors Regressor",
    X_train=X_train_flat, y_train=y_train_flat,
    X_test=X_test_flat, y_test=y_test_flat,
)

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md




K-Nearest Neighbors Regressor Run 1 parameters: {'n_neighbors': 3, 'weights': 'uniform'}
K-Nearest Neighbors Regressor Run 1 Test MSE: 0.0740
K-Nearest Neighbors Regressor Run 1 Test MAE: 0.2188
K-Nearest Neighbors Regressor Run 1 Test R^2 Score: -0.2607
K-Nearest Neighbors Regressor Run 1 Test MAPE: 115.5888
K-Nearest Neighbors Regressor Run 1 Test RMSE: 0.2720
K-Nearest Neighbors Regressor Run 1 Test sMAPE: 68.9993

K-Nearest Neighbors Regressor Run 2 parameters: {'n_neighbors': 3, 'weights': 'distance'}
K-Nearest Neighbors Regressor Run 2 Test MSE: 0.0771
K-Nearest Neighbors Regressor Run 2 Test MAE: 0.2205
K-Nearest Neighbors Regressor Run 2 Test R^2 Score: -0.3130
K-Nearest Neighbors Regressor Run 2 Test MAPE: 115.8314
K-Nearest Neighbors Regressor Run 2 Test RMSE: 0.2776
K-Nearest Neighbors Regressor Run 2 Test sMAPE: 69.3117

K-Nearest Neighbors Regressor Run 3 parameters: {'n_neighbors': 5, 'weights': 'uniform'}
K-Nearest Neighbors Regressor Run 3 Test MSE: 0.0638
K-Nearest Ne

### Neural Network

In [33]:
class RaceRegressionModel(nn.Module):
    """Single-hidden-layer MLP that maps one feature vector to one scalar score.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer (default 128).
    """

    def __init__(self, input_size, hidden_size=128):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)  # Output a score for each rider

    def forward(self, x):
        """Return one score per input row.

        Args:
            x: Tensor of shape (..., num_features), typically (batch_size, num_features).

        Returns:
            Tensor of shape (...,) -- the trailing singleton dimension produced by
            ``fc2`` is removed.
        """
        out = self.fc2(self.relu(self.fc1(x)))
        # Drop only the last (size-1) dimension. A bare squeeze() would also drop the
        # batch dimension when a batch contains a single sample, yielding a 0-d tensor
        # that no longer lines up with a (1,)-shaped target.
        return out.squeeze(-1)

In [34]:
from torch.utils.data import Dataset, DataLoader

class RaceRegressionDataset(Dataset):
    """Map-style dataset that pairs each feature row with its regression target."""

    def __init__(self, X, y):
        # Convert both inputs to float32 tensors once, up front, so that indexing
        # later only slices existing tensors.
        features = torch.tensor(X, dtype=torch.float32)  # Shape: (num_samples, num_features)
        targets = torch.tensor(y, dtype=torch.float32)   # Shape: (num_samples,)
        self.X, self.y = features, targets

    def __len__(self):
        # The sample count is the length of the feature tensor's first dimension
        return len(self.X)

    def __getitem__(self, idx):
        # One (features, target) pair
        return self.X[idx], self.y[idx]

# Mini-batch size used by the two default loaders created in this cell
batch_size = 256

# Training split: wrap the flattened arrays and reshuffle the samples every epoch
train_dataset = RaceRegressionDataset(X_train_flat, y_train_flat)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: same wrapping, but iterate in the stored order
test_dataset = RaceRegressionDataset(X_test_flat, y_test_flat)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [35]:
# Define the objective function for Optuna
def objective(trial):
    """Optuna objective: train one network configuration and return its test-set MAE.

    Samples the hyperparameters from ``trial``, trains a ``RaceRegressionModel`` on
    ``train_dataset`` with MSE loss, evaluates it on ``test_dataset`` and logs the
    parameters, per-epoch training loss, test metrics and the model to MLflow.

    Args:
        trial: The ``optuna.trial.Trial`` supplying the hyperparameter suggestions.

    Returns:
        Mean absolute error on the test set (the value the study minimises).

    NOTE(review): the value being minimised is computed on the test split, so the
    selected hyperparameters are tuned on test data.
    """
    # Hyperparameter suggestions
    hidden_size = trial.suggest_categorical('hidden_size', [64, 128, 256])
    # NOTE(review): both ranges below span three orders of magnitude but are sampled
    # uniformly; consider suggest_float(..., log=True).
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3)
    num_epochs = trial.suggest_int('num_epochs', 10, 30)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])

    # Create data loaders with the suggested batch size
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    input_size = X_train_flat.shape[1]

    # Initialize model, loss function, and optimizer
    model = RaceRegressionModel(input_size, hidden_size).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Start MLflow run
    with mlflow.start_run(run_name=f"Neural Network MAE Run {trial.number}"):
        mlflow.log_params({
            'model_class': 'RaceRegressionModel',
            'hidden_size': hidden_size,
            'learning_rate': learning_rate,
            'weight_decay': weight_decay,
            'num_epochs': num_epochs,
            'batch_size': batch_size
        })

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            total_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)

                optimizer.zero_grad()
                # reshape(-1) keeps the predictions 1-D even when the final batch holds
                # a single sample, so they always line up with y_batch.
                outputs = model(X_batch).reshape(-1)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch average is per sample
                total_loss += loss.item() * X_batch.size(0)

            average_loss = total_loss / len(train_loader.dataset)
            mlflow.log_metric("train_loss", average_loss, step=epoch)

        # Evaluation on test set
        model.eval()
        y_true_batches = []
        y_pred_batches = []
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch.to(device)).reshape(-1)
                # Targets never need to leave the CPU; collect whole batches and
                # concatenate once instead of extending a list element by element.
                y_true_batches.append(y_batch.reshape(-1).numpy())
                y_pred_batches.append(outputs.cpu().numpy())

        y_true_array = np.concatenate(y_true_batches)
        y_pred_array = np.concatenate(y_pred_batches)

        # Compute evaluation metrics
        mse = mean_squared_error(y_true_array, y_pred_array)
        mae = mean_absolute_error(y_true_array, y_pred_array)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_true_array, y_pred_array)
        mape = mean_absolute_percentage_error(y_true_array, y_pred_array)
        smape = symmetric_mean_absolute_percentage_error(y_true_array, y_pred_array)

        # Log metrics
        mlflow.log_metrics({
            'test_mse': mse,
            'test_mae': mae,
            'test_rmse': rmse,
            'test_r2': r2,
            'test_mape': mape,
            'test_smape': smape
        })

        # Log the model; the signature is inferred from a few training rows and the
        # model's output for them (computed without building an autograd graph).
        input_example = X_train_flat[:5].astype(np.float32)
        input_example_tensor = torch.tensor(input_example, dtype=torch.float32).to(device)
        with torch.no_grad():
            example_output = model(input_example_tensor).cpu().numpy()
        signature = infer_signature(input_example, example_output)
        mlflow.pytorch.log_model(
            pytorch_model=model,
            artifact_path="model",
            input_example=input_example,
            signature=signature
        )

    # Return the metric to optimize
    return mae

# Create an Optuna study and optimize.
# The sampler gets an explicit seed so the sequence of suggested hyperparameters is
# repeatable; TPESampler is the sampler create_study uses by default, so the search
# algorithm itself is unchanged.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)

# Print best hyperparameters
print("Best hyperparameters:", study.best_params)
# objective() returns the test MAE, so that is what best_value holds (not sMAPE)
print("Best MAE:", study.best_value)

[I 2024-11-30 17:40:20,519] A new study created in memory with name: no-name-7554d3a2-f650-4733-a0f8-a05d56f19011


[I 2024-11-30 17:40:25,555] Trial 0 finished with value: 0.20807187259197235 and parameters: {'hidden_size': 128, 'learning_rate': 0.008768700064872995, 'weight_decay': 0.00017015278598835897, 'num_epochs': 24, 'batch_size': 64}. Best is trial 0 with value: 0.20807187259197235.
[I 2024-11-30 17:40:28,673] Trial 1 finished with value: 0.20251713693141937 and parameters: {'hidden_size': 128, 'learning_rate': 0.004842895857136095, 'weight_decay': 0.0007043910310181157, 'num_epochs': 14, 'batch_size': 256}. Best is trial 1 with value: 0.20251713693141937.
[I 2024-11-30 17:40:32,529] Trial 2 finished with value: 0.22294147312641144 and parameters: {'hidden_size': 64, 'learning_rate': 0.008527502498029825, 'weight_decay': 0.00018601829593207058, 'num_epochs': 29, 'batch_size': 256}. Best is trial 1 with value: 0.20251713693141937.
[I 2024-11-30 17:40:35,902] Trial 3 finished with value: 0.21592111885547638 and parameters: {'hidden_size': 128, 'learning_rate': 0.0011029834025981058, 'weight_d

Best hyperparameters: {'hidden_size': 128, 'learning_rate': 0.00955428434426838, 'weight_decay': 0.000886153158994072, 'num_epochs': 18, 'batch_size': 256}
Best sMAPE: 0.19805841147899628
