In [None]:
# !pip install imbalanced-learn
!pip install -U skorch
!pip install optuna

Collecting skorch
  Downloading skorch-1.0.0-py3-none-any.whl.metadata (11 kB)
Downloading skorch-1.0.0-py3-none-any.whl (239 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/239.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: skorch
Successfully installed skorch-1.0.0
Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import torch
from torch import nn
import skorch
from skorch import NeuralNetClassifier
from sklearn.metrics import classification_report
from skorch.callbacks import EarlyStopping
from skorch.helper import predefined_split
import optuna
from sklearn.metrics import f1_score

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Used to split the dataset into train, validation, and test sets based on time
def assign_group(time):
    """
    Maps a single timestamp onto the chronological split it belongs to.

    Parameters:
        time (pd.Timestamp): Timestamp of one observation.

    Returns:
        str: 'train' for anything before 2023-01-01, 'val' from 2023-01-01 up to
             (but not including) 2023-09-01, and 'test' for everything else.
    """
    val_start = pd.Timestamp('2023-01-01')
    test_start = pd.Timestamp('2023-09-01')

    if time < val_start:
        return 'train'
    if val_start <= time < test_start:
        return 'val'
    return 'test'

# Preprocess data
def data_preprocess(df, locations, sample_interval_s=15 * 60):
    """
    Filters the data to the requested locations and derives the model features.

    The input DataFrame is left untouched: all work happens on a filtered copy.
    Velocity is the row-to-row difference of Temperature within a location divided
    by `sample_interval_s`; Acceleration is the same finite difference applied to
    Velocity. Both divide by a fixed interval, so they are only true rates when
    consecutive rows of a location really are `sample_interval_s` seconds apart.

    Parameters:
        df (pd.DataFrame): The input data containing 'Time', 'Location', and 'Temperature'.
        locations (list): List of locations to include in the processing.
        sample_interval_s (float): Assumed spacing between consecutive samples in
                                   seconds (default=15 * 60).

    Returns:
        pd.DataFrame: A DataFrame with 'Time', 'Location', 'group', 'Temperature',
                      'Velocity', and 'Acceleration' columns, sorted by 'Location'
                      and 'Time'. Rows whose Velocity or Acceleration is NaN (at
                      least the first two rows of every location) are removed.
    """
    # Filter first and copy, so the caller's frame is never modified and only the
    # rows that are actually kept need their timestamps parsed.
    df = df[df['Location'].isin(locations)].copy()
    df['Time'] = pd.to_datetime(df['Time'])

    # Differences below are only meaningful in chronological order per location
    df = df.sort_values(by=['Location', 'Time']).reset_index(drop=True)

    # Compute Velocity and Acceleration (grouped so no difference spans two locations)
    df['Velocity'] = df.groupby('Location')['Temperature'].diff() / sample_interval_s
    df['Acceleration'] = df.groupby('Location')['Velocity'].diff() / sample_interval_s

    # Assign groups (train, val, test)
    df['group'] = df['Time'].apply(assign_group)

    # Drop rows with NaN values in Velocity or Acceleration
    df = df.dropna(subset=['Velocity', 'Acceleration']).reset_index(drop=True)

    return df[['Time', 'Location', 'group', 'Temperature', 'Velocity', 'Acceleration']]

# Create Rolling Windows
def create_rolling_windows(arr, window_size=16, horizon=8):
    """
    Creates rolling windows of features and corresponding targets for time-series data.

    Window i holds rows i .. i + window_size - 1 of `arr`. Its target is column 0
    of row i + window_size + horizon, i.e. the value `horizon + 1` rows after the
    last row of the window.

    Parameters:
        arr (np.ndarray): Input array of shape (n_samples, n_features).
        window_size (int): Number of consecutive timesteps in a window (default=16).
        horizon (int): Offset added past the end of the window to locate the target row (default=8).

    Returns:
        tuple: Two numpy arrays:
               - X: Features of shape (n_windows, window_size, n_features)
               - y: Target values of shape (n_windows,)
               where n_windows = max(n_samples - window_size - horizon, 0). When the
               input is too short both arrays are empty but keep these shapes.
    """
    arr = np.asarray(arr)
    n_windows = max(len(arr) - window_size - horizon, 0)
    starts = np.arange(n_windows)

    # Row indices of every window, shape (n_windows, window_size); indexing with
    # this array gathers all windows in one vectorized step instead of a Python loop.
    window_idx = starts[:, None] + np.arange(window_size)
    X = arr[window_idx]
    y = arr[starts + window_size + horizon, 0]
    return X, y

def create_feature_matrix(df, window_size=16, horizon=8):
    """
    Turns the feature columns of a DataFrame into rolling windows and targets.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Temperature', 'Velocity', and 'Acceleration'.
        window_size (int): Number of consecutive timesteps in a window (default=16).
        horizon (int): Offset passed on to create_rolling_windows to locate the target (default=8).

    Returns:
        tuple: Two numpy arrays, exactly as returned by create_rolling_windows:
               - X: Features of shape (n_windows, window_size, n_features)
               - y: Target values of shape (n_windows,)
    """
    # Temperature must stay first: the targets are read from column 0.
    feature_columns = ('Temperature', 'Velocity', 'Acceleration')
    stacked = np.column_stack([df[name].values for name in feature_columns])
    return create_rolling_windows(stacked, window_size, horizon)

def prepare_datasets(df, Config):
    """
    Builds one combined set of rolling windows from every configured location.

    Windows are created separately for each location and then concatenated, so a
    single window never mixes rows of two locations. Rows are used in the order
    they appear in `df`.

    Parameters:
        df (pd.DataFrame): The input data; needs a 'Location' column plus the feature
                           columns required by create_feature_matrix.
        Config: Configuration object exposing `locations`, `window_size` and `horizon`.

    Returns:
        tuple: Two numpy arrays:
               - X_all: Combined feature arrays of shape (total_windows, window_size, n_features).
               - y_all: Combined target arrays of shape (total_windows,).

    Raises:
        ValueError: If none of the locations has enough rows to form a window.
    """
    X_all, y_all = [], []

    for location in Config.locations:
        location_df = df[df['Location'] == location]
        X, y = create_feature_matrix(location_df, Config.window_size, Config.horizon)
        # Locations that are missing or too short contribute nothing
        if len(X) > 0 and len(y) > 0:
            X_all.append(X)
            y_all.append(y)

    if not X_all:
        # np.concatenate would raise a ValueError here as well, but with a message
        # that does not point at the actual cause.
        raise ValueError(
            "No rolling windows could be created: none of the configured locations "
            f"has more than window_size + horizon = {Config.window_size + Config.horizon} rows."
        )

    return np.concatenate(X_all, axis=0), np.concatenate(y_all, axis=0)

def add_augmented_data(X, y, target_ratio=0.04, noise_std=0.01):
    """
    Augments the dataset by duplicating and slightly modifying samples
    where the target value is 1.0 to address class imbalance.

    Each round appends one noisy copy of every original positive sample. At least
    one round is always performed, and rounds are repeated until the mean of the
    target array exceeds `target_ratio`.

    Parameters:
        X (np.ndarray): Feature array of shape (n_samples, window_size, n_features).
        y (np.ndarray): Target array of shape (n_samples,).
        target_ratio (float): Augmentation stops once the mean of y is above this
                              value; must be in [0, 1) (default=0.04).
        noise_std (float): Standard deviation of the Gaussian noise added to the
                           duplicated features (default=0.01).

    Returns:
        tuple: Augmented feature array and target array. If y contains no 1.0 at
               all, X and y are returned unchanged.

    Raises:
        ValueError: If target_ratio is outside [0, 1).
    """
    if not 0.0 <= target_ratio < 1.0:
        # A ratio of 1 or more can never be exceeded, so the loop would never end
        raise ValueError(f"target_ratio must be in [0, 1), got {target_ratio}")

    duplicates_idx = np.where(y == 1.0)[0]  # Find indices where target is 1.0
    if duplicates_idx.size == 0:
        # Nothing to duplicate: without this guard the loop below never terminates
        print("Augmentation skipped. No sample with target 1.0 was found.")
        return X, y

    X_positive, y_positive = X[duplicates_idx], y[duplicates_idx]

    # Collect the noisy copies and join them with X once at the end, instead of
    # re-copying the whole (large) feature array on every round.
    X_parts = [X]
    while True:
        # Add random noise to duplicate samples
        X_parts.append(X_positive + noise_std * np.random.randn(*X_positive.shape))
        y = np.concatenate([y, y_positive], axis=0)

        # Stop augmenting once the share of positives exceeds the target ratio
        if np.mean(y) > target_ratio:
            break

    X = np.concatenate(X_parts, axis=0)
    print(f"Augmentation completed. {np.mean(y) * 100:.2f}% of train data is 1.0")
    return X, y

# Define Model
class GRUNet(nn.Module):
    """
    A Gated Recurrent Unit (GRU) neural network for time-series forecasting.

    The sequence is fed through a stacked GRU; the output of the last timestep
    goes through dropout and a linear layer that yields a single value per sample.

    Parameters:
        hidden_size (int): Number of hidden units in each GRU layer (default=16).
        num_layers (int): Number of GRU layers (default=2)
        dropout (float): Dropout probability applied before the linear layer and,
                         when num_layers > 1, between the GRU layers (default=0.5).
        input_size (int): Number of features per timestep (default=3).
    """
    def __init__(self, hidden_size=16, num_layers=2, dropout=0.5, input_size=3):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout is only passed to the GRU when there is more than one layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True,
                          dropout=dropout if num_layers > 1 else 0.0)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        Forward pass of the GRU network.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, sequence_length, input_size).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, 1). No activation is
                          applied after the linear layer.
        """
        out, _ = self.gru(x)
        # Only the representation of the last timestep is used for the prediction
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out


In [None]:
class Config:
    """
    Static configuration class holding the important parameters of the notebook.
    """
    # Rolling-window settings
    window_size, horizon = 16, 8

    # Temperature value the targets are compared against
    threshold = 85.0

    batch_size = 64

    # air-conditioned cooling system
    locations = [
        'C76', 'C80', 'E20', 'E30',
        'E39', 'L18', 'L20', 'S44',
    ]
    # forced cooling system (swap in to use these locations instead)
    # locations = ["A20", "A30", "A40", "A60", "A70", "A80",
    #              "K20", "K30", "M10", "M50", "M90", "R10",
    #              "R20", "R30", "R40", "R50", "W10"]

In [None]:
# Load the raw dataset.
# The CSV has to provide 'Time', 'Location' and 'Temperature' columns;
# 'forced_constant_ventilation_cooling' is optional.
DATA_PATH = '/content/drive/MyDrive/BART/data/cleaned_data_v3.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Time,Location,Temperature,forced_constant_ventilation_cooling
0,2020-12-31 23:50:01,A20,59.9,1
1,2020-12-31 23:35:01,A20,59.6,1
2,2020-12-31 23:20:01,A20,60.3,1
3,2020-12-31 23:05:01,A20,60.6,1
4,2020-12-31 22:50:01,A20,60.4,1


In [None]:
# Show column dtypes and memory usage of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6175997 entries, 0 to 6175996
Data columns (total 4 columns):
 #   Column                               Dtype  
---  ------                               -----  
 0   Time                                 object 
 1   Location                             object 
 2   Temperature                          float64
 3   forced_constant_ventilation_cooling  int64  
dtypes: float64(1), int64(1), object(2)
memory usage: 188.5+ MB


In [None]:
# (number of rows, number of columns) of the loaded frame
df.shape

(6175997, 4)

In [None]:
# Preprocess: the result keeps only the
# ['Time', 'Location', 'group', 'Temperature', 'Velocity', 'Acceleration'] columns
df = data_preprocess(df, Config.locations)

# Split into train/val/test using the 'group' label
group_labels = df['group']
train_df = df[group_labels == 'train']
val_df = df[group_labels == 'val']
test_df = df[group_labels == 'test']

# Rolling windows per split; X has shape (total_windows, window_size, n_features)
X_train, y_train = prepare_datasets(train_df, Config)
X_val, y_val = prepare_datasets(val_df, Config)
X_test, y_test = prepare_datasets(test_df, Config)

# Per-feature statistics, computed on the training windows only
train_mean = X_train.mean(axis=(0, 1))
train_std = X_train.std(axis=(0, 1))


def _standardize(windows):
    """Scales every feature with the training-set mean and standard deviation."""
    return (windows - train_mean) / train_std


def _to_overheat_label(targets):
    """Returns 1.0 where the target is above Config.threshold, otherwise 0.0."""
    return (targets > Config.threshold).astype(float)


def _as_float32(windows, labels):
    """Casts to float32 (the dtype skorch expects) and makes labels a column vector."""
    return windows.astype(np.float32), labels.astype(np.float32).reshape(-1, 1)


# Standardize train, val, test
X_train = _standardize(X_train)
X_val = _standardize(X_val)
X_test = _standardize(X_test)

# Turn the raw targets into overheat labels
y_train = _to_overheat_label(y_train)
y_val = _to_overheat_label(y_val)
y_test = _to_overheat_label(y_test)

# Oversample the positive class; applied to the training split only
X_train, y_train = add_augmented_data(X_train, y_train)

# Final dtype/shape conversion for skorch
X_train, y_train = _as_float32(X_train, y_train)
X_val, y_val = _as_float32(X_val, y_val)
X_test, y_test = _as_float32(X_test, y_test)

# Pre-defined validation set
valid_ds = skorch.dataset.Dataset(X_val, y_val)

Augmentation completed. 4.25% of train data is 1.0


## Hyperparameter Tuning

This section searches for the best model parameters (`hidden_size`, `num_layers`, and `dropout`) as well as the learning rate (`lr`).

Here, I have selected only the most important parameters for tuning, but you can expand the hyperparameter search space as much as you need.


In [None]:
# Define your objective function for Optuna

def objective(trial):
    """
    Optuna objective: trains a GRUNet with the sampled hyperparameters and scores it.

    The model is fitted on X_train / y_train for a few epochs and evaluated on
    X_val / y_val.

    Parameters:
        trial (optuna.trial.Trial): Trial used to sample 'hidden_size', 'num_layers',
                                    'lr' and 'dropout'.

    Returns:
        float: F1-score of the positive class (label 1) on the validation set.
    """
    hidden_size = trial.suggest_categorical('hidden_size', [16, 32, 64])
    num_layers = trial.suggest_categorical('num_layers', [1, 2])
    # The range spans three orders of magnitude, so sample it on a log scale;
    # uniform sampling would put ~90% of the trials between 1e-3 and 1e-2.
    learning_rate = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)

    model = NeuralNetClassifier(
            GRUNet,
            module__hidden_size=hidden_size,
            module__num_layers=num_layers,
            module__dropout=dropout,
            # smaller epochs are enough to evaluate
            max_epochs=5,
            criterion=nn.BCEWithLogitsLoss,
            optimizer=torch.optim.AdamW,
            lr=learning_rate,
            # The internal validation split is only needed for early stopping, which
            # is not used while tuning, so train_split is set to None.
            train_split = None,
            batch_size=Config.batch_size,
            iterator_train__shuffle=True,
            verbose=1,
        )

    model = model.fit(X_train, y_train)
    predicted = model.predict(X_val)
    # Flatten both to 1-D vectors before computing the F1-score
    y_pred = predicted.flatten()
    y_true = y_val.flatten()
    score = f1_score(y_true, y_pred, pos_label=1)
    return score

# Run the Optuna search; the objective returns an F1-score, so it is maximized
study = optuna.create_study(direction="maximize")

# n_trials controls how long the search runs; adjust it to the time you can spend
study.optimize(objective, n_trials=30)

# Keep the best parameters for the final training run and report them
best_params = study.best_params
print("Best parameters: ", best_params)

[I 2024-12-08 07:44:36,230] A new study created in memory with name: no-name-a715700c-d2b5-4f3e-8c97-df7fd5974058


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0498[0m  59.9564
      2        [36m0.0422[0m  60.6858
      3        [36m0.0390[0m  60.8210
      4        [36m0.0365[0m  60.9378
      5        [36m0.0343[0m  60.5939


[I 2024-12-08 07:49:47,680] Trial 0 finished with value: 0.6216867469879518 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 0.0011999050670054394, 'dropout': 0.2757141144342244}. Best is trial 0 with value: 0.6216867469879518.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0496[0m  80.6326
      2        [36m0.0421[0m  79.5872
      3        [36m0.0392[0m  81.0314
      4        [36m0.0376[0m  81.1054
      5        [36m0.0366[0m  81.8614


[I 2024-12-08 07:56:39,550] Trial 1 finished with value: 0.5476419634263715 and parameters: {'hidden_size': 16, 'num_layers': 2, 'lr': 0.0017303730930262576, 'dropout': 0.06735818802560317}. Best is trial 0 with value: 0.6216867469879518.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0471[0m  61.7787
      2        [36m0.0421[0m  59.8796
      3        [36m0.0406[0m  59.5833
      4        [36m0.0397[0m  60.3478
      5        [36m0.0391[0m  60.2532


[I 2024-12-08 08:01:48,957] Trial 2 finished with value: 0.5609504132231405 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 0.005984608446014745, 'dropout': 0.20134561822611602}. Best is trial 0 with value: 0.6216867469879518.


  epoch    train_loss       dur
-------  ------------  --------
      1        [36m0.0451[0m  109.6547
      2        [36m0.0369[0m  109.5036
      3        [36m0.0327[0m  110.0319
      4        [36m0.0302[0m  108.2052
      5        [36m0.0287[0m  108.2882


[I 2024-12-08 08:11:06,238] Trial 3 finished with value: 0.5883076923076923 and parameters: {'hidden_size': 64, 'num_layers': 2, 'lr': 0.0030949668737970354, 'dropout': 0.024508016418586465}. Best is trial 0 with value: 0.6216867469879518.


  epoch    train_loss       dur
-------  ------------  --------
      1        [36m0.0497[0m  111.2680
      2        [36m0.0425[0m  106.6172
      3        [36m0.0394[0m  107.5608
      4        [36m0.0368[0m  107.3237
      5        [36m0.0348[0m  107.1037


[I 2024-12-08 08:20:17,447] Trial 4 finished with value: 0.5889724310776943 and parameters: {'hidden_size': 64, 'num_layers': 2, 'lr': 0.0007279805992466727, 'dropout': 0.30263116935625606}. Best is trial 0 with value: 0.6216867469879518.


  epoch    train_loss       dur
-------  ------------  --------
      1        [36m0.0471[0m  106.8227
      2        [36m0.0420[0m  107.0379
      3        [36m0.0403[0m  107.9476
      4        [36m0.0397[0m  108.6841
      5        [36m0.0390[0m  106.3481


[I 2024-12-08 08:29:26,087] Trial 5 finished with value: 0.6427688504326329 and parameters: {'hidden_size': 64, 'num_layers': 2, 'lr': 0.005197111385072939, 'dropout': 0.15619562235221834}. Best is trial 5 with value: 0.6427688504326329.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0524[0m  89.6609
      2        [36m0.0455[0m  89.1946
      3        [36m0.0428[0m  87.7484
      4        [36m0.0410[0m  88.4278
      5        [36m0.0398[0m  89.1007


[I 2024-12-08 08:36:59,646] Trial 6 finished with value: 0.6634009797060881 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.00122968640382904, 'dropout': 0.4837916002775095}. Best is trial 6 with value: 0.6634009797060881.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0508[0m  78.8472
      2        [36m0.0449[0m  78.9288
      3        [36m0.0430[0m  78.4972
      4        [36m0.0424[0m  79.5399
      5        [36m0.0416[0m  78.6417


[I 2024-12-08 08:43:42,006] Trial 7 finished with value: 0.6467218332272437 and parameters: {'hidden_size': 16, 'num_layers': 2, 'lr': 0.004928097363333086, 'dropout': 0.33539968324678426}. Best is trial 6 with value: 0.6634009797060881.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0480[0m  89.5623
      2        [36m0.0430[0m  88.6367
      3        [36m0.0414[0m  89.6723
      4        [36m0.0406[0m  89.5485
      5        [36m0.0395[0m  89.1974


[I 2024-12-08 08:51:18,134] Trial 8 finished with value: 0.48693379790940766 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.005622967606549825, 'dropout': 0.2765285982133938}. Best is trial 6 with value: 0.6634009797060881.


  epoch    train_loss       dur
-------  ------------  --------
      1        [36m0.0478[0m  107.6968
      2        [36m0.0410[0m  108.0193
      3        [36m0.0375[0m  107.3632
      4        [36m0.0349[0m  108.6968
      5        [36m0.0325[0m  107.0053


[I 2024-12-08 09:00:28,366] Trial 9 finished with value: 0.5875845113706207 and parameters: {'hidden_size': 64, 'num_layers': 2, 'lr': 0.0016787273910626752, 'dropout': 0.33663995027246796}. Best is trial 6 with value: 0.6634009797060881.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0502[0m  51.6022
      2        [36m0.0472[0m  50.9676
      3        [36m0.0465[0m  51.9076
      4        [36m0.0460[0m  52.0959
      5        [36m0.0452[0m  51.6337


[I 2024-12-08 09:04:52,960] Trial 10 finished with value: 0.6792452830188679 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.008781997864999797, 'dropout': 0.46746853351295814}. Best is trial 10 with value: 0.6792452830188679.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0512[0m  51.6663
      2        [36m0.0504[0m  51.4736
      3        [36m0.0488[0m  51.0945
      4        0.0491  51.0633
      5        0.0502  51.3090


[I 2024-12-08 09:09:16,069] Trial 11 finished with value: 0.6488131466828971 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.009940820325373297, 'dropout': 0.49883715775985715}. Best is trial 10 with value: 0.6792452830188679.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0508[0m  51.5805
      2        [36m0.0497[0m  51.6292
      3        [36m0.0489[0m  51.0526
      4        0.0490  51.9851
      5        [36m0.0480[0m  51.2630


[I 2024-12-08 09:13:40,065] Trial 12 finished with value: 0.6349765258215962 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.00990805317253231, 'dropout': 0.4931351175138842}. Best is trial 10 with value: 0.6792452830188679.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0494[0m  51.1459
      2        [36m0.0458[0m  51.8937
      3        [36m0.0456[0m  50.8223
      4        [36m0.0447[0m  50.8008
      5        [36m0.0439[0m  50.9513


[I 2024-12-08 09:18:02,379] Trial 13 finished with value: 0.6237942122186495 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.008185910407453612, 'dropout': 0.4118916821817309}. Best is trial 10 with value: 0.6792452830188679.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0510[0m  51.6380
      2        [36m0.0459[0m  50.7181
      3        [36m0.0447[0m  50.3394
      4        [36m0.0439[0m  50.3806
      5        [36m0.0428[0m  49.5433


[I 2024-12-08 09:22:21,080] Trial 14 finished with value: 0.689041095890411 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.007462825116974809, 'dropout': 0.4180932596837986}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0494[0m  49.8590
      2        [36m0.0453[0m  49.6228
      3        [36m0.0443[0m  50.0775
      4        [36m0.0436[0m  50.4886
      5        [36m0.0432[0m  53.3599


[I 2024-12-08 09:26:41,367] Trial 15 finished with value: 0.5993303571428571 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.007591604308373971, 'dropout': 0.4140820399294558}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0498[0m  50.5414
      2        [36m0.0459[0m  50.9456
      3        [36m0.0451[0m  50.4554
      4        [36m0.0441[0m  50.0809
      5        [36m0.0437[0m  50.2643


[I 2024-12-08 09:30:59,832] Trial 16 finished with value: 0.6102292768959435 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.008077278559506408, 'dropout': 0.3987081279603179}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0496[0m  50.6296
      2        [36m0.0456[0m  50.8971
      3        [36m0.0446[0m  51.0584
      4        [36m0.0442[0m  51.4646
      5        [36m0.0433[0m  51.5719


[I 2024-12-08 09:35:21,903] Trial 17 finished with value: 0.6366995073891626 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.006988199091734183, 'dropout': 0.4234553459181469}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0515[0m  47.1687
      2        [36m0.0469[0m  46.2959
      3        [36m0.0460[0m  46.0025
      4        [36m0.0457[0m  46.5657
      5        [36m0.0452[0m  48.0394


[I 2024-12-08 09:39:21,666] Trial 18 finished with value: 0.6035502958579881 and parameters: {'hidden_size': 16, 'num_layers': 1, 'lr': 0.008955989456976423, 'dropout': 0.3705465366254303}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0481[0m  52.9595
      2        [36m0.0437[0m  52.9436
      3        [36m0.0420[0m  54.4755
      4        [36m0.0411[0m  54.4545
      5        [36m0.0406[0m  54.0126


[I 2024-12-08 09:43:57,224] Trial 19 finished with value: 0.5402184707050646 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.006918539085157655, 'dropout': 0.21115353526535963}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0500[0m  53.3200
      2        [36m0.0441[0m  53.3993
      3        [36m0.0423[0m  53.5923
      4        [36m0.0414[0m  53.4042
      5        [36m0.0400[0m  53.3553


[I 2024-12-08 09:48:30,943] Trial 20 finished with value: 0.5320921117099461 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.0040876357940202585, 'dropout': 0.4467509699678965}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0504[0m  93.5027
      2        [36m0.0445[0m  91.2822
      3        [36m0.0422[0m  91.6640
      4        [36m0.0412[0m  91.8777
      5        [36m0.0399[0m  91.1655


[I 2024-12-08 09:56:20,232] Trial 21 finished with value: 0.6372360844529751 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.003233543829292175, 'dropout': 0.4749187577030277}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.2168[0m  53.3548
      2        [36m0.0661[0m  53.3938
      3        [36m0.0554[0m  53.3488
      4        [36m0.0532[0m  53.7173
      5        [36m0.0525[0m  52.8034


[I 2024-12-08 10:00:53,535] Trial 22 finished with value: 0.6086441681468324 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 1.7469286003780546e-05, 'dropout': 0.45507296269601794}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0506[0m  93.7155
      2        [36m0.0482[0m  90.8854
      3        0.0491  93.2912
      4        0.0486  91.9494
      5        0.0487  92.2458


[I 2024-12-08 10:08:44,914] Trial 23 finished with value: 0.6289017341040463 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.009124430608539787, 'dropout': 0.36798515952529254}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0500[0m  53.6704
      2        [36m0.0448[0m  53.3099
      3        [36m0.0432[0m  53.4138
      4        [36m0.0426[0m  53.1194
      5        [36m0.0421[0m  53.3428


[I 2024-12-08 10:13:18,527] Trial 24 finished with value: 0.5746630727762804 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.006620109807540942, 'dropout': 0.4543214166228036}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0507[0m  48.0721
      2        [36m0.0471[0m  47.9914
      3        [36m0.0465[0m  48.1530
      4        [36m0.0461[0m  48.5462
      5        [36m0.0456[0m  48.7034


[I 2024-12-08 10:17:25,892] Trial 25 finished with value: 0.5484351713859911 and parameters: {'hidden_size': 16, 'num_layers': 1, 'lr': 0.008617912138090567, 'dropout': 0.37137433562624916}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0494[0m  92.8844
      2        [36m0.0447[0m  92.0565
      3        [36m0.0423[0m  92.3456
      4        [36m0.0408[0m  92.6830
      5        [36m0.0402[0m  89.6612


[I 2024-12-08 10:25:14,617] Trial 26 finished with value: 0.5723868082727781 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.003986996794576621, 'dropout': 0.45468815597462764}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0506[0m  51.6294
      2        [36m0.0467[0m  51.2478
      3        [36m0.0456[0m  51.4563
      4        [36m0.0441[0m  51.2152
      5        [36m0.0435[0m  51.8164


[I 2024-12-08 10:29:38,279] Trial 27 finished with value: 0.6190741773563859 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 0.007452320224494222, 'dropout': 0.4953181701736284}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0467[0m  89.5670
      2        [36m0.0394[0m  87.8240
      3        [36m0.0365[0m  88.6769
      4        [36m0.0339[0m  86.9646
      5        [36m0.0320[0m  86.2390


[I 2024-12-08 10:37:07,011] Trial 28 finished with value: 0.5939947780678851 and parameters: {'hidden_size': 32, 'num_layers': 2, 'lr': 0.0029422516366393414, 'dropout': 0.11658270002589122}. Best is trial 14 with value: 0.689041095890411.


  epoch    train_loss      dur
-------  ------------  -------
      1        [36m0.0537[0m  45.4526
      2        [36m0.0460[0m  44.9162
      3        [36m0.0446[0m  45.7208
      4        [36m0.0436[0m  46.0001
      5        [36m0.0427[0m  45.6066


[I 2024-12-08 10:41:00,184] Trial 29 finished with value: 0.6105383734249714 and parameters: {'hidden_size': 16, 'num_layers': 1, 'lr': 0.002212115428327964, 'dropout': 0.31149971187307346}. Best is trial 14 with value: 0.689041095890411.


Best parameters:  {'hidden_size': 32, 'num_layers': 1, 'lr': 0.007462825116974809, 'dropout': 0.4180932596837986}


In [None]:
# Display the hyperparameters selected by the Optuna study
best_params

{'hidden_size': 32,
 'num_layers': 1,
 'lr': 0.007462825116974809,
 'dropout': 0.4180932596837986}

## Out-of-sample (OOS) validation

After completing the hyperparameter tuning, we now know the optimal parameters to use, and these have been stored in the best_params dictionary.

In [None]:
# Early stopping: end training when valid_loss has not improved for 5 epochs
early_stopping = EarlyStopping(monitor='valid_loss', patience=5, load_best=True)

# Architecture settings found by the hyperparameter search, as skorch module__ arguments
module_kwargs = {
    f'module__{name}': best_params[name]
    for name in ('hidden_size', 'num_layers', 'dropout')
}

best_model = NeuralNetClassifier(
    GRUNet,
    **module_kwargs,
    lr=best_params['lr'],
    criterion=nn.BCEWithLogitsLoss,
    optimizer=torch.optim.AdamW,
    max_epochs=30,
    batch_size=Config.batch_size,
    iterator_train__shuffle=True,
    # valid_ds supplies the validation loss that early stopping monitors
    train_split=predefined_split(valid_ds),
    callbacks=[early_stopping],
    verbose=1,
)

# Train on the training windows; validation data comes from train_split above
best_model.fit(X_train, y_train)

  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m0.0495[0m       [32m0.9973[0m        [35m0.0116[0m  58.8334
      2        [36m0.0455[0m       0.9970        [35m0.0101[0m  60.0703
      3        [36m0.0445[0m       0.9935        0.0170  58.0195
      4        [36m0.0434[0m       0.9959        0.0111  60.6419
      5        [36m0.0430[0m       0.9970        [35m0.0098[0m  58.2511
      6        [36m0.0426[0m       0.9967        0.0103  58.2111
      7        [36m0.0423[0m       0.9967        0.0115  57.7560
      8        [36m0.0423[0m       0.9969        0.0100  57.9812
      9        [36m0.0421[0m       [32m0.9975[0m        [35m0.0096[0m  58.4369
     10        [36m0.0415[0m       0.9969        0.0101  57.7782
     11        0.0418       0.9963        [35m0.0095[0m  57.2982
     12        0.0418       0.9952        0.0150  57.5152
     13        [36m0.0415[0m     

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=GRUNet(
    (gru): GRU(3, 32, batch_first=True)
    (dropout): Dropout(p=0.4180932596837986, inplace=False)
    (fc): Linear(in_features=32, out_features=1, bias=True)
  ),
)

In [None]:
# Evaluate the final model on the test windows
predicted = best_model.predict(X_test)
# classification_report expects 1-D label vectors
y_true, y_pred = y_test.flatten(), predicted.flatten()
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    186207
         1.0       0.76      0.83      0.79       167

    accuracy                           1.00    186374
   macro avg       0.88      0.92      0.90    186374
weighted avg       1.00      1.00      1.00    186374

