# Training the CNN for temperature prediction


In [None]:

import os
from CNN import SequenceCNN
from torch.utils.data import DataLoader as TorchDataLoader

In [2]:
import torch
from torch.optim import lr_scheduler

import torch.optim as optim
import torch.nn as nn
from TemperatureDataset import TemperatureDataset
from functools import partial
import os
import tempfile
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms

In [3]:
dataset = TemperatureDataset('data_temp_max', 'data_temp_min')

100%|██████████| 562/562 [00:38<00:00, 14.47it/s]


In [4]:
import optuna
from tqdm import tqdm

def objective(trial):
    # Remove activations_map from here, since it is already defined and used in cell 8.
    # Also, do not redefine batch_size or train_loader, as they are already defined globally.
    # You can remove this placeholder entirely.

    # Use strings to represent tuples for Optuna compatibility
    hidden_dims_options = ["32,32", "32,64", "64,64", "64,32", "32,128", "128,32","64,128","128,64", "128,128", "64,128", "32,256", "128,256", "256,256"]
    kernel_sizes_options = ["3,3", "5,3", "3,5", "5,5"]
    # Define activations_options outside the objective function to avoid dynamic value space error
    activations_options = ["relu,relu", "tanh,relu", "leaky_relu,sigmoid", "sigmoid,tanh", "relu,sigmoid", "tanh,tanh", "leaky_relu,leaky_relu", "tanh,leaky_relu", "relu,leaky_relu", "sigmoid,sigmoid", "sigmoid,leaky_relu", "relu,tanh", "tanh,relu", "leaky_relu,relu", "selu,leaky_relu"]
    hidden_dims_str = trial.suggest_categorical("hidden_dims", hidden_dims_options)
    kernel_sizes_str = trial.suggest_categorical("kernel_sizes", kernel_sizes_options)
    activations_str = trial.suggest_categorical("activations", activations_options)

    hidden_dims = tuple(map(int, hidden_dims_str.split(',')))
    kernel_sizes = tuple(map(int, kernel_sizes_str.split(',')))
    activations = tuple(activations_str.split(','))

    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    batch_size = trial.suggest_categorical("batch_size", (16, 32, 64))
    num_epochs = 5
    sample_input, sample_target = dataset[0]
    input_dim = sample_input.shape[-1]
    output_dim = sample_target.shape[-1]


    model = SequenceCNN(
        input_dim=input_dim,
        hidden_dims=list(hidden_dims),
        kernel_sizes=list(kernel_sizes),
        output_dim=output_dim,
        activations=activations
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_loader = TorchDataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        print(f"Starting training epoch {epoch + 1}/{num_epochs}")
        print(f"Learning rate: {lr}, Batch size: {batch_size}, Hidden dims: {hidden_dims}, Kernel sizes: {kernel_sizes}, Activations: {activations}")
        model.train()
        for batch in tqdm(train_loader):
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    return loss.item()

# Example usage:
# study = optuna.create_study(direction="minimize")
# study.optimize(objective, n_trials=10)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Split the dataset into train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create DataLoaders for each set
batch_size = 32  # You can adjust this as needed

train_loader = TorchDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = TorchDataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = TorchDataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [6]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device count:", torch.cuda.device_count())
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("Using CPU")

CUDA available: True
CUDA device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 3060 Laptop GPU


In [None]:
import optuna
import warnings
warnings.filterwarnings("ignore")
# Run Optuna hyperparameter search
study = optuna.create_study(
    study_name="CNN_Sequence_Optimization_2",
    storage="sqlite:///optuna_study.db",
    load_if_exists=True,
    direction="minimize",
)

study.optimize(objective, n_trials=20)

# Get the best config
best_params = study.best_params
print(f"Best params: {best_params}")


[I 2025-06-07 15:21:18,513] A new study created in RDB with name: CNN_Sequence_Optimization_2


Starting training epoch 1/5
Learning rate: 0.0005588459736553247, Batch size: 64, Hidden dims: (256, 256), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [03:02<00:00, 315.11it/s]


Starting training epoch 2/5
Learning rate: 0.0005588459736553247, Batch size: 64, Hidden dims: (256, 256), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [03:03<00:00, 312.76it/s]


Starting training epoch 3/5
Learning rate: 0.0005588459736553247, Batch size: 64, Hidden dims: (256, 256), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:00<00:00, 238.96it/s]


Starting training epoch 4/5
Learning rate: 0.0005588459736553247, Batch size: 64, Hidden dims: (256, 256), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:01<00:00, 237.90it/s]


Starting training epoch 5/5
Learning rate: 0.0005588459736553247, Batch size: 64, Hidden dims: (256, 256), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:00<00:00, 238.77it/s]
[I 2025-06-07 15:39:28,492] Trial 0 finished with value: 0.034969452768564224 and parameters: {'hidden_dims': '256,256', 'kernel_sizes': '5,3', 'activations': 'sigmoid,tanh', 'lr': 0.0005588459736553247, 'batch_size': 64}. Best is trial 0 with value: 0.034969452768564224.


Starting training epoch 1/5
Learning rate: 0.0002533781619525396, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'relu')


100%|██████████| 230062/230062 [11:43<00:00, 327.06it/s]


Starting training epoch 2/5
Learning rate: 0.0002533781619525396, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'relu')


100%|██████████| 230062/230062 [11:30<00:00, 332.97it/s]


Starting training epoch 3/5
Learning rate: 0.0002533781619525396, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'relu')


100%|██████████| 230062/230062 [11:33<00:00, 331.95it/s]


Starting training epoch 4/5
Learning rate: 0.0002533781619525396, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'relu')


100%|██████████| 230062/230062 [11:34<00:00, 331.16it/s]


Starting training epoch 5/5
Learning rate: 0.0002533781619525396, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'relu')


100%|██████████| 230062/230062 [11:33<00:00, 331.92it/s]
[I 2025-06-07 16:37:23,884] Trial 1 finished with value: 0.03392700105905533 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '3,5', 'activations': 'relu,relu', 'lr': 0.0002533781619525396, 'batch_size': 16}. Best is trial 1 with value: 0.03392700105905533.


Starting training epoch 1/5
Learning rate: 0.0008419932787618491, Batch size: 16, Hidden dims: (128, 256), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:16<00:00, 340.07it/s]


Starting training epoch 2/5
Learning rate: 0.0008419932787618491, Batch size: 16, Hidden dims: (128, 256), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:34<00:00, 331.08it/s]


Starting training epoch 3/5
Learning rate: 0.0008419932787618491, Batch size: 16, Hidden dims: (128, 256), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [10:21<00:00, 370.06it/s]


Starting training epoch 4/5
Learning rate: 0.0008419932787618491, Batch size: 16, Hidden dims: (128, 256), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:26<00:00, 334.96it/s]


Starting training epoch 5/5
Learning rate: 0.0008419932787618491, Batch size: 16, Hidden dims: (128, 256), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:33<00:00, 331.81it/s]
[I 2025-06-07 17:33:37,325] Trial 2 finished with value: 0.01977844163775444 and parameters: {'hidden_dims': '128,256', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.0008419932787618491, 'batch_size': 16}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.000310251453194837, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [03:58<00:00, 241.63it/s]


Starting training epoch 2/5
Learning rate: 0.000310251453194837, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [03:55<00:00, 244.51it/s]


Starting training epoch 3/5
Learning rate: 0.000310251453194837, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:04<00:00, 235.66it/s]


Starting training epoch 4/5
Learning rate: 0.000310251453194837, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:11<00:00, 229.00it/s]


Starting training epoch 5/5
Learning rate: 0.000310251453194837, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 57516/57516 [04:04<00:00, 235.02it/s]
[I 2025-06-07 17:53:50,701] Trial 3 finished with value: 0.03047015517950058 and parameters: {'hidden_dims': '128,256', 'kernel_sizes': '3,3', 'activations': 'sigmoid,tanh', 'lr': 0.000310251453194837, 'batch_size': 64}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.0005776183446882358, Batch size: 32, Hidden dims: (128, 128), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 115031/115031 [06:49<00:00, 281.16it/s]


Starting training epoch 2/5
Learning rate: 0.0005776183446882358, Batch size: 32, Hidden dims: (128, 128), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 115031/115031 [06:44<00:00, 284.48it/s]


Starting training epoch 3/5
Learning rate: 0.0005776183446882358, Batch size: 32, Hidden dims: (128, 128), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 115031/115031 [06:21<00:00, 301.27it/s]


Starting training epoch 4/5
Learning rate: 0.0005776183446882358, Batch size: 32, Hidden dims: (128, 128), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 115031/115031 [06:28<00:00, 296.33it/s]


Starting training epoch 5/5
Learning rate: 0.0005776183446882358, Batch size: 32, Hidden dims: (128, 128), Kernel sizes: (5, 3), Activations: ('sigmoid', 'tanh')


100%|██████████| 115031/115031 [06:23<00:00, 300.09it/s]
[I 2025-06-07 18:26:37,659] Trial 4 finished with value: 0.03172144666314125 and parameters: {'hidden_dims': '128,128', 'kernel_sizes': '5,3', 'activations': 'sigmoid,tanh', 'lr': 0.0005776183446882358, 'batch_size': 32}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.00026688234430782755, Batch size: 64, Hidden dims: (128, 32), Kernel sizes: (5, 3), Activations: ('selu', 'leaky_relu')


100%|██████████| 57516/57516 [03:58<00:00, 240.89it/s]


Starting training epoch 2/5
Learning rate: 0.00026688234430782755, Batch size: 64, Hidden dims: (128, 32), Kernel sizes: (5, 3), Activations: ('selu', 'leaky_relu')


100%|██████████| 57516/57516 [03:51<00:00, 248.56it/s]


Starting training epoch 3/5
Learning rate: 0.00026688234430782755, Batch size: 64, Hidden dims: (128, 32), Kernel sizes: (5, 3), Activations: ('selu', 'leaky_relu')


100%|██████████| 57516/57516 [03:52<00:00, 247.82it/s]


Starting training epoch 4/5
Learning rate: 0.00026688234430782755, Batch size: 64, Hidden dims: (128, 32), Kernel sizes: (5, 3), Activations: ('selu', 'leaky_relu')


100%|██████████| 57516/57516 [03:54<00:00, 245.67it/s]


Starting training epoch 5/5
Learning rate: 0.00026688234430782755, Batch size: 64, Hidden dims: (128, 32), Kernel sizes: (5, 3), Activations: ('selu', 'leaky_relu')


100%|██████████| 57516/57516 [03:52<00:00, 247.47it/s]
[I 2025-06-07 18:46:06,610] Trial 5 finished with value: 0.023979168385267258 and parameters: {'hidden_dims': '128,32', 'kernel_sizes': '5,3', 'activations': 'selu,leaky_relu', 'lr': 0.00026688234430782755, 'batch_size': 64}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.00019626338583202363, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [10:58<00:00, 349.32it/s]


Starting training epoch 2/5
Learning rate: 0.00019626338583202363, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:00<00:00, 348.55it/s]


Starting training epoch 3/5
Learning rate: 0.00019626338583202363, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [10:53<00:00, 351.92it/s]


Starting training epoch 4/5
Learning rate: 0.00019626338583202363, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:18<00:00, 339.22it/s]


Starting training epoch 5/5
Learning rate: 0.00019626338583202363, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:26<00:00, 334.89it/s]
[I 2025-06-07 19:41:44,293] Trial 6 finished with value: 0.029518207535147667 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '3,3', 'activations': 'tanh,relu', 'lr': 0.00019626338583202363, 'batch_size': 16}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.0018391986686781537, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 57516/57516 [03:59<00:00, 240.55it/s]


Starting training epoch 2/5
Learning rate: 0.0018391986686781537, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 57516/57516 [03:57<00:00, 241.69it/s]


Starting training epoch 3/5
Learning rate: 0.0018391986686781537, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 57516/57516 [04:03<00:00, 236.50it/s]


Starting training epoch 4/5
Learning rate: 0.0018391986686781537, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 57516/57516 [04:04<00:00, 235.13it/s]


Starting training epoch 5/5
Learning rate: 0.0018391986686781537, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (3, 3), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 57516/57516 [04:11<00:00, 229.07it/s]
[I 2025-06-07 20:02:00,416] Trial 7 finished with value: 0.029941808432340622 and parameters: {'hidden_dims': '128,256', 'kernel_sizes': '3,3', 'activations': 'sigmoid,sigmoid', 'lr': 0.0018391986686781537, 'batch_size': 64}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.0051066757961476545, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:03<00:00, 235.73it/s]


Starting training epoch 2/5
Learning rate: 0.0051066757961476545, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:05<00:00, 234.36it/s]


Starting training epoch 3/5
Learning rate: 0.0051066757961476545, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:03<00:00, 236.11it/s]


Starting training epoch 4/5
Learning rate: 0.0051066757961476545, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:05<00:00, 234.36it/s]


Starting training epoch 5/5
Learning rate: 0.0051066757961476545, Batch size: 64, Hidden dims: (128, 256), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:04<00:00, 235.58it/s]
[I 2025-06-07 20:22:23,119] Trial 8 finished with value: 0.05528274178504944 and parameters: {'hidden_dims': '128,256', 'kernel_sizes': '5,3', 'activations': 'tanh,tanh', 'lr': 0.0051066757961476545, 'batch_size': 64}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.00025219059992236825, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (3, 5), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:27<00:00, 334.40it/s]


Starting training epoch 2/5
Learning rate: 0.00025219059992236825, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (3, 5), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:21<00:00, 337.52it/s]


Starting training epoch 3/5
Learning rate: 0.00025219059992236825, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (3, 5), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:15<00:00, 340.51it/s]


Starting training epoch 4/5
Learning rate: 0.00025219059992236825, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (3, 5), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:27<00:00, 334.41it/s]


Starting training epoch 5/5
Learning rate: 0.00025219059992236825, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (3, 5), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:22<00:00, 336.97it/s]
[I 2025-06-07 21:19:19,256] Trial 9 finished with value: 0.05603211745619774 and parameters: {'hidden_dims': '32,32', 'kernel_sizes': '3,5', 'activations': 'relu,tanh', 'lr': 0.00025219059992236825, 'batch_size': 16}. Best is trial 2 with value: 0.01977844163775444.


Starting training epoch 1/5
Learning rate: 0.002507800985448251, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:48<00:00, 281.43it/s]


Starting training epoch 2/5
Learning rate: 0.002507800985448251, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:22<00:00, 300.43it/s]


Starting training epoch 3/5
Learning rate: 0.002507800985448251, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:25<00:00, 298.52it/s]


Starting training epoch 4/5
Learning rate: 0.002507800985448251, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:27<00:00, 296.65it/s]


Starting training epoch 5/5
Learning rate: 0.002507800985448251, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:21<00:00, 301.76it/s]
[I 2025-06-07 21:51:45,340] Trial 10 finished with value: 0.018964406102895737 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.002507800985448251, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.0020100430089346503, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:17<00:00, 304.62it/s]


Starting training epoch 2/5
Learning rate: 0.0020100430089346503, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:10<00:00, 310.20it/s]


Starting training epoch 3/5
Learning rate: 0.0020100430089346503, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:23<00:00, 299.90it/s]


Starting training epoch 4/5
Learning rate: 0.0020100430089346503, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:34<00:00, 291.71it/s]


Starting training epoch 5/5
Learning rate: 0.0020100430089346503, Batch size: 32, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:29<00:00, 295.62it/s]
[I 2025-06-07 22:23:40,982] Trial 11 finished with value: 0.04812181740999222 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.0020100430089346503, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.007357680045044963, Batch size: 32, Hidden dims: (64, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:39<00:00, 288.03it/s]


Starting training epoch 2/5
Learning rate: 0.007357680045044963, Batch size: 32, Hidden dims: (64, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [07:13<00:00, 265.61it/s]


Starting training epoch 3/5
Learning rate: 0.007357680045044963, Batch size: 32, Hidden dims: (64, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:48<00:00, 281.86it/s]


Starting training epoch 4/5
Learning rate: 0.007357680045044963, Batch size: 32, Hidden dims: (64, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:46<00:00, 282.66it/s]


Starting training epoch 5/5
Learning rate: 0.007357680045044963, Batch size: 32, Hidden dims: (64, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 115031/115031 [06:29<00:00, 295.39it/s]
[I 2025-06-07 22:57:38,099] Trial 12 finished with value: 0.0410582572221756 and parameters: {'hidden_dims': '64,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.007357680045044963, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.0019586576989347065, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'sigmoid')


100%|██████████| 115031/115031 [06:42<00:00, 285.80it/s]


Starting training epoch 2/5
Learning rate: 0.0019586576989347065, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'sigmoid')


100%|██████████| 115031/115031 [06:34<00:00, 291.38it/s]


Starting training epoch 3/5
Learning rate: 0.0019586576989347065, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'sigmoid')


100%|██████████| 115031/115031 [06:39<00:00, 288.22it/s]


Starting training epoch 4/5
Learning rate: 0.0019586576989347065, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'sigmoid')


100%|██████████| 115031/115031 [06:38<00:00, 288.51it/s]


Starting training epoch 5/5
Learning rate: 0.0019586576989347065, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'sigmoid')


100%|██████████| 115031/115031 [06:44<00:00, 284.30it/s]
[I 2025-06-07 23:30:57,959] Trial 13 finished with value: 0.04386946186423302 and parameters: {'hidden_dims': '128,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,sigmoid', 'lr': 0.0019586576989347065, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.0009403598328269836, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:19<00:00, 338.49it/s]


Starting training epoch 2/5
Learning rate: 0.0009403598328269836, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:27<00:00, 334.41it/s]


Starting training epoch 3/5
Learning rate: 0.0009403598328269836, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:47<00:00, 325.03it/s]


Starting training epoch 4/5
Learning rate: 0.0009403598328269836, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:14<00:00, 341.14it/s]


Starting training epoch 5/5
Learning rate: 0.0009403598328269836, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:21<00:00, 337.46it/s]
[I 2025-06-08 00:28:09,724] Trial 14 finished with value: 0.05211879685521126 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,leaky_relu', 'lr': 0.0009403598328269836, 'batch_size': 16}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.003723748311551364, Batch size: 32, Hidden dims: (32, 256), Kernel sizes: (5, 5), Activations: ('tanh', 'leaky_relu')


100%|██████████| 115031/115031 [06:30<00:00, 294.90it/s]


Starting training epoch 2/5
Learning rate: 0.003723748311551364, Batch size: 32, Hidden dims: (32, 256), Kernel sizes: (5, 5), Activations: ('tanh', 'leaky_relu')


100%|██████████| 115031/115031 [06:28<00:00, 295.72it/s]


Starting training epoch 3/5
Learning rate: 0.003723748311551364, Batch size: 32, Hidden dims: (32, 256), Kernel sizes: (5, 5), Activations: ('tanh', 'leaky_relu')


100%|██████████| 115031/115031 [06:26<00:00, 297.99it/s]


Starting training epoch 4/5
Learning rate: 0.003723748311551364, Batch size: 32, Hidden dims: (32, 256), Kernel sizes: (5, 5), Activations: ('tanh', 'leaky_relu')


100%|██████████| 115031/115031 [06:28<00:00, 295.78it/s]


Starting training epoch 5/5
Learning rate: 0.003723748311551364, Batch size: 32, Hidden dims: (32, 256), Kernel sizes: (5, 5), Activations: ('tanh', 'leaky_relu')


100%|██████████| 115031/115031 [06:33<00:00, 292.45it/s]
[I 2025-06-08 01:00:37,255] Trial 15 finished with value: 0.04080194979906082 and parameters: {'hidden_dims': '32,256', 'kernel_sizes': '5,5', 'activations': 'tanh,leaky_relu', 'lr': 0.003723748311551364, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.00010818833247580927, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:44<00:00, 326.35it/s]


Starting training epoch 2/5
Learning rate: 0.00010818833247580927, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:39<00:00, 328.70it/s]


Starting training epoch 3/5
Learning rate: 0.00010818833247580927, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:39<00:00, 329.02it/s]


Starting training epoch 4/5
Learning rate: 0.00010818833247580927, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:39<00:00, 328.69it/s]


Starting training epoch 5/5
Learning rate: 0.00010818833247580927, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:37<00:00, 329.81it/s]
[I 2025-06-08 01:58:59,059] Trial 16 finished with value: 0.03839890658855438 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.00010818833247580927, 'batch_size': 16}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.001330430440181451, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:40<00:00, 328.21it/s]


Starting training epoch 2/5
Learning rate: 0.001330430440181451, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:40<00:00, 328.26it/s]


Starting training epoch 3/5
Learning rate: 0.001330430440181451, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:41<00:00, 328.07it/s]


Starting training epoch 4/5
Learning rate: 0.001330430440181451, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:36<00:00, 330.28it/s]


Starting training epoch 5/5
Learning rate: 0.001330430440181451, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (5, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [11:36<00:00, 330.48it/s]
[I 2025-06-08 02:57:15,044] Trial 17 finished with value: 0.02493678405880928 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '5,5', 'activations': 'tanh,relu', 'lr': 0.001330430440181451, 'batch_size': 16}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.0030610938269223786, Batch size: 32, Hidden dims: (32, 128), Kernel sizes: (5, 5), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 115031/115031 [06:28<00:00, 295.96it/s]


Starting training epoch 2/5
Learning rate: 0.0030610938269223786, Batch size: 32, Hidden dims: (32, 128), Kernel sizes: (5, 5), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 115031/115031 [06:26<00:00, 297.49it/s]


Starting training epoch 3/5
Learning rate: 0.0030610938269223786, Batch size: 32, Hidden dims: (32, 128), Kernel sizes: (5, 5), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 115031/115031 [06:28<00:00, 296.33it/s]


Starting training epoch 4/5
Learning rate: 0.0030610938269223786, Batch size: 32, Hidden dims: (32, 128), Kernel sizes: (5, 5), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 115031/115031 [06:32<00:00, 292.71it/s]


Starting training epoch 5/5
Learning rate: 0.0030610938269223786, Batch size: 32, Hidden dims: (32, 128), Kernel sizes: (5, 5), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 115031/115031 [06:28<00:00, 296.03it/s]
[I 2025-06-08 03:29:40,326] Trial 18 finished with value: 0.02745898813009262 and parameters: {'hidden_dims': '32,128', 'kernel_sizes': '5,5', 'activations': 'sigmoid,leaky_relu', 'lr': 0.0030610938269223786, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Starting training epoch 1/5
Learning rate: 0.0008911494812012466, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 115031/115031 [06:35<00:00, 290.67it/s]


Starting training epoch 2/5
Learning rate: 0.0008911494812012466, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 115031/115031 [06:43<00:00, 285.41it/s]


Starting training epoch 3/5
Learning rate: 0.0008911494812012466, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 115031/115031 [06:40<00:00, 287.30it/s]


Starting training epoch 4/5
Learning rate: 0.0008911494812012466, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 115031/115031 [06:37<00:00, 289.17it/s]


Starting training epoch 5/5
Learning rate: 0.0008911494812012466, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (3, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 115031/115031 [06:40<00:00, 287.41it/s]
[I 2025-06-08 04:02:57,694] Trial 19 finished with value: 0.03344499692320824 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '3,5', 'activations': 'relu,sigmoid', 'lr': 0.0008911494812012466, 'batch_size': 32}. Best is trial 10 with value: 0.018964406102895737.


Best params: {'hidden_dims': '32,64', 'kernel_sizes': '5,5', 'activations': 'leaky_relu,relu', 'lr': 0.002507800985448251, 'batch_size': 32}


KeyError: 'l'

In [8]:
activations_map = {
    'relu': torch.nn.ReLU(),
    'tanh': torch.nn.Tanh(),
    'sigmoid': torch.nn.Sigmoid(),
    'leaky_relu': torch.nn.LeakyReLU(),
    'elu': torch.nn.ELU(),
    'gelu': torch.nn.GELU(),
    'selu': torch.nn.SELU(),
    'none': nn.Identity()
}
# Prepare activations
activations = [activations_map[act] for act in best_params['activations'].split(',')]

# Build and train the best model on the full training set
sample_input, sample_target = dataset[0]
input_dim = sample_input.shape[-1]
output_dim = sample_target.shape[-1]
best_model = SequenceCNN(
    input_dim=input_dim,
    hidden_dims=list(best_params['hidden_dims']),
    kernel_sizes=list(best_params['kernel_sizes']),
    output_dim=output_dim,
    activations=activations
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

optimizer = optim.Adam(best_model.parameters(), lr=best_params['lr'])
criterion = nn.MSELoss()

# Use the full train_loader for training
num_epochs = 5
for epoch in range(num_epochs):
    best_model.train()
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

# Evaluate on the test set
best_model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * inputs.size(0)
test_loss /= len(test_loader.dataset)
print(f"Test loss: {test_loss}")


AssertionError: hidden_dims and kernel_sizes must have the same length

In [None]:
# Save the best model
model_path = Path("best_cnn_model.pth")
torch.save(best_model.state_dict(), model_path)
print(f"Best model saved to {model_path}")
