# Training the CNN for temperature prediction


In [1]:

import os
from CNN import SequenceCNN
from torch.utils.data import DataLoader as TorchDataLoader

In [2]:
import torch
from torch.optim import lr_scheduler

import torch.optim as optim
import torch.nn as nn
from PrecipitationDataset import PrecipitationDataset
from functools import partial
import os
import tempfile
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms

In [3]:
dataset = PrecipitationDataset('data_prcp_summed')

100%|██████████| 562/562 [00:08<00:00, 65.07it/s] 


In [4]:
import optuna
from tqdm import tqdm

def objective(trial):
    # Remove activations_map from here, since it is already defined and used in cell 8.
    # Also, do not redefine batch_size or train_loader, as they are already defined globally.
    # You can remove this placeholder entirely.

    # Use strings to represent tuples for Optuna compatibility
    hidden_dims_options = ["32,32", "32,64", "64,64", "64,32", "32,128", "128,32","64,128","128,64", "128,128", "64,128", "32,256", "128,256", "256,256"]
    kernel_sizes_options = ["3,3", "5,3", "3,5", "5,5", "5,7", "7,5", "7,7", "9,5", "5,9", "9,9", "11,5", "5,11", "11,11", "13,5", "5,13", "13,13"]
    # Define activations_options outside the objective function to avoid dynamic value space error
    activations_options = ["relu,relu", "tanh,relu", "leaky_relu,sigmoid", "sigmoid,tanh", "relu,sigmoid", "tanh,tanh", "leaky_relu,leaky_relu", "tanh,leaky_relu", "relu,leaky_relu", "sigmoid,sigmoid", "sigmoid,leaky_relu", "relu,tanh", "tanh,relu", "leaky_relu,relu", "selu,leaky_relu"]
    hidden_dims_str = trial.suggest_categorical("hidden_dims", hidden_dims_options)
    kernel_sizes_str = trial.suggest_categorical("kernel_sizes", kernel_sizes_options)
    activations_str = trial.suggest_categorical("activations", activations_options)

    hidden_dims = tuple(map(int, hidden_dims_str.split(',')))
    kernel_sizes = tuple(map(int, kernel_sizes_str.split(',')))
    activations = tuple(activations_str.split(','))

    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    batch_size = trial.suggest_categorical("batch_size", (16, 32, 64))
    num_epochs = 5
    sample_input, sample_target = dataset[0]
    input_dim = sample_input.shape[-1]
    output_dim = sample_target.shape[-1]


    model = SequenceCNN(
        input_dim=input_dim,
        hidden_dims=list(hidden_dims),
        kernel_sizes=list(kernel_sizes),
        output_dim=output_dim,
        activations=activations
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    train_loader = TorchDataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        print(f"Starting training epoch {epoch + 1}/{num_epochs}")
        print(f"Learning rate: {lr}, Batch size: {batch_size}, Hidden dims: {hidden_dims}, Kernel sizes: {kernel_sizes}, Activations: {activations}")
        model.train()
        for batch in tqdm(train_loader):
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    return loss.item()

# Example usage:
# study = optuna.create_study(direction="minimize")
# study.optimize(objective, n_trials=10)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Split the dataset into train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create DataLoaders for each set
batch_size = 32  # You can adjust this as needed

train_loader = TorchDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = TorchDataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = TorchDataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [6]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device count:", torch.cuda.device_count())
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("Using CPU")

CUDA available: True
CUDA device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 3060 Laptop GPU


In [9]:
import optuna
import warnings
warnings.filterwarnings("ignore")
# Run Optuna hyperparameter search
study = optuna.create_study(
    study_name="CNN_Sequence_Rainfall_Prediction_v3",
    storage="sqlite:///optuna_study.db",
    load_if_exists=True,
    direction="minimize",
)

study.optimize(objective, n_trials=20)

# Get the best config
best_params = study.best_params
print(f"Best params: {best_params}")


[I 2025-06-08 20:19:40,129] A new study created in RDB with name: CNN_Sequence_Rainfall_Prediction_v3


Starting training epoch 1/5
Learning rate: 0.0010651942939002158, Batch size: 64, Hidden dims: (64, 128), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [03:46<00:00, 254.33it/s]


Starting training epoch 2/5
Learning rate: 0.0010651942939002158, Batch size: 64, Hidden dims: (64, 128), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [03:52<00:00, 247.53it/s]


Starting training epoch 3/5
Learning rate: 0.0010651942939002158, Batch size: 64, Hidden dims: (64, 128), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:02<00:00, 237.38it/s]


Starting training epoch 4/5
Learning rate: 0.0010651942939002158, Batch size: 64, Hidden dims: (64, 128), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:31<00:00, 212.13it/s]


Starting training epoch 5/5
Learning rate: 0.0010651942939002158, Batch size: 64, Hidden dims: (64, 128), Kernel sizes: (5, 3), Activations: ('tanh', 'tanh')


100%|██████████| 57516/57516 [04:35<00:00, 208.45it/s]
[I 2025-06-08 20:40:28,142] Trial 0 finished with value: 0.08529841154813766 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '5,3', 'activations': 'tanh,tanh', 'lr': 0.0010651942939002158, 'batch_size': 64}. Best is trial 0 with value: 0.08529841154813766.


Starting training epoch 1/5
Learning rate: 0.0006767246436172493, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (9, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [13:50<00:00, 276.97it/s]


Starting training epoch 2/5
Learning rate: 0.0006767246436172493, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (9, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [14:52<00:00, 257.87it/s]


Starting training epoch 3/5
Learning rate: 0.0006767246436172493, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (9, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [14:46<00:00, 259.43it/s]


Starting training epoch 4/5
Learning rate: 0.0006767246436172493, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (9, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [14:57<00:00, 256.28it/s]


Starting training epoch 5/5
Learning rate: 0.0006767246436172493, Batch size: 16, Hidden dims: (32, 32), Kernel sizes: (9, 5), Activations: ('tanh', 'relu')


100%|██████████| 230062/230062 [15:11<00:00, 252.28it/s]
[I 2025-06-08 21:54:07,499] Trial 1 finished with value: 0.26938411593437195 and parameters: {'hidden_dims': '32,32', 'kernel_sizes': '9,5', 'activations': 'tanh,relu', 'lr': 0.0006767246436172493, 'batch_size': 16}. Best is trial 0 with value: 0.08529841154813766.


Starting training epoch 1/5
Learning rate: 0.00014854999409075044, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (5, 11), Activations: ('tanh', 'tanh')


100%|██████████| 115031/115031 [07:58<00:00, 240.49it/s]


Starting training epoch 2/5
Learning rate: 0.00014854999409075044, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (5, 11), Activations: ('tanh', 'tanh')


100%|██████████| 115031/115031 [08:01<00:00, 238.80it/s]


Starting training epoch 3/5
Learning rate: 0.00014854999409075044, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (5, 11), Activations: ('tanh', 'tanh')


100%|██████████| 115031/115031 [07:09<00:00, 267.67it/s]


Starting training epoch 4/5
Learning rate: 0.00014854999409075044, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (5, 11), Activations: ('tanh', 'tanh')


100%|██████████| 115031/115031 [05:16<00:00, 363.82it/s]


Starting training epoch 5/5
Learning rate: 0.00014854999409075044, Batch size: 32, Hidden dims: (64, 128), Kernel sizes: (5, 11), Activations: ('tanh', 'tanh')


100%|██████████| 115031/115031 [06:04<00:00, 315.64it/s]
[I 2025-06-08 22:28:38,020] Trial 2 finished with value: 0.3404926657676697 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '5,11', 'activations': 'tanh,tanh', 'lr': 0.00014854999409075044, 'batch_size': 32}. Best is trial 0 with value: 0.08529841154813766.


Starting training epoch 1/5
Learning rate: 0.008695477088009542, Batch size: 32, Hidden dims: (128, 32), Kernel sizes: (5, 7), Activations: ('relu', 'relu')


100%|██████████| 115031/115031 [06:30<00:00, 294.76it/s]


Starting training epoch 2/5
Learning rate: 0.008695477088009542, Batch size: 32, Hidden dims: (128, 32), Kernel sizes: (5, 7), Activations: ('relu', 'relu')


100%|██████████| 115031/115031 [06:22<00:00, 301.00it/s]


Starting training epoch 3/5
Learning rate: 0.008695477088009542, Batch size: 32, Hidden dims: (128, 32), Kernel sizes: (5, 7), Activations: ('relu', 'relu')


100%|██████████| 115031/115031 [06:31<00:00, 293.88it/s]


Starting training epoch 4/5
Learning rate: 0.008695477088009542, Batch size: 32, Hidden dims: (128, 32), Kernel sizes: (5, 7), Activations: ('relu', 'relu')


100%|██████████| 115031/115031 [06:33<00:00, 292.23it/s]


Starting training epoch 5/5
Learning rate: 0.008695477088009542, Batch size: 32, Hidden dims: (128, 32), Kernel sizes: (5, 7), Activations: ('relu', 'relu')


100%|██████████| 115031/115031 [06:27<00:00, 296.62it/s]
[I 2025-06-08 23:01:03,403] Trial 3 finished with value: 0.15309256315231323 and parameters: {'hidden_dims': '128,32', 'kernel_sizes': '5,7', 'activations': 'relu,relu', 'lr': 0.008695477088009542, 'batch_size': 32}. Best is trial 0 with value: 0.08529841154813766.


Starting training epoch 1/5
Learning rate: 0.00021572977331807706, Batch size: 32, Hidden dims: (256, 256), Kernel sizes: (13, 13), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 115031/115031 [06:59<00:00, 274.23it/s]


Starting training epoch 2/5
Learning rate: 0.00021572977331807706, Batch size: 32, Hidden dims: (256, 256), Kernel sizes: (13, 13), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 115031/115031 [06:45<00:00, 283.88it/s]


Starting training epoch 3/5
Learning rate: 0.00021572977331807706, Batch size: 32, Hidden dims: (256, 256), Kernel sizes: (13, 13), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 115031/115031 [06:08<00:00, 311.90it/s]


Starting training epoch 4/5
Learning rate: 0.00021572977331807706, Batch size: 32, Hidden dims: (256, 256), Kernel sizes: (13, 13), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 115031/115031 [06:04<00:00, 316.01it/s]


Starting training epoch 5/5
Learning rate: 0.00021572977331807706, Batch size: 32, Hidden dims: (256, 256), Kernel sizes: (13, 13), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 115031/115031 [06:01<00:00, 318.26it/s]
[I 2025-06-08 23:33:02,451] Trial 4 finished with value: 0.02425486035645008 and parameters: {'hidden_dims': '256,256', 'kernel_sizes': '13,13', 'activations': 'leaky_relu,leaky_relu', 'lr': 0.00021572977331807706, 'batch_size': 32}. Best is trial 4 with value: 0.02425486035645008.


Starting training epoch 1/5
Learning rate: 0.0034139889664708037, Batch size: 64, Hidden dims: (128, 128), Kernel sizes: (5, 5), Activations: ('relu', 'relu')


100%|██████████| 57516/57516 [02:58<00:00, 322.98it/s]


Starting training epoch 2/5
Learning rate: 0.0034139889664708037, Batch size: 64, Hidden dims: (128, 128), Kernel sizes: (5, 5), Activations: ('relu', 'relu')


100%|██████████| 57516/57516 [02:55<00:00, 327.82it/s]


Starting training epoch 3/5
Learning rate: 0.0034139889664708037, Batch size: 64, Hidden dims: (128, 128), Kernel sizes: (5, 5), Activations: ('relu', 'relu')


100%|██████████| 57516/57516 [03:45<00:00, 254.54it/s]


Starting training epoch 4/5
Learning rate: 0.0034139889664708037, Batch size: 64, Hidden dims: (128, 128), Kernel sizes: (5, 5), Activations: ('relu', 'relu')


100%|██████████| 57516/57516 [03:34<00:00, 267.59it/s]


Starting training epoch 5/5
Learning rate: 0.0034139889664708037, Batch size: 64, Hidden dims: (128, 128), Kernel sizes: (5, 5), Activations: ('relu', 'relu')


100%|██████████| 57516/57516 [03:32<00:00, 270.69it/s]
[I 2025-06-08 23:49:49,469] Trial 5 finished with value: 0.08249148726463318 and parameters: {'hidden_dims': '128,128', 'kernel_sizes': '5,5', 'activations': 'relu,relu', 'lr': 0.0034139889664708037, 'batch_size': 64}. Best is trial 4 with value: 0.02425486035645008.


Starting training epoch 1/5
Learning rate: 0.0026622348710969488, Batch size: 16, Hidden dims: (256, 256), Kernel sizes: (11, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:04<00:00, 345.97it/s]


Starting training epoch 2/5
Learning rate: 0.0026622348710969488, Batch size: 16, Hidden dims: (256, 256), Kernel sizes: (11, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:14<00:00, 340.95it/s]


Starting training epoch 3/5
Learning rate: 0.0026622348710969488, Batch size: 16, Hidden dims: (256, 256), Kernel sizes: (11, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:09<00:00, 343.85it/s]


Starting training epoch 4/5
Learning rate: 0.0026622348710969488, Batch size: 16, Hidden dims: (256, 256), Kernel sizes: (11, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:07<00:00, 344.83it/s]


Starting training epoch 5/5
Learning rate: 0.0026622348710969488, Batch size: 16, Hidden dims: (256, 256), Kernel sizes: (11, 5), Activations: ('leaky_relu', 'leaky_relu')


100%|██████████| 230062/230062 [11:03<00:00, 346.65it/s]
[I 2025-06-09 00:45:29,255] Trial 6 finished with value: 0.15973030030727386 and parameters: {'hidden_dims': '256,256', 'kernel_sizes': '11,5', 'activations': 'leaky_relu,leaky_relu', 'lr': 0.0026622348710969488, 'batch_size': 16}. Best is trial 4 with value: 0.02425486035645008.


Starting training epoch 1/5
Learning rate: 0.00642945315468369, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:40<00:00, 328.66it/s]


Starting training epoch 2/5
Learning rate: 0.00642945315468369, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:02<00:00, 318.30it/s]


Starting training epoch 3/5
Learning rate: 0.00642945315468369, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:01<00:00, 318.99it/s]


Starting training epoch 4/5
Learning rate: 0.00642945315468369, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:24<00:00, 309.13it/s]


Starting training epoch 5/5
Learning rate: 0.00642945315468369, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:56<00:00, 321.16it/s]
[I 2025-06-09 01:45:33,930] Trial 7 finished with value: 0.011398657225072384 and parameters: {'hidden_dims': '128,32', 'kernel_sizes': '3,3', 'activations': 'leaky_relu,relu', 'lr': 0.00642945315468369, 'batch_size': 16}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.0007711804872327728, Batch size: 64, Hidden dims: (32, 32), Kernel sizes: (7, 7), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 57516/57516 [04:07<00:00, 232.18it/s]


Starting training epoch 2/5
Learning rate: 0.0007711804872327728, Batch size: 64, Hidden dims: (32, 32), Kernel sizes: (7, 7), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 57516/57516 [04:01<00:00, 238.44it/s]


Starting training epoch 3/5
Learning rate: 0.0007711804872327728, Batch size: 64, Hidden dims: (32, 32), Kernel sizes: (7, 7), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 57516/57516 [04:00<00:00, 238.99it/s]


Starting training epoch 4/5
Learning rate: 0.0007711804872327728, Batch size: 64, Hidden dims: (32, 32), Kernel sizes: (7, 7), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 57516/57516 [04:09<00:00, 230.14it/s]


Starting training epoch 5/5
Learning rate: 0.0007711804872327728, Batch size: 64, Hidden dims: (32, 32), Kernel sizes: (7, 7), Activations: ('sigmoid', 'leaky_relu')


100%|██████████| 57516/57516 [04:08<00:00, 231.76it/s]
[I 2025-06-09 02:06:01,754] Trial 8 finished with value: 0.2606435716152191 and parameters: {'hidden_dims': '32,32', 'kernel_sizes': '7,7', 'activations': 'sigmoid,leaky_relu', 'lr': 0.0007711804872327728, 'batch_size': 64}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.0014576269253831129, Batch size: 16, Hidden dims: (32, 128), Kernel sizes: (13, 13), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [11:04<00:00, 346.46it/s]


Starting training epoch 2/5
Learning rate: 0.0014576269253831129, Batch size: 16, Hidden dims: (32, 128), Kernel sizes: (13, 13), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [10:52<00:00, 352.80it/s]


Starting training epoch 3/5
Learning rate: 0.0014576269253831129, Batch size: 16, Hidden dims: (32, 128), Kernel sizes: (13, 13), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [10:56<00:00, 350.51it/s]


Starting training epoch 4/5
Learning rate: 0.0014576269253831129, Batch size: 16, Hidden dims: (32, 128), Kernel sizes: (13, 13), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [10:50<00:00, 353.60it/s]


Starting training epoch 5/5
Learning rate: 0.0014576269253831129, Batch size: 16, Hidden dims: (32, 128), Kernel sizes: (13, 13), Activations: ('relu', 'tanh')


100%|██████████| 230062/230062 [10:54<00:00, 351.69it/s]
[I 2025-06-09 03:00:39,158] Trial 9 finished with value: 0.05109427496790886 and parameters: {'hidden_dims': '32,128', 'kernel_sizes': '13,13', 'activations': 'relu,tanh', 'lr': 0.0014576269253831129, 'batch_size': 16}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.008893126687335958, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:02<00:00, 318.58it/s]


Starting training epoch 2/5
Learning rate: 0.008893126687335958, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:05<00:00, 317.09it/s]


Starting training epoch 3/5
Learning rate: 0.008893126687335958, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:24<00:00, 308.87it/s]


Starting training epoch 4/5
Learning rate: 0.008893126687335958, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:15<00:00, 312.60it/s]


Starting training epoch 5/5
Learning rate: 0.008893126687335958, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:01<00:00, 319.07it/s]
[I 2025-06-09 04:01:28,840] Trial 10 finished with value: 0.015621443279087543 and parameters: {'hidden_dims': '128,32', 'kernel_sizes': '3,3', 'activations': 'leaky_relu,relu', 'lr': 0.008893126687335958, 'batch_size': 16}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.008821707111378616, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:03<00:00, 318.11it/s]


Starting training epoch 2/5
Learning rate: 0.008821707111378616, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:56<00:00, 320.97it/s]


Starting training epoch 3/5
Learning rate: 0.008821707111378616, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [11:49<00:00, 324.30it/s]


Starting training epoch 4/5
Learning rate: 0.008821707111378616, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:13<00:00, 313.61it/s]


Starting training epoch 5/5
Learning rate: 0.008821707111378616, Batch size: 16, Hidden dims: (128, 32), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:15<00:00, 312.72it/s]
[I 2025-06-09 05:01:47,652] Trial 11 finished with value: 0.08991184830665588 and parameters: {'hidden_dims': '128,32', 'kernel_sizes': '3,3', 'activations': 'leaky_relu,relu', 'lr': 0.008821707111378616, 'batch_size': 16}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.004190730241597069, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:47<00:00, 299.65it/s]


Starting training epoch 2/5
Learning rate: 0.004190730241597069, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:06<00:00, 316.59it/s]


Starting training epoch 3/5
Learning rate: 0.004190730241597069, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:00<00:00, 319.20it/s]


Starting training epoch 4/5
Learning rate: 0.004190730241597069, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:08<00:00, 315.64it/s]


Starting training epoch 5/5
Learning rate: 0.004190730241597069, Batch size: 16, Hidden dims: (64, 128), Kernel sizes: (3, 3), Activations: ('leaky_relu', 'relu')


100%|██████████| 230062/230062 [12:07<00:00, 316.11it/s]
[I 2025-06-09 06:02:59,677] Trial 12 finished with value: 0.07912928611040115 and parameters: {'hidden_dims': '64,128', 'kernel_sizes': '3,3', 'activations': 'leaky_relu,relu', 'lr': 0.004190730241597069, 'batch_size': 16}. Best is trial 7 with value: 0.011398657225072384.


Starting training epoch 1/5
Learning rate: 0.00538644174274093, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:10<00:00, 314.81it/s]


Starting training epoch 2/5
Learning rate: 0.00538644174274093, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:06<00:00, 316.52it/s]


Starting training epoch 3/5
Learning rate: 0.00538644174274093, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:07<00:00, 316.27it/s]


Starting training epoch 4/5
Learning rate: 0.00538644174274093, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:14<00:00, 313.06it/s]


Starting training epoch 5/5
Learning rate: 0.00538644174274093, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:26<00:00, 308.34it/s]
[I 2025-06-09 07:04:05,921] Trial 13 finished with value: 0.002734301146119833 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '13,5', 'activations': 'relu,sigmoid', 'lr': 0.00538644174274093, 'batch_size': 16}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.004473617404172703, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:04<00:00, 317.42it/s]


Starting training epoch 2/5
Learning rate: 0.004473617404172703, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:11<00:00, 314.47it/s]


Starting training epoch 3/5
Learning rate: 0.004473617404172703, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:04<00:00, 317.59it/s]


Starting training epoch 4/5
Learning rate: 0.004473617404172703, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:00<00:00, 319.17it/s]


Starting training epoch 5/5
Learning rate: 0.004473617404172703, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [12:11<00:00, 314.45it/s]
[I 2025-06-09 08:04:39,334] Trial 14 finished with value: 0.24113358557224274 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '13,5', 'activations': 'relu,sigmoid', 'lr': 0.004473617404172703, 'batch_size': 16}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.0023048129277052514, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:42<00:00, 327.29it/s]


Starting training epoch 2/5
Learning rate: 0.0023048129277052514, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:38<00:00, 329.24it/s]


Starting training epoch 3/5
Learning rate: 0.0023048129277052514, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:46<00:00, 325.76it/s]


Starting training epoch 4/5
Learning rate: 0.0023048129277052514, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:46<00:00, 325.57it/s]


Starting training epoch 5/5
Learning rate: 0.0023048129277052514, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:42<00:00, 327.43it/s]
[I 2025-06-09 09:03:16,672] Trial 15 finished with value: 0.006539603695273399 and parameters: {'hidden_dims': '128,64', 'kernel_sizes': '5,13', 'activations': 'relu,sigmoid', 'lr': 0.0023048129277052514, 'batch_size': 16}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.0019430415294862113, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:44<00:00, 326.39it/s]


Starting training epoch 2/5
Learning rate: 0.0019430415294862113, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:44<00:00, 326.79it/s]


Starting training epoch 3/5
Learning rate: 0.0019430415294862113, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:44<00:00, 326.71it/s]


Starting training epoch 4/5
Learning rate: 0.0019430415294862113, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:44<00:00, 326.60it/s]


Starting training epoch 5/5
Learning rate: 0.0019430415294862113, Batch size: 16, Hidden dims: (128, 64), Kernel sizes: (5, 13), Activations: ('relu', 'sigmoid')


100%|██████████| 230062/230062 [11:30<00:00, 332.96it/s]
[I 2025-06-09 10:01:45,276] Trial 16 finished with value: 0.07914354652166367 and parameters: {'hidden_dims': '128,64', 'kernel_sizes': '5,13', 'activations': 'relu,sigmoid', 'lr': 0.0019430415294862113, 'batch_size': 16}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.00046137951667444725, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 13), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 230062/230062 [12:14<00:00, 313.35it/s]


Starting training epoch 2/5
Learning rate: 0.00046137951667444725, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 13), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 230062/230062 [12:08<00:00, 315.73it/s]


Starting training epoch 3/5
Learning rate: 0.00046137951667444725, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 13), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 230062/230062 [12:08<00:00, 315.66it/s]


Starting training epoch 4/5
Learning rate: 0.00046137951667444725, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 13), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 230062/230062 [11:58<00:00, 320.33it/s]


Starting training epoch 5/5
Learning rate: 0.00046137951667444725, Batch size: 16, Hidden dims: (32, 64), Kernel sizes: (5, 13), Activations: ('sigmoid', 'sigmoid')


100%|██████████| 230062/230062 [11:57<00:00, 320.70it/s]
[I 2025-06-09 11:02:12,712] Trial 17 finished with value: 0.04041643440723419 and parameters: {'hidden_dims': '32,64', 'kernel_sizes': '5,13', 'activations': 'sigmoid,sigmoid', 'lr': 0.00046137951667444725, 'batch_size': 16}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.0021629840831280498, Batch size: 64, Hidden dims: (32, 256), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 57516/57516 [03:54<00:00, 245.23it/s]


Starting training epoch 2/5
Learning rate: 0.0021629840831280498, Batch size: 64, Hidden dims: (32, 256), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 57516/57516 [04:07<00:00, 232.45it/s]


Starting training epoch 3/5
Learning rate: 0.0021629840831280498, Batch size: 64, Hidden dims: (32, 256), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 57516/57516 [04:06<00:00, 233.05it/s]


Starting training epoch 4/5
Learning rate: 0.0021629840831280498, Batch size: 64, Hidden dims: (32, 256), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 57516/57516 [04:09<00:00, 230.19it/s]


Starting training epoch 5/5
Learning rate: 0.0021629840831280498, Batch size: 64, Hidden dims: (32, 256), Kernel sizes: (13, 5), Activations: ('relu', 'sigmoid')


100%|██████████| 57516/57516 [04:08<00:00, 231.87it/s]
[I 2025-06-09 11:22:39,527] Trial 18 finished with value: 0.048905473202466965 and parameters: {'hidden_dims': '32,256', 'kernel_sizes': '13,5', 'activations': 'relu,sigmoid', 'lr': 0.0021629840831280498, 'batch_size': 64}. Best is trial 13 with value: 0.002734301146119833.


Starting training epoch 1/5
Learning rate: 0.0003938717341268774, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (11, 11), Activations: ('tanh', 'relu')


100%|██████████| 115031/115031 [06:39<00:00, 287.69it/s]


Starting training epoch 2/5
Learning rate: 0.0003938717341268774, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (11, 11), Activations: ('tanh', 'relu')


100%|██████████| 115031/115031 [06:40<00:00, 286.99it/s]


Starting training epoch 3/5
Learning rate: 0.0003938717341268774, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (11, 11), Activations: ('tanh', 'relu')


100%|██████████| 115031/115031 [06:47<00:00, 282.40it/s]


Starting training epoch 4/5
Learning rate: 0.0003938717341268774, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (11, 11), Activations: ('tanh', 'relu')


100%|██████████| 115031/115031 [06:54<00:00, 277.41it/s]


Starting training epoch 5/5
Learning rate: 0.0003938717341268774, Batch size: 32, Hidden dims: (128, 64), Kernel sizes: (11, 11), Activations: ('tanh', 'relu')


100%|██████████| 115031/115031 [06:46<00:00, 283.03it/s]
[I 2025-06-09 11:56:28,751] Trial 19 finished with value: 0.06432613730430603 and parameters: {'hidden_dims': '128,64', 'kernel_sizes': '11,11', 'activations': 'tanh,relu', 'lr': 0.0003938717341268774, 'batch_size': 32}. Best is trial 13 with value: 0.002734301146119833.


Best params: {'hidden_dims': '32,64', 'kernel_sizes': '13,5', 'activations': 'relu,sigmoid', 'lr': 0.00538644174274093, 'batch_size': 16}


In [None]:
activations_map = {
    'relu': torch.nn.ReLU(),
    'tanh': torch.nn.Tanh(),
    'sigmoid': torch.nn.Sigmoid(),
    'leaky_relu': torch.nn.LeakyReLU(),
    'elu': torch.nn.ELU(),
    'gelu': torch.nn.GELU(),
    'selu': torch.nn.SELU(),
    'none': nn.Identity()
}
# Prepare activations
activations = [activations_map[act] for act in best_params['activations'].split(',')]

# Build and train the best model on the full training set
sample_input, sample_target = dataset[0]
input_dim = sample_input.shape[-1]
output_dim = sample_target.shape[-1]
best_model = SequenceCNN(
    input_dim=input_dim,
    hidden_dims=list(best_params['hidden_dims']),
    kernel_sizes=list(best_params['kernel_sizes']),
    output_dim=output_dim,
    activations=activations
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

optimizer = optim.Adam(best_model.parameters(), lr=best_params['lr'])
criterion = nn.MSELoss()

# Use the full train_loader for training
num_epochs = 5
for epoch in range(num_epochs):
    best_model.train()
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

# Evaluate on the test set
best_model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * inputs.size(0)
test_loss /= len(test_loader.dataset)
print(f"Test loss: {test_loss}")


AssertionError: hidden_dims and kernel_sizes must have the same length

In [None]:
# Save the best model
model_path = Path("best_cnn_model.pth")
torch.save(best_model.state_dict(), model_path)
print(f"Best model saved to {model_path}")
