In [1]:

import os
from CNN import SequenceCNN
from torch.utils.data import DataLoader as TorchDataLoader
from tqdm import tqdm

In [2]:
import torch
from torch.optim import lr_scheduler

import torch.optim as optim
import torch.nn as nn
from PrecipitationDataset import PrecipitationDataset
from functools import partial
import os
import tempfile
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms

In [3]:
# Identifier of the precipitation data handed to the dataset constructor.
PRCP_DATA_SOURCE = 'data_prcp_summed'

# Build the dataset object that the split, model-sizing, and training cells consume.
dataset = PrecipitationDataset(PRCP_DATA_SOURCE)

100%|██████████| 562/562 [00:20<00:00, 27.34it/s]


In [4]:
# Hyperparameters for the final CNN: layer widths, convolution kernel sizes,
# Adam learning rate, DataLoader batch size, and per-layer activation names.
best_params = dict(
    hidden_dims=[32, 64],
    kernel_sizes=[13, 5],
    lr=0.00538644174274093,
    batch_size=16,
    activations=['relu', 'sigmoid'],
)

In [5]:
# Split the dataset into train (70%), validation (15%), and test sets.
# The test split takes the remainder so the three sizes always sum to len(dataset)
# even when the 70% / 15% products are truncated by int().
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split explicitly. Without a fixed generator every kernel restart
# draws a different partition, so checkpoints and test metrics produced in
# separate sessions would be computed on different train/val/test samples.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Create DataLoaders for each set
batch_size = best_params['batch_size']  # You can adjust this as needed

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = TorchDataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = TorchDataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = TorchDataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Lookup from activation name to the torch.nn module class implementing it.
# NOTE(review): this mapping is not referenced anywhere in this cell; the
# activation names from best_params are passed to SequenceCNN as plain strings.
# Presumably SequenceCNN resolves the names itself — confirm, or translate the
# names through this map before constructing the model.
activations_map = {
    'relu': torch.nn.ReLU,
    'tanh': torch.nn.Tanh,
    'sigmoid': torch.nn.Sigmoid,
    'leaky_relu': torch.nn.LeakyReLU,
    'elu': torch.nn.ELU,
    'gelu': torch.nn.GELU,
    'selu': torch.nn.SELU,
    'none': nn.Identity
}

# Build the model from the selected hyperparameters.
# Input/output feature sizes are read off the last axis of the first sample.
sample_input, sample_target = dataset[0]
input_dim = sample_input.shape[-1]
output_dim = sample_target.shape[-1]
best_model = SequenceCNN(
    input_dim=input_dim,
    hidden_dims=list(best_params['hidden_dims']),
    kernel_sizes=list(best_params['kernel_sizes']),
    output_dim=output_dim,
    activations=best_params['activations'],
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

optimizer = optim.Adam(best_model.parameters(), lr=best_params['lr'])
criterion = nn.MSELoss()

# Train on train_loader; after every epoch score the model on val_loader and
# checkpoint the weights whenever the validation loss improves.
num_epochs = 50
best_val_loss = float('inf')
for epoch in range(num_epochs):
    # Training mode: enables train-time behaviour of layers such as dropout
    # or batch norm, if the model has any.
    best_model.train()
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Switch to evaluation mode before validating. torch.no_grad() only turns
    # off gradient tracking; it does not change layer behaviour, so without
    # this call the validation loss used for checkpoint selection would be
    # computed with the model still in training mode.
    best_model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for inputs, targets in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}"):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = best_model(inputs)
            loss = criterion(outputs, targets)
            # criterion returns the batch mean; weight by batch size so the
            # final figure is a per-sample mean even if the last batch is short.
            val_loss += loss.item() * inputs.size(0)
        val_loss /= len(val_loader.dataset)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Save the best model (weights only, as a state_dict)
            torch.save(best_model.state_dict(), 'best_model_prcp_cnn.pt')

# Persist the final-epoch model. Unlike the checkpoint above this pickles the
# whole module object rather than a state_dict.
torch.save(best_model, 'last_model_prcp_cnn.pt')


Training Epoch 1/50: 100%|██████████| 161044/161044 [06:04<00:00, 442.08it/s]
Validation Epoch 1/50: 100%|██████████| 34510/34510 [00:34<00:00, 1009.87it/s]
Training Epoch 2/50: 100%|██████████| 161044/161044 [07:00<00:00, 383.05it/s]
Validation Epoch 2/50: 100%|██████████| 34510/34510 [00:38<00:00, 889.69it/s] 
Training Epoch 3/50: 100%|██████████| 161044/161044 [07:58<00:00, 336.32it/s]
Validation Epoch 3/50: 100%|██████████| 34510/34510 [00:39<00:00, 870.01it/s] 
Training Epoch 4/50: 100%|██████████| 161044/161044 [07:48<00:00, 343.87it/s]
Validation Epoch 4/50: 100%|██████████| 34510/34510 [00:39<00:00, 871.97it/s] 
Training Epoch 5/50: 100%|██████████| 161044/161044 [07:53<00:00, 340.08it/s]
Validation Epoch 5/50: 100%|██████████| 34510/34510 [00:40<00:00, 846.07it/s] 
Training Epoch 6/50: 100%|██████████| 161044/161044 [07:48<00:00, 343.98it/s]
Validation Epoch 6/50: 100%|██████████| 34510/34510 [00:39<00:00, 876.86it/s] 
Training Epoch 7/50: 100%|██████████| 161044/161044 [07:50

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, f1_score, accuracy_score

# Run the model over the test split with gradients disabled and gather the
# per-batch predictions and targets as NumPy arrays on the host.
# NOTE(review): this scores whatever weights `best_model` holds in memory. Right
# after the training cell those are the final-epoch weights, not the
# 'best_model_prcp_cnn.pt' checkpoint — confirm that is the intended model.
pred_batches = []
target_batches = []
best_model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = best_model(inputs)
        pred_batches.append(outputs.cpu().numpy())
        target_batches.append(targets.cpu().numpy())

# Stitch the batches back together along axis 0.
all_preds = np.concatenate(pred_batches, axis=0)
all_targets = np.concatenate(target_batches, axis=0)

# Regression metrics; each call receives the targets first, predictions second.
mse = mean_squared_error(all_targets, all_preds)
mae = mean_absolute_error(all_targets, all_preds)
r2 = r2_score(all_targets, all_preds)

print(f"Mean Squared Error: {mse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")