In [1]:
# Mount Google Drive so the project checkout stored there is visible to this Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')

# Switch the working directory to the notebook's own folder; the relative paths used
# further down ('../../data/...', 'checkpoints/...') are resolved against this location.
%cd /content/drive/MyDrive/CorrectlyClonedChesapeake/ChesapeakeBayChlorophyll/notebooks/models/

Mounted at /content/drive/
/content/drive/MyDrive/CorrectlyClonedChesapeake/ChesapeakeBayChlorophyll/notebooks/models


# Set up

In [2]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import os

import logging
from tqdm.notebook import tqdm  # For progress bar
# Configure logging instead of print
logging.basicConfig(filename='tuning.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
import time
from IPython.display import clear_output


import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.model_selection import ParameterGrid
import json
import pickle
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
# Read the saved feature and chlorophyll tensors of every region into one dictionary.
# NOTE(review): torch.load unpickles the file contents, so only point it at trusted files.
tensor_dir = '../../data/filesForModel/withDateFeatures/tensors'

# Despite its name, this dictionary holds both kinds of tensor, stored under the keys
# 'region_{i}_features' and 'region_{i}_chlorophyll'.
features_tensor_dict = {}

for i in range(11):
    features_tensor = torch.load(f'{tensor_dir}/features_region_{i}_tensor.pt')
    chlorophyll_tensor = torch.load(f'{tensor_dir}/chlorophyll_region_{i}_tensor.pt')

    features_tensor_dict.update({
        f'region_{i}_features': features_tensor,
        f'region_{i}_chlorophyll': chlorophyll_tensor,
    })

    # Shape report for a quick visual sanity check of each region
    print(f"Region {i}: features tensor shape: {features_tensor.shape}, chlorophyll tensor shape: {chlorophyll_tensor.shape}")


  features_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/features_region_{i}_tensor.pt')
  chlorophyll_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/chlorophyll_region_{i}_tensor.pt')


Region 0: features tensor shape: torch.Size([2764, 10, 127]), chlorophyll tensor shape: torch.Size([2764, 127])
Region 1: features tensor shape: torch.Size([2764, 10, 236]), chlorophyll tensor shape: torch.Size([2764, 236])
Region 2: features tensor shape: torch.Size([2764, 10, 311]), chlorophyll tensor shape: torch.Size([2764, 311])


# Model

We will create a data stream for each of the 11 regions. Within each region, we use a long short-term memory (LSTM) model to predict the chlorophyll value at each point.

Since the regions are highly irregular in shape, the position grid (latitude and longitude) contains a lot of points outside of the region. These positions have NaN for every variable, so we need to mask them in our model.

## Defining the classes

In [None]:
class RegionalLSTM(nn.Module):
    """Per-position LSTM regressor for a single region.

    Every spatial position is treated as an independent sequence: the position axis is
    folded into the batch axis, the LSTM is run along time, and a linear head maps each
    hidden state to ``output_size`` values.

    Args:
        input_size: number of input variables per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values predicted per position and time step.
        h0, c0: stored on the instance as ``self.h0`` / ``self.c0``; ``forward`` does not
            read them, initial states are passed to ``forward`` directly.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size,h0=None, c0=None):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

        # Kept as attributes for callers that inspect them; not used by forward().
        self.h0 = h0
        self.c0 = c0

    def forward(self, x, h0=None, c0=None, time_batch_size= 100):
        """Run the LSTM over ``x`` in chunks along the time axis.

        Args:
            x: tensor of shape (batch_size, time_steps, variables, positions).
            h0, c0: optional initial hidden / cell state of shape
                (num_layers, batch_size * positions, hidden_size). Zeros are used
                unless BOTH are given.
            time_batch_size: number of time steps handed to the LSTM per call. The
                state is carried from one chunk to the next, so the chunk size does
                not change the result.

        Returns:
            ``(out, (h_n, c_n))`` where ``out`` has shape
            (batch_size, time_steps, positions) when ``output_size == 1`` and
            (batch_size, time_steps, positions, output_size) otherwise, and
            ``h_n`` / ``c_n`` are the LSTM states after the last time step.

        Raises:
            ValueError: if ``time_batch_size`` is smaller than 1.
        """
        if time_batch_size < 1:
            raise ValueError(f"time_batch_size must be >= 1, got {time_batch_size}")

        batch_size, time_steps, variables, positions = x.size()

        # Compute where the weights live rather than re-deriving a device from
        # torch.cuda.is_available(): a model left on the CPU must keep working on a
        # machine that also has a GPU.
        device = self.fc.weight.device
        x = x.to(device)

        # (batch, time, variables, positions) -> (batch * positions, time, variables):
        # each position becomes its own sequence.
        x = x.permute(0, 3, 1, 2).reshape(batch_size * positions, time_steps, variables)

        if h0 is None or c0 is None:
            state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
            h0 = self.fc.weight.new_zeros(state_shape)
            c0 = self.fc.weight.new_zeros(state_shape)
        else:
            h0 = h0.to(device)
            c0 = c0.to(device)

        # Feed the sequence chunk by chunk, threading the state through the chunks.
        outputs = []
        for start in range(0, time_steps, time_batch_size):
            end = min(start + time_batch_size, time_steps)
            lstm_out, (h0, c0) = self.lstm(x[:, start:end, :], (h0, c0))
            outputs.append(lstm_out)

        lstm_out = torch.cat(outputs, dim=1)  # (batch * positions, time, hidden)
        out = self.fc(lstm_out)               # (batch * positions, time, output_size)

        # Unfold the position axis again and put time before positions.
        out = out.reshape(batch_size, positions, time_steps, -1).permute(0, 2, 1, 3)
        if out.size(-1) == 1:
            # Single-output case keeps the historical (batch, time, positions) layout.
            out = out.squeeze(-1)

        return out, (h0,c0)


Since the relationships between regions come from a map, we hard-code them: an entry is 1 if the two regions border each other and 0 if they do not.

In [None]:
# Region adjacency matrix: entry [i, j] is 1 when regions i and j border each other, else 0.
# Bordering is mutual, so the matrix has to be symmetric with a zero diagonal.
# NOTE(review): row 6 previously lacked the link to region 4 although row 4 listed
# region 6; it is added here to restore symmetry -- confirm against the segment map.
neighbor_mask = torch.tensor([
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # Region 0, CB1TF
    [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],  # Region 1, CB2OH
    [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # Region 2, CB3MH
    [0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0],  # Region 3, CB4MH
    [0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1],  # Region 4, CB5MH
    [0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0],  # Region 5, CB6PH
    [0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1],  # Region 6, CB7PH
    [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],  # Region 7, CB8PH
    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # Region 8, EASMH
    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # Region 9, MOBPH
    [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0]   # Region 10, TANMH
], dtype=torch.float32)

# Guard against future edits that update only one of the two mirrored entries.
assert torch.equal(neighbor_mask, neighbor_mask.T), "neighbor_mask must be symmetric"

And we will generate a class that handles all regions, sharing hidden states between neighboring regions.

In [None]:
class MultiRegionModel(nn.Module):
    """One ``RegionalLSTM`` per region, with neighbouring regions seeding the initial state.

    For each region passed to ``forward``, the LSTMs of all bordering regions (according
    to ``neighbor_mask``) are first run on that region's data. The mean of their final
    hidden and cell states is then used as the initial state of the region's own LSTM.

    Args:
        neighbor_mask: square tensor; entry [i, j] == 1 marks regions i and j as neighbours.
        input_size, hidden_size, num_layers, output_size: forwarded to every RegionalLSTM.
    """

    def __init__(self, neighbor_mask, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.neighbor_mask = neighbor_mask
        self.regions = nn.ModuleList([
            RegionalLSTM(input_size, hidden_size, num_layers, output_size)
            for _ in range(neighbor_mask.shape[0])
        ])

    @staticmethod
    def _region_index(region_id):
        """Return the integer after the last underscore of ``region_id`` ('region_10' -> 10)."""
        # Taking only the final character would turn 'region_10' into 0.
        return int(str(region_id).rsplit('_', 1)[-1])

    def _neighbor_indices(self, region_index):
        """Return the indices of all regions flagged as neighbours of ``region_index``."""
        row = self.neighbor_mask[region_index]
        # Start at 0 so that region 0 can be a neighbour as well.
        return [j for j in range(self.neighbor_mask.shape[0]) if row[j] == 1]

    def forward(self, region_inputs):
        """Predict for every region in ``region_inputs``.

        Args:
            region_inputs: dict mapping a region id such as ``'region_3'`` to that
                region's feature tensor, in the layout ``RegionalLSTM.forward`` expects.

        Returns:
            dict with the same keys, each mapped to the region's output tensor.
        """
        region_outputs = {}

        for region_id, region_data in region_inputs.items():
            region_index = self._region_index(region_id)

            # Final states of every neighbouring LSTM when applied to THIS region's data.
            neighbor_h_states = []
            neighbor_c_states = []
            for neighbor_index in self._neighbor_indices(region_index):
                _, (neighbor_h, neighbor_c) = self.regions[neighbor_index](region_data)
                neighbor_h_states.append(neighbor_h)
                neighbor_c_states.append(neighbor_c)

            if neighbor_h_states:
                # Average hidden AND cell state across neighbours so that both halves of
                # the initial state come from the same aggregate.
                h0 = torch.stack(neighbor_h_states).mean(dim=0)
                c0 = torch.stack(neighbor_c_states).mean(dim=0)
            else:
                # No neighbours: let the regional LSTM fall back to zero initial states.
                h0 = None
                c0 = None

            output, _ = self.regions[region_index](region_data, h0, c0)
            region_outputs[region_id] = output

        return region_outputs


# Training


## Creating training, validation, and testing sets

In [None]:
# Containers for the chronological splits, keyed by 'region_{id}'.
train_features_dict, val_features_dict, test_features_dict = {}, {}, {}
train_targets_dict, val_targets_dict, test_targets_dict = {}, {}, {}

# Stored layout: features (time, variables, position), chlorophyll (time, position).
for region_id in range(11):
    # Prepend a batch axis of size 1 -> (batch, time, variables, position) / (batch, time, position)
    features_tensor = features_tensor_dict[f'region_{region_id}_features'].unsqueeze(0)
    chlorophyll_tensor = features_tensor_dict[f'region_{region_id}_chlorophyll'].unsqueeze(0)

    # 70% / 15% / remainder of the time steps, kept in chronological order
    n_time_steps = features_tensor.shape[1]
    train_size = int(0.7 * n_time_steps)
    val_size = int(0.15 * n_time_steps)
    test_size = n_time_steps - train_size - val_size
    val_end = train_size + val_size

    # Slice along the time axis (axis 1); trailing axes are taken whole
    train_features = features_tensor[:, :train_size]
    val_features = features_tensor[:, train_size:val_end]
    test_features = features_tensor[:, val_end:]

    train_targets = chlorophyll_tensor[:, :train_size]
    val_targets = chlorophyll_tensor[:, train_size:val_end]
    test_targets = chlorophyll_tensor[:, val_end:]

    region_key = f'region_{region_id}'
    train_features_dict[region_key] = train_features
    val_features_dict[region_key] = val_features
    test_features_dict[region_key] = test_features

    train_targets_dict[region_key] = train_targets
    val_targets_dict[region_key] = val_targets
    test_targets_dict[region_key] = test_targets

    # Shape report for verification
    print(f"Region {region_id}: Train features {train_features.shape}, Validation features {val_features.shape}, Test features {test_features.shape}")
    print(f"Region {region_id}: Train targets {train_targets.shape}, Validation targets {val_targets.shape}, Test targets {test_targets.shape}")


Region 0: Train features torch.Size([1, 1934, 10, 127]), Validation features torch.Size([1, 414, 10, 127]), Test features torch.Size([1, 416, 10, 127])
Region 0: Train targets torch.Size([1, 1934, 127]), Validation targets torch.Size([1, 414, 127]), Test targets torch.Size([1, 416, 127])
Region 1: Train features torch.Size([1, 1934, 10, 236]), Validation features torch.Size([1, 414, 10, 236]), Test features torch.Size([1, 416, 10, 236])
Region 1: Train targets torch.Size([1, 1934, 236]), Validation targets torch.Size([1, 414, 236]), Test targets torch.Size([1, 416, 236])
Region 2: Train features torch.Size([1, 1934, 10, 311]), Validation features torch.Size([1, 414, 10, 311]), Test features torch.Size([1, 416, 10, 311])
Region 2: Train targets torch.Size([1, 1934, 311]), Validation targets torch.Size([1, 414, 311]), Test targets torch.Size([1, 416, 311])
Region 3: Train features torch.Size([1, 1934, 10, 769]), Validation features torch.Size([1, 414, 10, 769]), Test features torch.Size(

In [None]:
# Write every split dictionary next to the source tensors as '<name>.pt'.
split_dicts = {
    'train_features_dict': train_features_dict,
    'val_features_dict': val_features_dict,
    'test_features_dict': test_features_dict,
    'train_targets_dict': train_targets_dict,
    'val_targets_dict': val_targets_dict,
    'test_targets_dict': test_targets_dict,
}
for split_name, split_dict in split_dicts.items():
    torch.save(split_dict, f'../../data/filesForModel/withDateFeatures/tensors/{split_name}.pt')

## Tuning each region separately

We will compare the results to training each region separately, without any shared hidden states.

In [None]:
# Quick check that a RegionalLSTM runs end to end: one forward pass over the
# region 0 training sequence, with no optimisation step.
model = RegionalLSTM(input_size=10, hidden_size=4, num_layers=2, output_size=1)  # output_size is fixed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training objects are created here but not stepped in this cell
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)
best_validation_loss = float('inf')
no_improvement_count = 0

features = train_features_dict['region_0'].to(device)
targets = train_targets_dict['region_0'].to(device)

model.train()

# Only the prediction is kept; the returned LSTM state is discarded
output, _ = model(features)

In [None]:
import time

def train_and_evaluate_region(model, region_id, train_features_dict, train_targets_dict, val_features_dict, val_targets_dict, learning_rate, num_epochs):
    """Train ``model`` on one region with early stopping and return its validation curve.

    Each epoch is a single full-sequence optimisation step on the region's training data
    followed by a validation pass. Training stops early once the validation loss has not
    improved for 15 consecutive epochs.

    Args:
        model: module whose forward returns ``(output, state)``; it is trained in place.
        region_id: key used to look the region up in all four dictionaries.
        train_features_dict, train_targets_dict: training inputs / targets per region.
        val_features_dict, val_targets_dict: validation inputs / targets per region.
        learning_rate: Adam learning rate.
        num_epochs: maximum number of epochs.

    Returns:
        List of validation MSE values (floats), one per completed epoch. The model is
        left in the state of the LAST epoch, which is the state the final list entry
        was measured on; no best-epoch weights are restored.
    """
    patience = 15  # epochs without validation improvement before stopping

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    best_validation_loss = float('inf')
    no_improvement_count = 0

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # The data does not change between epochs, so move it to the device once.
    features = train_features_dict[region_id].to(device)
    targets = train_targets_dict[region_id].to(device)
    val_features = val_features_dict[region_id].to(device)
    val_targets = val_targets_dict[region_id].to(device)

    validation_losses = []

    with tqdm(total=num_epochs, desc=f"Processing {region_id}", leave=False) as pbar:
        for epoch in range(num_epochs):
            # --- training step ---
            model.train()
            output, _ = model(features)
            loss = criterion(output, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # --- validation step ---
            model.eval()
            with torch.no_grad():
                val_output, _ = model(val_features)
                val_loss = criterion(val_output, val_targets).item()
            validation_losses.append(val_loss)

            # Advance the bar; without this it stays at 0 for the whole run.
            pbar.update(1)

            # --- early stopping ---
            if val_loss < best_validation_loss:
                best_validation_loss = val_loss
                no_improvement_count = 0
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    print(f"Stopping early at epoch {epoch + 1} due to no improvement.")
                    break

    return validation_losses


In [None]:
from IPython.display import clear_output

def hyperparameter_tuning_region(params,region_id, train_features_dict, train_targets_dict, val_features_dict, val_targets_dict):
    """Grid-search RegionalLSTM hyperparameters for one region, resumably.

    Progress is persisted in two files under ``checkpoints/dates/``: a JSON file mapping
    every evaluated parameter combination to its validation curve, and a checkpoint with
    the best model state so far. Combinations already present in the JSON file are
    skipped, so an interrupted search can be resumed by calling the function again.

    Args:
        params: dict of parameter name -> list of candidate values; must provide
            'hidden_size', 'num_layers', 'learning_rate' and 'num_epochs'.
        region_id: key of the region in the four data dictionaries.
        train_features_dict, train_targets_dict: training inputs / targets per region.
        val_features_dict, val_targets_dict: validation inputs / targets per region.

    Returns:
        ``(best_model, best_params, best_validation_loss)``. ``best_model`` is the
        trained module when the best combination was found during this call, the
        state dict read from the checkpoint when no combination of this call beat
        it, and ``None`` when neither exists.
    """
    checked_params_file = f'checkpoints/dates/checked_{region_id}_params.json'
    checkpoint_path = f'checkpoints/dates/model_{region_id}_checkpoint.pt'
    patience = 20  # consecutive non-improving combinations before giving up

    # Both files are written below; make sure their directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    if os.path.exists(checked_params_file):
        with open(checked_params_file, 'r') as f:
            checked_params = json.load(f)
    else:
        checked_params = {}

    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path,map_location=torch.device('cpu'))
        best_model = checkpoint['model']  # a state dict, see Returns
        best_params = checkpoint['params']
        best_validation_loss = checkpoint['validation_loss']
    else:
        best_model = None
        best_params = None
        best_validation_loss = float('inf')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Features are (batch, time, variables, position); read the variable count from
    # the data instead of relying on a notebook-level global.
    n_features = train_features_dict[region_id].shape[2]

    grid = ParameterGrid(params)
    no_improvement_count = 0

    with tqdm(total=len(grid), desc="Processing Model", leave=False) as pbar:
        for param_combination in grid:
            params_key = json.dumps(param_combination, sort_keys=True)

            # Already evaluated in an earlier run: nothing to log or train.
            if params_key in checked_params:
                pbar.update(1)
                continue

            tqdm.write(f"Training region {region_id} with parameters: {param_combination}")
            time.sleep(0.5)  # short pause kept from the original; presumably lets the message render

            model = RegionalLSTM(input_size=n_features,
                                 hidden_size=param_combination['hidden_size'],
                                 num_layers=param_combination['num_layers'],
                                 output_size=1).to(device)  # output_size is fixed

            validation_losses = train_and_evaluate_region(
                model,
                region_id,
                train_features_dict,
                train_targets_dict,
                val_features_dict,
                val_targets_dict,
                param_combination['learning_rate'],
                param_combination['num_epochs'],
            )

            # Score a combination by the loss of the state the model was left in.
            most_recent_loss = validation_losses[-1]
            improved = most_recent_loss < best_validation_loss

            if improved:
                best_validation_loss = most_recent_loss
                best_model = model
                best_params = param_combination
                no_improvement_count = 0

                # Write the checkpoint BEFORE marking the combination as checked, so an
                # interruption between the two writes cannot lose the best model.
                torch.save({
                    'model': best_model.state_dict(),
                    'params': best_params,
                    'validation_loss': best_validation_loss,
                }, checkpoint_path)

            checked_params[params_key] = validation_losses
            with open(checked_params_file, 'w') as f:
                json.dump(checked_params, f)

            pbar.update(1)

            if not improved:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    tqdm.write(f"Stopping early for region {region_id} due to no improvement in hyperparameters.")
                    time.sleep(0.5)
                    break

    clear_output(wait=True)  # replace the per-combination log with the summary below

    print(f"Best parameters: {best_params} with average validation loss: {best_validation_loss}")
    return best_model, best_params, best_validation_loss



In [None]:
# Notebook-level settings; input_size must equal the size of the variables axis
# of the feature tensors.
batch_size = 1
input_size = 10

# Candidate values for the per-region grid search
param_grid = dict(
    hidden_size=[2, 4, 8],
    num_layers=[1, 2, 4],
    learning_rate=[0.001, 0.005, 0.01, 0.05],
    num_epochs=[20, 50, 100, 150],
)
best_model_dict = {}

# Tune every region independently and collect the winner of each search
for region_id in tqdm(train_features_dict.keys()):
    best_model, best_params, best_validation_loss = hyperparameter_tuning_region(
        param_grid,
        region_id,
        train_features_dict,
        train_targets_dict,
        val_features_dict,
        val_targets_dict,
    )
    best_model_dict[region_id] = {
        "model": best_model,
        "parameters": best_params,
        "validation loss": best_validation_loss,
    }

Best parameters: {'hidden_size': 2, 'learning_rate': 0.05, 'num_epochs': 100, 'num_layers': 1} with average validation loss: 0.029675839468836784


Processing Model:   0%|          | 0/144 [00:00<?, ?it/s]

Training region region_3 with parameters: {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 20, 'num_layers': 1}


Processing region_3:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_3 with parameters: {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 20, 'num_layers': 2}


Processing region_3:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_3 with parameters: {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 20, 'num_layers': 4}


Processing region_3:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_3 with parameters: {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 50, 'num_layers': 1}


Processing region_3:   0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
# Rebuild best_model_dict from whichever per-region checkpoints exist on disk.
best_model_dict = {}

for region_id in train_features_dict.keys():
    region_checkpoint_file = f'checkpoints/dates/model_{region_id}_checkpoint.pt'
    if not os.path.exists(region_checkpoint_file):
        continue  # region has no checkpoint; leave it out of the dictionary

    checkpoint = torch.load(region_checkpoint_file, map_location=torch.device('cpu'))
    best_model = checkpoint['model']                      # stored model state
    best_params = checkpoint['params']                    # winning hyperparameters
    best_validation_loss = checkpoint['validation_loss']  # loss of the winning run
    best_model_dict[region_id] = {
        "model": best_model,
        "parameters": best_params,
        "validation loss": best_validation_loss,
    }
    print(region_id, best_params, best_validation_loss)


In [None]:
# Display the collected per-region entries ("model", "parameters", "validation loss").
best_model_dict

In [None]:
# Persist the per-region tuning results in the shared models directory.
torch.save(best_model_dict,'../../models/withTimeFeatures/individual_training_models.pt')

Now we will test these models on the testing sets.

In [None]:
def test_regional_model(region_id, test_features_dict,test_targets_dict, checkpoint_path):
    """Evaluate a region's checkpointed RegionalLSTM on that region's test split.

    Args:
        region_id: key of the region in both test dictionaries.
        test_features_dict: test inputs per region, (batch, time, variables, position).
        test_targets_dict: test targets per region.
        checkpoint_path: file holding a dict with 'model' (state dict) and 'params'
            (must contain 'hidden_size' and 'num_layers').

    Returns:
        ``(output, error)``: the model predictions and the MSE between predictions
        and targets as a 0-dim tensor.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    test_dataset = test_features_dict[region_id].to(device)
    test_targets = test_targets_dict[region_id].to(device)

    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    best_params = checkpoint['params']

    # Rebuild the architecture the checkpoint was trained with.
    input_size = test_dataset.shape[2]
    hidden_size = best_params['hidden_size']
    num_layers = best_params['num_layers']

    model = RegionalLSTM(input_size, hidden_size, num_layers,output_size=1)
    model.load_state_dict(checkpoint['model'])
    # The inputs and targets live on `device`; the model has to be there as well,
    # otherwise the forward pass / loss fails with a device mismatch on GPU machines.
    model.to(device)
    model.eval()

    criterion = nn.MSELoss()
    with torch.no_grad():
        output, _ = model(test_dataset)
        error = criterion(output, test_targets)

    return output, error

In [None]:
# Evaluate every region that has an entry in best_model_dict on its test split.
region_outputs_dict = {}

for region_id in best_model_dict:
    checkpoint_path = f'checkpoints/dates/model_{region_id}_checkpoint.pt'
    output, error = test_regional_model(
        region_id,
        test_features_dict,
        test_targets_dict,
        checkpoint_path,
    )
    region_outputs_dict[region_id] = {
        "prediction": output,
        "mean squared error": error,
    }
    print(region_id, error)

In [None]:
# Persist the per-region test predictions and errors in the shared models directory.
torch.save(region_outputs_dict,'../../models/withTimeFeatures/individual_training_outputs.pt')

## Hypertuning full model

In [None]:
def train_and_evaluate_model(model, train_features_dict, train_targets_dict, val_features_dict, val_targets_dict, learning_rate,num_epochs):
    """Train a multi-region model region by region and record validation losses.

    Every epoch visits each region of ``train_features_dict`` once: one optimiser step
    on that region's full training sequence, immediately followed by a validation pass
    on the same region.

    Args:
        model: module called as ``model({region_id: features})`` that returns a dict
            of outputs keyed by region id; it is trained in place.
        train_features_dict, train_targets_dict: training inputs / targets per region.
        val_features_dict, val_targets_dict: validation inputs / targets per region;
            must contain every key of ``train_features_dict``.
        learning_rate: Adam learning rate.
        num_epochs: number of passes over all regions.

    Returns:
        dict mapping region id -> list of validation MSE values (floats), one per epoch.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    validation_losses = {region_id: [] for region_id in val_features_dict.keys()}

    total_iterations = num_epochs * len(train_features_dict)

    with tqdm(total=total_iterations, desc="Processing Model", leave=True) as pbar:
        for _ in range(num_epochs):
            for region_id, features in train_features_dict.items():
                features = features.to(device)
                targets = train_targets_dict[region_id].to(device)

                # --- training step for this region ---
                model.train()
                output = model({region_id: features})[region_id]
                loss = criterion(output, targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # --- validation step for this region ---
                model.eval()
                val_features = val_features_dict[region_id].to(device)
                val_targets = val_targets_dict[region_id].to(device)

                with torch.no_grad():
                    val_output = model({region_id: val_features})[region_id]
                    val_loss = criterion(val_output, val_targets)
                validation_losses[region_id].append(val_loss.item())

                # Drop the references before the next region's tensors are moved over.
                del val_features, val_targets, val_output
                pbar.update(1)

    return validation_losses

In [None]:
def hyperparameter_tuning(params,train_features_dict, train_targets_dict, val_features_dict, val_targets_dict, neighbor_mask):
    """Grid-search MultiRegionModel hyperparameters, resumably.

    Progress is persisted in two files under ``checkpoints/dates/``: a JSON file mapping
    every evaluated parameter combination to its per-region validation curves, and a
    checkpoint with the best model state so far. Combinations already present in the
    JSON file are skipped. A combination is scored by the mean, over regions, of each
    region's last validation loss.

    Args:
        params: dict of parameter name -> list of candidate values; must provide
            'hidden_size', 'num_layers', 'learning_rate' and 'num_epochs'.
        train_features_dict, train_targets_dict: training inputs / targets per region.
        val_features_dict, val_targets_dict: validation inputs / targets per region.
        neighbor_mask: region adjacency matrix handed to MultiRegionModel.

    Returns:
        ``(best_model, best_ind_validation_loss, best_avg_validation_loss)``.
        ``best_model`` is the trained module when the best combination was found during
        this call, the checkpointed state dict when nothing beat the checkpoint, and
        ``None`` when neither exists. ``best_ind_validation_loss`` is the list of
        per-region losses of the best combination, or ``None`` if there is none.
    """
    checked_params_file = 'checkpoints/dates/checked_params.json'
    checkpoint_path = 'checkpoints/dates/model_checkpoint.pt'

    # Both files are written below; make sure their directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    if os.path.exists(checked_params_file):
        with open(checked_params_file, 'r') as f:
            checked_params = json.load(f)
    else:
        checked_params = {}

    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path,map_location=torch.device('cpu'))
        best_model = checkpoint['model']  # a state dict, see Returns
        best_params = checkpoint['params']
        best_ind_validation_loss = checkpoint['indv_validation_loss']
        best_avg_validation_loss = checkpoint['avg_validation_loss']
    else:
        best_model = None
        best_params = None
        # Must be bound even if no combination improves, because it is returned below.
        best_ind_validation_loss = None
        best_avg_validation_loss = float('inf')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Features are (batch, time, variables, position); read the variable count from
    # the data instead of relying on a notebook-level global.
    n_features = next(iter(train_features_dict.values())).shape[2]

    # A separate loop name keeps the `params` argument (the grid) intact.
    for param_combination in tqdm(ParameterGrid(params)):
        params_key = json.dumps(param_combination, sort_keys=True)

        if params_key in checked_params:
            continue  # evaluated in an earlier run

        print(f"Training with parameters: {param_combination}")

        model = MultiRegionModel(neighbor_mask, input_size=n_features,
                                 hidden_size=param_combination['hidden_size'],
                                 num_layers=param_combination['num_layers'],
                                 output_size=1)  # output_size is fixed

        validation_losses = train_and_evaluate_model(
            model.to(device),
            train_features_dict,
            train_targets_dict,
            val_features_dict,
            val_targets_dict,
            param_combination['learning_rate'],
            param_combination['num_epochs'],
        )

        # Last validation loss of every region, and their mean as the overall score.
        most_recent_losses = [losses[-1] for losses in validation_losses.values()]
        overall_avg_loss = float(np.mean(most_recent_losses))

        if overall_avg_loss < best_avg_validation_loss:
            best_avg_validation_loss = overall_avg_loss
            best_ind_validation_loss = most_recent_losses
            best_model = model
            best_params = param_combination

            # Write the checkpoint BEFORE marking the combination as checked, so an
            # interruption between the two writes cannot lose the best model.
            torch.save({
                'model': best_model.state_dict(),
                'params': best_params,
                'indv_validation_loss': best_ind_validation_loss,
                'avg_validation_loss': best_avg_validation_loss,
            }, checkpoint_path)

        checked_params[params_key] = validation_losses
        with open(checked_params_file, 'w') as f:
            json.dump(checked_params, f)

    print(f"Best parameters: {best_params} with average validation loss: {best_avg_validation_loss}")
    return best_model, best_ind_validation_loss, best_avg_validation_loss


We choose candidate hyperparameters based on the values selected for the individually tuned regions. This reduces the number of combinations we have to check.

In [None]:
# Notebook-level settings; input_size must equal the size of the variables axis
# of the feature tensors.
batch_size = 1
input_size = 10

# Candidate values for the multi-region grid search
param_grid = dict(
    hidden_size=[2, 4, 8],
    num_layers=[1, 2, 4],
    learning_rate=[0.001, 0.005, 0.01, 0.05],
    num_epochs=[20, 50, 100, 150],
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Run the search over all regions jointly and keep its three results
best_model, best_ind_validation_loss, best_avg_validation_loss = hyperparameter_tuning(
    param_grid,
    train_features_dict,
    train_targets_dict,
    val_features_dict,
    val_targets_dict,
    neighbor_mask,
)

Now we test the full model on the same test sets as the individual models.

In [None]:
# Reload the tuned multi-region checkpoint onto the CPU and unpack its entries.
# NOTE(review): hyperparameter_tuning writes 'checkpoints/dates/model_checkpoint.pt',
# while this cell reads 'model_checkpoint_dates.pt' -- confirm which file name is intended.
checkpoint = torch.load('checkpoints/dates/model_checkpoint_dates.pt',map_location=torch.device('cpu'))
best_model = checkpoint['model']  # Stored model state
best_params = checkpoint['params']  # Hyperparameters of the best combination

best_ind_validation_loss = checkpoint['indv_validation_loss']  # Per-region validation losses of the best combination
best_avg_validation_loss = checkpoint['avg_validation_loss']  # Their average
print(best_params, best_ind_validation_loss)

In [None]:
def test_full_model(test_features_dict, test_targets_dict,
                    checkpoint_path='checkpoints/dates/model_checkpoint_dates.pt'):
    """Evaluate the saved multi-region model on every region of the test set.

    Relies on the notebook-level names ``MultiRegionModel``, ``neighbor_mask``
    and ``device``.

    Args:
        test_features_dict: Mapping of region id -> feature tensor. The size of
            axis 2 of the ``'region_0'`` entry is used as the model input size.
        test_targets_dict: Mapping of region id -> target tensor, with the same
            keys as ``test_features_dict``.
        checkpoint_path: Checkpoint file holding the ``'params'`` dict and the
            ``'model'`` state_dict. Defaults to the tuning checkpoint.

    Returns:
        dict: region id -> ``{"prediction": tensor, "mean squared error": float}``.
    """
    criterion = nn.MSELoss()

    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    best_params = checkpoint['params']

    input_size = test_features_dict['region_0'].shape[2]
    hidden_size = best_params['hidden_size']
    num_layers = best_params['num_layers']

    model = MultiRegionModel(neighbor_mask, input_size, hidden_size, num_layers, output_size=1)
    model.load_state_dict(checkpoint['model'])  # Restore the trained weights
    # The inputs below are moved to `device`, so the model must live there too;
    # otherwise a GPU runtime fails with a CPU/CUDA device mismatch.
    model.to(device)

    model.eval()  # Set model to evaluation mode
    test_losses = {}  # Per-region predictions and losses

    with torch.no_grad():  # Disable gradient calculation for testing
        for region_id, features in test_features_dict.items():
            features = features.to(device)
            targets = test_targets_dict[region_id].to(device)

            # Forward pass for this region only
            region_outputs = model({region_id: features})
            output = region_outputs[region_id]

            loss = criterion(output, targets)
            test_losses[region_id] = {"prediction": output, "mean squared error": loss.item()}

    return test_losses

In [None]:
# Evaluation settings; `device` is read as a global by test_full_model.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
batch_size = 1

# Per-region predictions and MSE of the multi-region model on the test set.
test_losses_dict = test_full_model(
    test_features_dict=test_features_dict,
    test_targets_dict=test_targets_dict,
)

In [None]:
# Report the test MSE of the multi-region model, one line per region.
for region_id, data in test_losses_dict.items():
    region_mse = data['mean squared error']
    print(region_id, region_mse)

In [None]:
# Copy the tuning checkpoint (loaded onto the CPU) into the shared models folder.
checkpoint = torch.load(
    'checkpoints/dates/model_checkpoint_dates.pt',
    map_location=torch.device('cpu'),
)
torch.save(obj=checkpoint, f='../../models/withTimeFeatures/multiregion_model.pt')

In [None]:
# Persist the per-region predictions and MSE of the multi-region model.
torch.save(
    obj=test_losses_dict,
    f='../../models/withTimeFeatures/multiregion_outputs.pt',
)

## Comparing performance

Now we compare the performance of the individual regional models and the multi-region model.

Run the region models using the hyperparameters from the MultiRegionModel.

In [None]:
# Train one RegionalLSTM per region using the hyperparameters that were selected
# for the multi-region model, so the two approaches can be compared like for like.
checkpoint = torch.load('checkpoints/dates/model_checkpoint_dates.pt', map_location=torch.device('cpu'))
best_params = checkpoint['params']  # Hyperparameters chosen for the multi-region model
batch_size = 1
hidden_size = best_params['hidden_size']
num_layers = best_params['num_layers']
num_epochs = best_params['num_epochs']
learning_rate = best_params['learning_rate']
input_size = 10

# Resolve the device once; it does not change between regions.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

same_params_region_model_dict = {}

for region_id in train_features_dict.keys():
    checkpoint_path = f'checkpoints/dates/model_same_param_{region_id}_checkpoint.pt'

    model = RegionalLSTM(input_size, hidden_size, num_layers, output_size=1)
    model.to(device)  # Move model to GPU if available
    criterion = nn.MSELoss()  # Mean Squared Error for regression
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # The whole region is used as a single batch every epoch, so the tensors
    # are transferred to the device once instead of once per epoch.
    features = train_features_dict[region_id].to(device)
    targets = train_targets_dict[region_id].to(device)
    val_features = val_features_dict[region_id].to(device)
    val_targets = val_targets_dict[region_id].to(device)

    validation_losses = []  # One validation MSE per epoch

    for epoch in tqdm(range(num_epochs), desc=f"Processing {region_id}", leave=False):
        # Training step (one full-batch update per epoch)
        model.train()
        output, _ = model(features)
        loss = criterion(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Validation step
        model.eval()
        with torch.no_grad():  # Disable gradient calculation for validation
            val_output, _ = model(val_features)
            val_loss = criterion(val_output, val_targets)
            validation_losses.append(val_loss.item())

        # Overwrite the region checkpoint after every epoch with the latest
        # weights and the validation history so far.
        torch.save({
                'model': model.state_dict(),  # Save the model state
                'params': best_params,  # Save the shared hyperparameters
                'validation_loss': validation_losses,  # Validation loss history up to this epoch
            }, checkpoint_path)

    # Drop the references to this region's tensors before the next region.
    del features, targets, val_features, val_targets

    same_params_region_model_dict[region_id] = {"model": model,
                                "parameters" : best_params,
                                "validation loss" : validation_losses}


In [None]:
# Reload every same-parameter regional checkpoint onto the CPU, keyed by region id.
same_params_region_model_dict = {}
for region_id in region_outputs_dict:
    region_checkpoint_path = f'checkpoints/dates/model_same_param_{region_id}_checkpoint.pt'
    same_params_region_model_dict[region_id] = torch.load(
        region_checkpoint_path,
        map_location=torch.device('cpu'),
    )

In [None]:
# Evaluate each same-parameter regional model on the test set.
same_params_region_outputs_dict = {}

main_model = torch.load('checkpoints/dates/model_checkpoint_dates.pt', map_location=torch.device('cpu'))
best_params = main_model['params']  # Hyperparameters chosen for the multi-region model
batch_size = 1
hidden_size = best_params['hidden_size']
num_layers = best_params['num_layers']
num_epochs = best_params['num_epochs']
learning_rate = best_params['learning_rate']
# Must match the input size the regional models were trained with (10);
# a model built with a different input size cannot load their weights.
input_size = 10

for region_id in region_outputs_dict.keys():
    # Build the path inside the loop so each region is tested with its OWN
    # checkpoint rather than with whichever region_id was left over from an
    # earlier cell.
    checkpoint_path = f'checkpoints/dates/model_same_param_{region_id}_checkpoint.pt'
    output, error = test_regional_model(region_id, test_features_dict, test_targets_dict, checkpoint_path)
    same_params_region_outputs_dict[region_id] = {"prediction" : output, "mean squared error": error}
    print(region_id, error)


In [None]:
# Persist the same-parameter regional models.
torch.save(
    obj=same_params_region_model_dict,
    f='../../models/withTimeFeatures/same_param_individual_models.pt',
)

In [None]:
# Persist the test predictions and MSE of the same-parameter regional models.
torch.save(
    obj=same_params_region_outputs_dict,
    f='../../models/withTimeFeatures/same_param_individual_outputs.pt',
)

In [None]:
def _percentage_improvement(baseline_mse, candidate_mse):
    """Return the MSE reduction of candidate vs. baseline in percent (None if baseline is 0)."""
    if baseline_mse == 0:
        return None
    return ((baseline_mse - candidate_mse) / baseline_mse) * 100


# Running totals used for the average MSE of each modelling approach.
total_mse_indv_regions = 0
total_mse_indv_regions_all_param = 0
total_mse_all_regions = 0
count = 0
comparison_results = {}

# Collect the three test MSEs per region and derive the pairwise comparisons.
for region_id in region_outputs_dict:
    mse_tuned = region_outputs_dict[region_id]['mean squared error'].item()
    mse_shared = same_params_region_outputs_dict[region_id]['mean squared error'].item()
    mse_multi = test_losses_dict[region_id]['mean squared error']

    total_mse_indv_regions += mse_tuned
    total_mse_indv_regions_all_param += mse_shared
    total_mse_all_regions += mse_multi
    count += 1

    # Model names ordered from lowest to highest MSE (ties keep listing order).
    ranking = sorted(
        [
            ('Individually tuned regions', mse_tuned),
            ('Individual regions, same params', mse_shared),
            ('Multiple region model', mse_multi),
        ],
        key=lambda entry: entry[1],
    )

    comparison_results[region_id] = {
        'MSE Individually tuned regions': mse_tuned,
        'MSE Individual regions, same params': mse_shared,
        'MSE Multiple region model': mse_multi,
        'Difference (Individual - Same Param)': mse_tuned - mse_shared,
        'Percentage Improvement (Individual - Same Param)': _percentage_improvement(mse_tuned, mse_shared),
        'Difference (Individual - Multiregion)': mse_tuned - mse_multi,
        'Percentage Improvement (Individual - Multiregion)': _percentage_improvement(mse_tuned, mse_multi),
        'Difference (Same Param - Multiregion)': mse_shared - mse_multi,
        'Percentage Improvement (Same Param - Multiregion)': _percentage_improvement(mse_shared, mse_multi),
        'Performance Order': ranking,
    }

# Average MSE per approach; falls back to 0 when no region was compared.
if count > 0:
    average_mse_model1 = total_mse_indv_regions / count
    average_mse_model2 = total_mse_indv_regions_all_param / count
    average_mse_model3 = total_mse_all_regions / count
else:
    average_mse_model1 = average_mse_model2 = average_mse_model3 = 0

# Columns copied unchanged from comparison_results into the summary table,
# in the order in which they should appear.
table_columns = (
    'MSE Individually tuned regions',
    'MSE Individual regions, same params',
    'MSE Multiple region model',
    'Percentage Improvement (Individual - Same Param)',
    'Percentage Improvement (Individual - Multiregion)',
    'Difference (Same Param - Multiregion)',
    'Percentage Improvement (Same Param - Multiregion)',
)

# One row per region; the ranking is flattened to a comma-separated string.
data = [
    {
        'Region': region_id,
        **{column: results[column] for column in table_columns},
        'Model Performance Order': ', '.join(name for name, _ in results['Performance Order']),
    }
    for region_id, results in comparison_results.items()
]

results_df = pd.DataFrame(data)

# Print average MSE for all models
print(f"\nAverage MSE Individually tuned regions: {average_mse_model1}")
print(f"Average MSE Individual regions, same params : {average_mse_model2}")
print(f"Average MSE Multiple region model: {average_mse_model3}")

In [None]:
# Write the per-region comparison table to disk (the index is written as well).
results_df.to_csv(path_or_buf='../../models/withTimeFeatures/results.csv')