In [1]:
from google.colab import drive
drive.mount('/content/drive/')

%cd /content/drive/MyDrive/CorrectlyClonedChesapeake/ChesapeakeBayChlorophyll/notebooks/models/

Mounted at /content/drive/
/content/drive/MyDrive/CorrectlyClonedChesapeake/ChesapeakeBayChlorophyll/notebooks/models


# Setup

In [15]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import os

import logging
from tqdm.notebook import tqdm  # For progress bar
# Configure logging instead of print
logging.basicConfig(filename='tuning.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
import time
from IPython.display import clear_output


import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.model_selection import ParameterGrid
import json
import pickle
from concurrent.futures import ThreadPoolExecutor, as_completed

In [16]:
# Load tensors
# features_tensor = torch.load('../../data/features_masked_tensor.pt')
# chlorophyll_tensor = torch.load('../../data/chlorophyll_masked_tensor.pt')

satbuoy_tensor_dict = {}
water_tensor_dict = {}

for i in range(11):
    # Load tensors using formatted string (f-string)
    satbuoy_features_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/features_region_{i}_tensor.pt')
    satbuoy_chlorophyll_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/chlorophyll_region_{i}_tensor.pt')

    # # Attach names to tensor dimensions
    # features_tensor.names = ('time','features','position')
    # chlorophyll_tensor.names = ('time','position')

    # Store the tensors in the dictionary
    satbuoy_tensor_dict[f'region_{i}_features'] = satbuoy_features_tensor
    satbuoy_tensor_dict[f'region_{i}_chlorophyll'] = satbuoy_chlorophyll_tensor

    print(f"Region {i}: features tensor shape: {satbuoy_features_tensor.shape}, chlorophyll tensor shape: {satbuoy_chlorophyll_tensor.shape}")
    # Load tensors using formatted string (f-string)
    water_features_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/water_features_region{i}_tensor.pt')
    water_chlorophyll_tensor = torch.load(f'../../data/filesForModel/withDateFeatures/tensors/water_chlorophyll_region{i}_tensor.pt')

    # # Attach names to tensor dimensions
    # features_tensor.names = ('time','features','position')
    # chlorophyll_tensor.names = ('time','position')

    # Store the tensors in the dictionary
    water_tensor_dict[f'region_{i}_features'] = water_features_tensor
    water_tensor_dict[f'region_{i}_chlorophyll'] = satbuoy_chlorophyll_tensor

    print(f"Region {i}: features tensor shape: {water_features_tensor.shape}, chlorophyll tensor shape: {water_chlorophyll_tensor.shape}")


Region 0: features tensor shape: torch.Size([2764, 10, 127]), chlorophyll tensor shape: torch.Size([2764, 127])
Region 0: features tensor shape: torch.Size([413, 14, 127]), chlorophyll tensor shape: torch.Size([413, 127])
Region 1: features tensor shape: torch.Size([2764, 10, 236]), chlorophyll tensor shape: torch.Size([2764, 236])
Region 1: features tensor shape: torch.Size([370, 14, 236]), chlorophyll tensor shape: torch.Size([370, 236])
Region 2: features tensor shape: torch.Size([2764, 10, 311]), chlorophyll tensor shape: torch.Size([2764, 311])
Region 2: features tensor shape: torch.Size([813, 14, 311]), chlorophyll tensor shape: torch.Size([813, 311])
Region 3: features tensor shape: torch.Size([2764, 10, 769]), chlorophyll tensor shape: torch.Size([2764, 769])
Region 3: features tensor shape: torch.Size([745, 14, 769]), chlorophyll tensor shape: torch.Size([745, 769])
Region 4: features tensor shape: torch.Size([2764, 10, 1234]), chlorophyll tensor shape: torch.Size([2764, 1234]

In [17]:
def extract_dates(tensor):
    # Assuming the date components are in the third dimension of the tensor
    years = tensor[0, :, -3, 0]    # Year is the third to last variable
    days = tensor[0, :, -1, 0]     # Day-of-year is last variable

    # Combine into a list of tuples for dates
    dates = []
    time_steps= tensor.size(1)  # Unpack the shape

    for time_step in range(time_steps):
        year = int(years[time_step].item())
        day = int(days[time_step].item())
        dates.append((year, day))  # Append the date tuple

    return dates


def get_common_date_indices(daily_tensor, infrequent_tensor):
    # Extract dates from both tensors
    daily_dates_extracted = extract_dates(daily_tensor)
    infrequent_dates_extracted = extract_dates(infrequent_tensor)

    # Find common dates
    common_dates = set(daily_dates_extracted) & set(infrequent_dates_extracted)

    # Get indices of common dates in the daily tensor
    daily_indices = [i for i, date in enumerate(daily_dates_extracted) if date in common_dates]

    # Get indices of common dates in the infrequent tensor
    infrequent_indices = [i for i, date in enumerate(infrequent_dates_extracted) if date in common_dates]

    return daily_indices, infrequent_indices



In [18]:
def train_test_split(daily_tensor_dict,infrequent_tensor_dict):
    daily_splits_dict = {}
    infrequent_splits_dict = {}
    # feature shape (time, variables, position)
    # chlorophyll shape (time, position)
    for region_id in range(11):
        daily_features_tensor = daily_tensor_dict[f'region_{region_id}_features']
        daily_chlorophyll_tensor = daily_tensor_dict[f'region_{region_id}_chlorophyll']
        infrequent_features_tensor = infrequent_tensor_dict[f'region_{region_id}_features']
        infrequent_chlorophyll_tensor = infrequent_tensor_dict[f'region_{region_id}_chlorophyll']

        # reshape to (batch, time, variables, position)
        daily_features_tensor = daily_features_tensor.unsqueeze(0)
        daily_chlorophyll_tensor = daily_chlorophyll_tensor.unsqueeze(0)
        infrequent_features_tensor = infrequent_features_tensor.unsqueeze(0)
        infrequent_chlorophyll_tensor = infrequent_chlorophyll_tensor.unsqueeze(0)

        daily_common_indices, infrequent_common_indices = get_common_date_indices(daily_features_tensor, infrequent_features_tensor)

        # Split data into 60% training, 15% validation, 25% test (so there are water timesteps in test)
        train_size = int(0.6 * daily_features_tensor.shape[1])  # 60% of the time steps
        val_size = int(0.15 * daily_features_tensor.shape[1])   # 15% for validation
        test_size = daily_features_tensor.shape[1] - train_size - val_size  # Remaining for test set

        last_common_train_index = max(idx for idx in daily_common_indices if idx < train_size)
        last_common_val_index = max(idx for idx in daily_common_indices if idx < val_size)

        # Find the corresponding index in the infrequent indices
        infrequent_train_index = infrequent_common_indices[
            daily_common_indices.index(last_common_train_index)
        ]

        infrequent_val_index = infrequent_common_indices[
            daily_common_indices.index(last_common_val_index)
        ]


        # Split features into train, validation, and test sets
        train_features = daily_features_tensor[:, :train_size, :, :]  # First 60% for training
        val_features = daily_features_tensor[:, train_size:train_size+val_size, :, :]  # Next 15% for validation
        test_features = daily_features_tensor[:, train_size+val_size:, :, :]  # Remaining for test

        # Split chlorophyll targets (same logic)
        train_targets = daily_chlorophyll_tensor[:, :train_size, :]  # First 70% for training
        val_targets = daily_chlorophyll_tensor[:, train_size:train_size+val_size, :]  # Next 15% for validation
        test_targets = daily_chlorophyll_tensor[:, train_size+val_size:, :]  # Remaining for test

        # Store the splits in dictionaries
        # Also correct the indexing
        daily_splits_dict[f'region_{region_id}'] = {"train_features"  : train_features,
                                                    "val_features" : val_features,
                                                    "test_features": test_features,
                                                    "train_targets"  : train_targets,
                                                    "val_targets" : val_targets,
                                                    "test_targets": test_targets}

        infrequent_train_features = infrequent_features_tensor[:, :infrequent_train_index, :, :]  # First 70% for training
        infrequent_val_features = infrequent_features_tensor[:, infrequent_train_index:train_size+infrequent_val_index, :, :]  # Next 15% for validation
        infrequent_test_features = infrequent_features_tensor[:, infrequent_train_index+infrequent_val_index:, :, :]  # Remaining for test

        # Split chlorophyll targets (same logic)
        infrequent_train_targets = infrequent_chlorophyll_tensor[:, :infrequent_train_index, :]  # First 70% for training
        infrequent_val_targets = infrequent_chlorophyll_tensor[:, infrequent_train_index:train_size+infrequent_val_index, :]  # Next 15% for validation
        infrequent_test_targets = infrequent_chlorophyll_tensor[:, infrequent_train_index+infrequent_val_index:, :]  # Remaining for test

        # Store the splits in dictionaries
        # Also correct the indexing
        infrequent_splits_dict[f'region_{region_id}'] = {"train_features" : infrequent_train_features,
                                                    "val_features" : infrequent_val_features,
                                                    "test_features": infrequent_test_features,
                                                    "train_targets" : infrequent_train_targets,
                                                    "val_targets" : infrequent_val_targets,
                                                    "test_targets": infrequent_test_targets}

        # Print shapes for verification
        # print(f"Region {region_id}: Train features {train_features.shape}, Validation features {val_features.shape}, Test features {test_features.shape}")
        # print(f"Train targets {train_targets.shape}, Validation targets {val_targets.shape}, Test targets {test_targets.shape}")
        print(f"Region {region_id}: Train features {infrequent_train_features.shape}, Validation features {infrequent_val_features.shape}, Test features {infrequent_test_features.shape}")

    return daily_splits_dict, infrequent_splits_dict

satbuoy_splits_dict, water_splits_dict = train_test_split(satbuoy_tensor_dict, water_tensor_dict)


Region 0: Train features torch.Size([1, 261, 14, 127]), Validation features torch.Size([1, 152, 14, 127]), Test features torch.Size([1, 0, 14, 127])
Region 1: Train features torch.Size([1, 306, 14, 236]), Validation features torch.Size([1, 64, 14, 236]), Test features torch.Size([1, 18, 14, 236])
Region 2: Train features torch.Size([1, 483, 14, 311]), Validation features torch.Size([1, 330, 14, 311]), Test features torch.Size([1, 132, 14, 311])
Region 3: Train features torch.Size([1, 546, 14, 769]), Validation features torch.Size([1, 199, 14, 769]), Test features torch.Size([1, 0, 14, 769])
Region 4: Train features torch.Size([1, 634, 14, 1234]), Validation features torch.Size([1, 339, 14, 1234]), Test features torch.Size([1, 84, 14, 1234])
Region 5: Train features torch.Size([1, 462, 14, 602]), Validation features torch.Size([1, 99, 14, 602]), Test features torch.Size([1, 0, 14, 602])
Region 6: Train features torch.Size([1, 552, 14, 1251]), Validation features torch.Size([1, 186, 14, 

# Classes

## Multistream -- Adding in water sample data

In [19]:
class RegionalLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size,h0=None, c0=None):
        super(RegionalLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        # Store hidden states if needed
        self.h0 = h0
        self.c0 = c0

    def forward(self, x, h0=None, c0=None, time_batch_size= 100):
        # x shape: (batch_size, time_steps, variables, position)
        batch_size, time_steps, variables, positions = x.size()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        x = x.to(device)
        # Reshape to (batch_size * position, time_steps, variables) to treat each position separately
        x = x.permute(0, 3, 1, 2).reshape(batch_size * positions, time_steps, variables)

        # If no hidden state provided, initialize hidden states
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
            c0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)


        # Adjust mini-batching to handle cases where time_steps < time_batch_size
        outputs = []
        for start in range(0, time_steps, time_batch_size):
            end = min(start + time_batch_size, time_steps)
            x_time_batch = x[:, start:end, :]  # Mini-batch along time

            # Forward pass through the LSTM with hidden state carryover
            lstm_out, (h0, c0) = self.lstm(x_time_batch, (h0, c0))  # Keep hidden state across time mini-batches

            outputs.append(lstm_out)

        # Concatenate the outputs for all time mini-batches
        lstm_out = torch.cat(outputs, dim=1)  # Concatenate along the time dimension


        return lstm_out, (h0,c0)


In [20]:
import torch
import torch.nn as nn

class MultistreamLSTM(nn.Module):
    def __init__(self, pretrained_model_state, daily_input_size, infrequent_input_size, hidden_size, num_layers, output_size):
        super(MultistreamLSTM, self).__init__()

        self.existing_model = RegionalLSTM(daily_input_size, hidden_size, num_layers, output_size=1)
        if pretrained_model_state is not None:
            self.existing_model.load_state_dict(pretrained_model_state)
        self.water_lstm = nn.LSTM(infrequent_input_size, hidden_size, num_layers, batch_first=True)

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        # Define any additional layers here if needed
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, features_daily, features_infrequent, h0=None, c0=None):
        time_steps = features_daily.size(1)
        # Get common date indices
        daily_indices, infrequent_indices = get_common_date_indices(features_daily, features_infrequent)

        # Get the output from the trained model
        trained_output, (trained_h0, trained_cO) = self.existing_model(features_daily)  # Output shape: (positions, time_steps, hidden_size)

        # Initialize a list to hold the combined outputs
        combined_output_list = []
        combined_h0_list = []
        combined_c0_list = []

        for idx in range(features_daily.size(1)):
            daily_result = trained_output[:, idx, :].unsqueeze(1)  # Shape: (positions, hidden_size) to (positions, time, hidden_size)
            daily_h0 = trained_h0
            daily_c0 = trained_cO

            if idx not in daily_indices:
                positions, time, hidden_size = daily_result.size()
                combined_output = daily_result
                combined_h0 = daily_h0
                combined_c0 = daily_c0
            else:
                # Get the corresponding infrequent index
                infrequent_idx = infrequent_indices[daily_indices.index(idx)]

                if infrequent_idx < features_infrequent.size(1):  # Ensure we only access valid indices
                    infrequent_data = features_infrequent[0, infrequent_idx, :, :]  # Shape: (features, positions)

                    # Reshape to (batch_size * position, time_steps, variables) to treat each position separately
                    variables, positions = infrequent_data.size()
                    infrequent_data= infrequent_data.permute(1, 0).unsqueeze(1)

                    # Forward pass through the LSTM with hidden state carryover
                    infrequent_output, (infrequent_h0, infrequent_cO) = self.water_lstm(infrequent_data)

                    # Combine the results
                    combined_output = torch.mean(torch.stack([infrequent_output, daily_result]), dim=0)  # Shape: (1, batch_size, hidden_size)
                    if daily_h0 is not None and infrequent_h0 is not None:
                        combined_h0 = torch.mean(torch.stack([daily_h0, infrequent_h0]), dim=0)  # Shape: (1, batch_size, hidden_size)
                        combined_c0 = torch.mean(torch.stack([daily_c0, infrequent_cO]), dim=0)  # Shape: (1, batch_size, hidden_size)

            combined_output_list.append(combined_output)
            combined_h0_list.append(combined_h0)
            combined_c0_list.append(combined_c0)

        # Convert combined_outputs to a tensor if needed
        final_combined_output = torch.cat(combined_output_list, dim=1)
        final_combined_h0 = torch.cat(combined_h0_list)
        final_combined_c0 = torch.cat(combined_c0_list)


        # Apply the fully connected layer at every time step for each position
        final_combined_output = self.fc(final_combined_output)  # Shape: (batch_size * positions, time_steps, output_size)
        # Reshape back to (batch_size, time_steps, positions)
        final_combined_output = final_combined_output.view(1, positions, time_steps).permute(0, 2, 1)

        return final_combined_output, (final_combined_h0, final_combined_c0)


## Multiregion -- neighbors share hidden states

In [None]:
# Neighbor mask matrix (as described earlier)
neighbor_mask = torch.tensor([
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # Region 0, CB1TF
    [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],  # Region 1, CB2OH
    [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # Region 2, CB3MH
    [0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0],  # Region 3, CB4MH
    [0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1],  # Region 4, CB5MH
    [0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0],  # Region 5, CB6PH
    [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1],  # Region 6, CB7PH
    [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],  # Region 7, CB8PH
    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # Region 8, EASMH
    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # Region 9, MOBPH
    [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0]   # Region 110, TANMH
], dtype=torch.float32)

In [None]:
class MultiRegionModel(nn.Module):
    def __init__(self, neighbor_mask, daily_input_size, infrequent_input_size, hidden_size, num_layers, output_size):
        super(MultiRegionModel, self).__init__()
        self.neighbor_mask = neighbor_mask
        self.regions = nn.ModuleList([
            MultistreamLSTM(None, daily_input_size, infrequent_input_size, hidden_size, num_layers, output_size)
            for _ in range(neighbor_mask.shape[0])
        ])


    def forward(self, daily_features, infrequent_features):
        # region_inputs should be a dictionary where keys are region IDs and values are feature tensors
        region_outputs = {}  # Initialize a dictionary to store outputs for each region

        for region_id, region_data in daily_features.items():
            # Assuming region_id can be converted to an index
            region_index = int(region_id[-1])
            # Initialize combined_h0 for the first pass
            position_size = region_data.shape[-1]
            current_h0 = torch.zeros((1, position_size, self.regions[region_index].lstm.hidden_size)).to(device)  # Replace with appropriate shape

            # Find neighbors
            neighbors = [i for i in range(1, self.neighbor_mask.shape[0] ) if self.neighbor_mask[region_index, i] == 1]

            neighbor_hidden_states = []
            for neighbor_index in neighbors:
                _, (neighbor_h0, neighbor_c0) = self.regions[neighbor_index](region_data)  # Use region_data for the neighbor

                neighbor_hidden_states.append(neighbor_h0)
                # Assuming neighbor_hidden_states is a list of (batch_size * positions, time_steps, hidden_size) tensors
            if neighbor_hidden_states:
                aggregated_neighbors_h0 = torch.mean(torch.stack(neighbor_hidden_states), dim=0).to(device)  # Mean across neighbors

            # Combine current hidden state with aggregated neighbor hidden states
            combined_h0 = current_h0 + F.interpolate(aggregated_neighbors_h0, size=current_h0.shape[-1], mode='linear', align_corners=False).to(device)
            # Adjust this operation as needed

            # Call the RegionalLSTM with the current input and updated hidden states
            output, (current_h0, current_c0) = self.regions[region_index](region_data, combined_h0, neighbor_c0)

            # Store the output for the current region
            region_outputs[region_id] = output.to(device)# Store the output for the current region
        return region_outputs  # Return the dictionary of outputs for each region


# Training

## Individual Regions
### General code

In [21]:
def train_and_evaluate_region(model_params, region_id, daily_split_dict, infrequent_split_dict,model_state=None):
    hidden_size = model_params['hidden_size']
    num_layers = model_params['num_layers']
    learning_rate = model_params['learning_rate']
    num_epochs = model_params['num_epochs']

    # Define loss function and optimizer
    model = MultistreamLSTM(model_state, daily_input_size=10, infrequent_input_size=14, hidden_size=hidden_size, num_layers=num_layers, output_size=1)  # output_size is fixed

    criterion = nn.MSELoss()  # Mean Squared Error for regression
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    validation_losses = []

    with tqdm(total=num_epochs, desc=f"Processing {region_id}", leave=False) as pbar:

        for epoch in range(num_epochs):
            daily_features = daily_split_dict[region_id]['train_features']
            daily_targets = daily_split_dict[region_id]['train_targets']
            water_features = infrequent_split_dict[region_id]['train_features']
            water_targets = infrequent_split_dict[region_id]['train_targets']

            daily_features = daily_features.to(device)
            daily_targets = daily_targets.to(device)
            water_features = water_features.to(device)
            water_targets = water_targets.to(device)

            model.train()  # Ensure the model is in training mode before each training step

            # Retrieve output for the specific region
            output, _ = model(daily_features,water_features)
            # Compute loss
            loss = criterion(output, daily_targets)

            # Backpropagation and optimization steps
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Validation step
            model.eval()  # Set model to evaluation mode
            val_daily_features = daily_split_dict[region_id]['val_features']
            val_daily_targets = daily_split_dict[region_id]['val_targets']
            val_water_features = infrequent_split_dict[region_id]['val_features']
            val_water_targets = infrequent_split_dict[region_id]['val_targets']

            val_daily_features = val_daily_features.to(device)
            val_daily_targets = val_daily_targets.to(device)
            val_water_features = val_water_features.to(device)
            val_water_targets = val_water_targets.to(device)


            with torch.no_grad():  # Disable gradient calculation for validation
                val_output,_ = model(val_daily_features,val_water_features)
                val_loss = criterion(val_output, val_daily_targets)
                validation_losses.append(val_loss.item())  # Store the loss in the dictionary
            pbar.update(1)

    daily_test_dataset = daily_split_dict[region_id]['test_features']
    water_test_dataset = infrequent_split_dict[region_id]['test_features']

    daily_test_dataset = daily_test_dataset.to(device)
    water_test_dataset = water_test_dataset.to(device)

    with torch.no_grad():  # Disable gradient calculation
        test_output,_ = model(daily_test_dataset,water_test_dataset)

    true_values = daily_split_dict[region_id]['test_targets']

    true_values = true_values.to(device)
    error = criterion(test_output, true_values)

    return model, validation_losses, test_output, error  # Return losses for each region separately


### Using existing parameters

Import the pretrained model states and parameters.

In [None]:
models_dict = torch.load('../../models/withTimeFeatures/individual_training_models.pt')


In [None]:
region_models_pretrained_dict ={}
for i in tqdm(range(11)):
    region_id = f'region_{i}'
    model_state= models_dict[region_id]['model']
    model_params = models_dict[region_id]['parameters']
    model, validation_losses, test_output, error = train_and_evaluate_region(model_params, region_id, satbuoy_splits_dict, water_splits_dict,model_state)

    region_models_pretrained_dict[region_id] = {'model' : model, 'validation losses': validation_losses,
                                     'prediction': test_output, 'mean square error': error}

In [None]:
for i in range(11):
    region_id = f'region_{i}'
    model_params = models_dict[region_id]['parameters']
    print(region_id,model_params)

region_0 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 100, 'num_layers': 1}
region_1 {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 20, 'num_layers': 4}
region_2 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 20, 'num_layers': 3}
region_3 {'hidden_size': 2, 'learning_rate': 0.01, 'num_epochs': 20, 'num_layers': 3}
region_4 {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 50, 'num_layers': 2}
region_5 {'hidden_size': 2, 'learning_rate': 0.001, 'num_epochs': 10, 'num_layers': 2}
region_6 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 50, 'num_layers': 1}
region_7 {'hidden_size': 2, 'learning_rate': 0.01, 'num_epochs': 10, 'num_layers': 1}
region_8 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 150, 'num_layers': 1}
region_9 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 20, 'num_layers': 3}
region_10 {'hidden_size': 2, 'learning_rate': 0.005, 'num_epochs': 10, 'num_layers': 2}


In [None]:
torch.save(region_models_pretrained_dict, '../../models/withTimeFeatures/water_pretrained_models.pt')

In [None]:
region_models_retrained_dict ={}
for i in tqdm(range(11)):
    region_id = f'region_{i}'
    model_params = models_dict[region_id]['parameters']
    model, validation_losses, test_output, error = train_and_evaluate_region(model_params, region_id, satbuoy_splits_dict, water_splits_dict)

    region_models_retrained_dict[region_id] = {'model' : model, 'validation losses': validation_losses,
                                     'prediction': test_output, 'mean square error': error}

In [None]:
torch.save(region_models_retrained_dict, '../../models/withTimeFeatures/water_original-params_models.pt')

### Hypertuning

In [9]:
from IPython.display import clear_output

def hyperparameter_tuning_region(params_grid, region_id, daily_split_dict, infrequent_split_dict, model_state=None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    checked_params_file = f'checkpoints/dates/water_checked_{region_id}_params.json'  # Define the path for the checked parameters file
    checkpoint_path = f'checkpoints/dates/water_model_{region_id}_checkpoint.pt'  # Define the path for the checked parameters file


    if os.path.exists(checked_params_file):
        with open(checked_params_file, 'r') as f:
            checked_params = json.load(f)  # Load as a dictionary
    else:
        checked_params = {}  # Initialize an empty dictionary if no file exists

    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path,map_location=torch.device('cpu'))
        best_model = checkpoint['model']  # Load the best model
        best_params = checkpoint['params']  # Load the best parameters
        best_validation_loss = checkpoint['validation_loss']  # Load the best Pretrained validation losses
        best_prediction = checkpoint['prediction']
        best_error = checkpoint['mean squared error']
    else:
        best_model = None
        best_params = None
        best_validation_loss = float('inf')

    no_improvement_count = 0  # Counter for epochs without improvement


    # Iterate through each combination of hyperparameters

    with tqdm(total=len(ParameterGrid(params_grid)), desc="Processing Model", leave=False) as pbar:

        for param_combination in ParameterGrid(params_grid):
            tqdm.write(f"Training region {region_id} with parameters: {param_combination}")

            model_params = {'hidden_size': param_combination['hidden_size'],
                            'num_layers': param_combination['num_layers'],
                            'learning_rate' : param_combination['learning_rate'],
                            'num_epochs': param_combination['num_epochs']}

            # Skip already checked parameters
            params_key = json.dumps(param_combination, sort_keys=True)
            if params_key in checked_params:
                pbar.update(1)
                continue

            # Initialize the model with the current parameters
            model, validation_losses, test_output, error = train_and_evaluate_region(model_params,region_id,daily_split_dict,infrequent_split_dict)

            checked_params[params_key] = validation_losses  # Store the validation loss for this combination

            # Save the updated checked parameters dictionary to the file
            with open(checked_params_file, 'w') as f:
                json.dump(checked_params, f)

            # Finds the validation loss for these parameters
            most_recent_losses = validation_losses[-1]


            # Update best model if the current one is better
            if most_recent_losses < best_validation_loss:
                best_validation_loss = most_recent_losses
                best_model = model
                best_params = param_combination
                best_prediction = test_output
                best_error = error
                no_improvement_count = 0  # Reset counter for improvements


                # Save the best model to a checkpoint
                torch.save({
                    'model': best_model,  # Save the model state
                    'params': best_params,  # Save the best parameters
                    'validation_loss': best_validation_loss,  # Save the best Pretrained validation losses
                    'prediction' : best_prediction,
                    'mean squared error' : best_error
                }, checkpoint_path)
                pbar.update(1)

            else:
                no_improvement_count += 1  # Increment if there's no improvement
                pbar.update(1)
                if no_improvement_count >= 20:

                    tqdm.write(f"Stopping early for region {region_id} due to no improvement in hyperparameters.")

                    break  # Stop tuning if no improvement for `patience` configurations


        # Clear the model from memory after saving
            del model
    clear_output(wait=True)  # Clears the output after each iteration


    print(f"Best parameters: {best_params} with average validation loss: {best_validation_loss}")
    return best_model, best_params, best_validation_loss, best_prediction, best_error



In [None]:
batch_size=1
param_grid = {
    'hidden_size': [4,2],
    'num_layers': [1,2,4],
    'learning_rate': [0.005, 0.01, 0.05],
    'num_epochs': [20,30,50,75]
}

best_model_dict={}
for i in tqdm(range(6,11)):
    region_id = f'region_{i}'
    best_model, best_params, best_validation_loss, best_prediction, best_error = hyperparameter_tuning_region(param_grid,region_id,satbuoy_splits_dict,water_splits_dict)
    best_model_dict[region_id] = {"model": best_model,
                                  "parameters" : best_params,
                                  "validation loss" : best_validation_loss,
                                  'prediction' : best_prediction,
                                  'mean squared error' : best_error}
    print(best_error.item())

In [22]:
for i in range(11):
    region_id = f'region_{i}'
    checkpoint = torch.load(f'checkpoints/dates/water_model_{region_id}_checkpoint.pt',map_location=torch.device('cpu'))
    best_model_dict[region_id] = checkpoint
    print(region_id, checkpoint['params'], checkpoint['validation_loss'], checkpoint['mean squared error'].item())

region_0 {'hidden_size': 4, 'learning_rate': 0.01, 'num_epochs': 30, 'num_layers': 4} 0.08489374071359634 0.10712746530771255
region_1 {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 20, 'num_layers': 1} 0.049684904515743256 0.06459587067365646
region_2 {'hidden_size': 4, 'learning_rate': 0.01, 'num_epochs': 30, 'num_layers': 4} 0.029481934383511543 0.04297097399830818
region_3 {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 50, 'num_layers': 1} 0.01697954162955284 0.05588475614786148
region_4 {'hidden_size': 2, 'learning_rate': 0.05, 'num_epochs': 50, 'num_layers': 1} 0.04058486223220825 0.0582260899245739
region_5 {'hidden_size': 2, 'learning_rate': 0.05, 'num_epochs': 20, 'num_layers': 1} 0.019707415252923965 0.0214984230697155
region_6 {'hidden_size': 4, 'learning_rate': 0.005, 'num_epochs': 30, 'num_layers': 2} 0.04562634602189064 0.0497429333627224
region_7 {'hidden_size': 4, 'learning_rate': 0.005, 'num_epochs': 20, 'num_layers': 2} 0.013185189105570316 0.0233682

In [None]:
region_param_grid = {
    'region_0': {
        'hidden_size': [4,8],
        'num_layers': [4,5],
        'learning_rate': [0.01],
        'num_epochs': [30]
    },
    'region_1': {
        'hidden_size': [4,8],
        'num_layers': [1,2],
        'learning_rate': [0.1,0.05],
        'num_epochs': [10,20]
    },
    'region_2': {
        'hidden_size': [4,8],
        'num_layers': [2],
        'learning_rate': [0.01],
        'num_epochs': [30]
    },
    'region_3': {
        'hidden_size': [4,8],
        'num_layers': [1],
        'learning_rate': [0.1,0.05],
        'num_epochs': [50]
    },
    'region_4': {
        'hidden_size': [4,2,1],
        'num_layers': [1,2,4],
        'learning_rate': [0.005, 0.01, 0.05],
        'num_epochs': [30,50,75]
    },
    'region_5': {
        'hidden_size': [4,2,1],
        'num_layers': [1,2,4],
        'learning_rate': [0.005, 0.01, 0.05],
        'num_epochs': [30,50,75]
    },
    'region_6': {
        'hidden_size': [4,8],
        'num_layers': [2],
        'learning_rate': [0.005, 0.0001],
        'num_epochs': [10,20]
    },
    'region_7': {
        'hidden_size': [4,8],
        'num_layers': [2],
        'learning_rate': [0.005, 0.0001],
        'num_epochs': [10,20]
    },
    'region_8': {
        'hidden_size': [2],
        'num_layers': [2],
        'learning_rate': [0.1, 0.05],
        'num_epochs': [50]
    },
    'region_9': {
        'hidden_size': [4,8],
        'num_layers': [1],
        'learning_rate': [0.005, 0.0001],
        'num_epochs': [30]
    },
    'region_10': {
        'hidden_size': [4,2,1],
        'num_layers': [1,2,4],
        'learning_rate': [0.005, 0.01, 0.05],
        'num_epochs': [20,30,50,75]
    }
    }

for i in tqdm(range(11)):
    region_id = f'region_{i}'
    param_grid = region_param_grid[region_id]
    best_model, best_params, best_validation_loss, best_prediction, best_error = hyperparameter_tuning_region(param_grid,region_id,satbuoy_splits_dict,water_splits_dict)
    best_model_dict[region_id] = {"model": best_model,
                                  "parameters" : best_params,
                                  "validation loss" : best_validation_loss,
                                  'prediction' : best_prediction,
                                  'mean squared error' : best_error}
    print(best_error.item())

Best parameters: {'hidden_size': 4, 'learning_rate': 0.01, 'num_epochs': 30, 'num_layers': 4} with average validation loss: 0.08489374071359634
0.10712746530771255


Processing Model:   0%|          | 0/16 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.1, 'num_epochs': 10, 'num_layers': 1}


Processing region_1:   0%|          | 0/10 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.1, 'num_epochs': 10, 'num_layers': 2}
Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.1, 'num_epochs': 20, 'num_layers': 1}


Processing region_1:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.1, 'num_epochs': 20, 'num_layers': 2}


Processing region_1:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 10, 'num_layers': 1}


Processing region_1:   0%|          | 0/10 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 10, 'num_layers': 2}


Processing region_1:   0%|          | 0/10 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 20, 'num_layers': 1}
Training region region_1 with parameters: {'hidden_size': 4, 'learning_rate': 0.05, 'num_epochs': 20, 'num_layers': 2}
Training region region_1 with parameters: {'hidden_size': 8, 'learning_rate': 0.1, 'num_epochs': 10, 'num_layers': 1}


Processing region_1:   0%|          | 0/10 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 8, 'learning_rate': 0.1, 'num_epochs': 10, 'num_layers': 2}


Processing region_1:   0%|          | 0/10 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 8, 'learning_rate': 0.1, 'num_epochs': 20, 'num_layers': 1}


Processing region_1:   0%|          | 0/20 [00:00<?, ?it/s]

Training region region_1 with parameters: {'hidden_size': 8, 'learning_rate': 0.1, 'num_epochs': 20, 'num_layers': 2}


Processing region_1:   0%|          | 0/20 [00:00<?, ?it/s]

## Multiregion model

In [None]:
def train_and_evaluate_multi(neighbor_mask, model_params, daily_split_dict, infrequent_split_dict,model_state=None):
    hidden_size = model_params['hidden_size']
    num_layers = model_params['num_layers']
    learning_rate = model_params['learning_rate']
    num_epochs = model_params['num_epochs']

    # Define loss function and optimizer
    model =  MultiRegionModel(neighbor_mask, daily_input_size=10, infrequent_input_size=14, hidden_size=hidden_size, num_layers=num_layers, output_size=1)

    criterion = nn.MSELoss()  # Mean Squared Error for regression
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    validation_losses = []

    with tqdm(total=num_epochs, desc=f"Processing {region_id}", leave=False) as pbar:

        for epoch in range(num_epochs):
            daily_features = daily_split_dict[region_id]['test_features']
            daily_targets = daily_split_dict[region_id]['test_targets']
            water_features = infrequent_split_dict[region_id]['test_features']
            water_targets = infrequent_split_dict[region_id]['test_targets']

            daily_features = daily_features.to(device)
            daily_targets = daily_targets.to(device)
            water_features = water_features.to(device)
            water_targets = water_targets.to(device)

            model.train()  # Ensure the model is in training mode before each training step

            # Retrieve output for the specific region
            output, _ = model(daily_features,water_features)
            # Compute loss
            loss = criterion(output, daily_targets)

            # Backpropagation and optimization steps
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Validation step
            model.eval()  # Set model to evaluation mode
            val_daily_features = daily_split_dict[region_id]['val_features']
            val_daily_targets = daily_split_dict[region_id]['val_targets']
            val_water_features = infrequent_split_dict[region_id]['val_features']
            val_water_targets = infrequent_split_dict[region_id]['val_targets']

            val_daily_features = val_daily_features.to(device)
            val_daily_targets = val_daily_targets.to(device)
            val_water_features = val_water_features.to(device)
            val_water_targets = val_water_targets.to(device)


            with torch.no_grad():  # Disable gradient calculation for validation
                val_output,_ = model(val_daily_features,val_water_features)
                val_loss = criterion(val_output, val_daily_targets)
                validation_losses.append(val_loss.item())  # Store the loss in the dictionary
            pbar.update(1)

    daily_test_dataset = daily_split_dict[region_id]['test_features']
    water_test_dataset = infrequent_split_dict[region_id]['test_features']

    daily_test_dataset = daily_test_dataset.to(device)
    water_test_dataset = water_test_dataset.to(device)

    with torch.no_grad():  # Disable gradient calculation
        test_output,_ = model(daily_test_dataset,water_test_dataset)

    true_values = daily_split_dict[region_id]['test_targets']

    true_values = true_values.to(device)
    error = criterion(test_output, true_values)

    return model, validation_losses, test_output, error  # Return losses for each region separately


In [None]:
model_params = {'hidden_size':2,
                'num_layers':2,
                'learning_rate' : 0.05,
                'num_epochs':2}

train_and_evaluate_multi(neighbor_mask, model_params, satbuoy_splits_dict,water_splits_dict)

Processing region_10:   0%|          | 0/2 [00:00<?, ?it/s]

TypeError: MultiRegionModel.forward() takes 2 positional arguments but 3 were given

In [None]:
def hyperparameter_tuning(params,train_features_dict, train_targets_dict, val_features_dict, val_targets_dict, neighbor_mask):
    checked_params_file = 'checkpoints/dates/checked_params.json'  # Define the path for the checked parameters file
    checkpoint_path = 'checkpoints/dates/model_checkpoint.pt'  # Define the path for the model checkpoint

    if os.path.exists(checked_params_file):
        with open(checked_params_file, 'r') as f:
            checked_params = json.load(f)  # Load as a dictionary
    else:
        checked_params = {}  # Initialize an empty dictionary if no file exists

    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path,map_location=torch.device('cpu'))
        best_model = checkpoint['model']  # Load the best model
        best_params = checkpoint['params']  # Load the best parameters

        best_ind_validation_loss = checkpoint['indv_validation_loss']  # Load the best individual validation losses
        best_avg_validation_loss = checkpoint['avg_validation_loss']  # Load the best validation loss

    else:
      best_model = None
      best_params = None
      best_avg_validation_loss = float('inf')

     # Iterate through each combination of hyperparameters
    for params in tqdm(ParameterGrid(params)):
        print(f"Training with parameters: {params}")
        params_key = json.dumps(params, sort_keys=True)  # Convert the parameters to a JSON string

        if params_key in checked_params:
            continue  # Skip this combination if it has been checked

        model = MultiRegionModel(neighbor_mask, input_size=input_size,
                        hidden_size=params['hidden_size'], num_layers=params['num_layers'],
                        output_size=1)  # output_size is fixed

        # Initialize the model with the current parameters
        validation_losses = train_and_evaluate_model(
        model.to(device),
        train_features_dict,
        train_targets_dict,
        val_features_dict,
        val_targets_dict,
        params['learning_rate'],
        params['num_epochs']
    )
        checked_params[params_key] = validation_losses  # Store the validation loss for this combination

        # Save the updated checked parameters dictionary to the file
        with open(checked_params_file, 'w') as f:
            json.dump(checked_params, f)

        # Finds the validation loss for these parameters
        most_recent_losses = [losses[-1] for losses in validation_losses.values()]
        # Calculate the average validation loss
        overall_avg_loss = np.mean(most_recent_losses)


        # Update best model if the current one is better
        if overall_avg_loss < best_avg_validation_loss:
            best_avg_validation_loss = overall_avg_loss
            best_ind_validation_loss = most_recent_losses
            best_model = model
            best_params = params
            # Save the best model to a checkpoint
            torch.save({
                'model': best_model.state_dict(),  # Save the model state
                'params': best_params,  # Save the best parameters
                'indv_validation_loss': best_ind_validation_loss,  # Save the best individual validation losses
                'avg_validation_loss': best_avg_validation_loss  # Save the best average validation loss
            }, checkpoint_path)
        # Clear the model from memory after saving
        del model

    print(f"Best parameters: {best_params} with average validation loss: {best_avg_validation_loss}")
    return best_model, best_ind_validation_loss, best_avg_validation_loss


# Compare preformances

In [23]:
region_models_pretrained_dict = torch.load('../../models/withTimeFeatures/water_pretrained_models.pt', map_location=torch.device('cpu'))
region_models_retrained_dict = torch.load('../../models/withTimeFeatures/water_original-params_models.pt', map_location=torch.device('cpu'))

In [26]:
# Initialize variables for quantification
total_mse_pretrained_regions = 0
total_mse_retrained_regions = 0
total_mse_tuned_regions = 0
count = 0
comparison_results = {}

# Compare MSE for each region and quantify
for region_id in region_models_pretrained_dict.keys():
    mse1 = region_models_pretrained_dict[region_id]['mean square error'].item()
    mse2 = region_models_retrained_dict[region_id]['mean square error'].item()
    mse3 = best_model_dict[region_id]['mean squared error'].item()


    # Update totals for average calculation
    total_mse_pretrained_regions += mse1
    total_mse_retrained_regions += mse2
    total_mse_tuned_regions += mse3
    count += 1

 # Calculate metrics for two model comparisons
    difference_12 = mse1 - mse2
    percentage_improvement_12 = ((mse1 - mse2) / mse1) * 100 if mse1 != 0 else None

    difference_13 = mse1 - mse3
    percentage_improvement_13 = ((mse1 - mse3) / mse1) * 100 if mse1 != 0 else None

    difference_23 = mse2 - mse3
    percentage_improvement_23 = ((mse2 - mse3) / mse2) * 100 if mse2 != 0 else None

    mse_dict = {
        'Using pretrained model': mse1,
        'Retaining existing model': mse2,
        'Hypertuned model': mse3
    }

    # Sort models by MSE in ascending order
    sorted_models = sorted(mse_dict.items(), key=lambda x: x[1])

    # Store comparison results
    comparison_results[region_id] = {
        'MSE Using pretrained model': mse1,
        'MSE Retaining existing model': mse2,
        'MSE Hypertuned model': mse3,
        'Difference (Pretrained - Retrained)': difference_12,
        'Percentage Improvement (Pretrained - Retrained)': percentage_improvement_12,
        'Difference (Pretrained - Hypertuned)': difference_13,
        'Percentage Improvement (Pretrained - Hypertuned)': percentage_improvement_13,
        'Difference (Retrained - Hypertuned)': difference_23,
        'Percentage Improvement (Retrained - Hypertuned)': percentage_improvement_23,
        'Performance Order': sorted_models
    }

# Calculate average MSE for all models
average_mse_model1 = total_mse_pretrained_regions / count if count > 0 else 0
average_mse_model2 = total_mse_retrained_regions / count if count > 0 else 0
average_mse_model3 = total_mse_tuned_regions / count if count > 0 else 0

data = []

for region_id, results in comparison_results.items():
    # Collecting the results for each region
    data.append({
        'Region': region_id,
        'MSE Using pretrained model': results['MSE Using pretrained model'],
        'MSE Retaining existing model': results['MSE Retaining existing model'],
        'MSE Hypertuned model': results['MSE Hypertuned model'],
        'Difference (Pretrained - Retrained)': results['Difference (Pretrained - Retrained)'],
        'Percentage Improvement (Pretrained - Retrained)': results['Percentage Improvement (Pretrained - Retrained)'],
        'Difference (Pretrained - Hypertuned)': results['Difference (Pretrained - Hypertuned)'],
        'Percentage Improvement (Pretrained - Hypertuned)': results['Percentage Improvement (Pretrained - Hypertuned)'],
        'Difference (Retrained - Hypertuned)': results['Difference (Retrained - Hypertuned)'],
        'Percentage Improvement (Retrained - Hypertuned)': results['Percentage Improvement (Retrained - Hypertuned)'],
        'Model Performance Order': ', '.join([f"{model}" for model, mse in results['Performance Order']])
    })

# Create DataFrame
results_df = pd.DataFrame(data)


# Print average MSE for all models
print(f"\nAverage MSE Using pretrained model: {average_mse_model1}")
print(f"Average MSE Retaining existing model : {average_mse_model2}")
print(f"Average MSE Hypertuned model: {average_mse_model3}")


Average MSE Using pretrained model: 0.4865758222612468
Average MSE Retaining existing model : 0.4306367212398486
Average MSE Hypertuned model: 0.07087174193425612


In [None]:
region_models_pretrained_dict['region_3']['prediction']

tensor([[[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]]])