In [1]:
# Mount Google Drive into the Colab filesystem so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive/')

# Work from the models notebook folder: the relative '../../data/...' paths used
# by later cells are resolved against this directory.
%cd /content/drive/MyDrive/ChesapeakeBay/notebooks/models

Mounted at /content/drive/
/content/drive/MyDrive/ChesapeakeBay/notebooks/models


# Set up

In [2]:
import pandas as pd
import numpy as np
import datetime
import xarray as xr
import matplotlib.pyplot as plt


import logging
from tqdm import tqdm  # For progress bar
# Route INFO-and-above log records to tuning.log, one timestamped line per record
logging.basicConfig(
    filename='tuning.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
import itertools

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import contextlib
from concurrent.futures import ThreadPoolExecutor, as_completed

# Input and shape the data

In [3]:
# Open the cleaned satellite/buoy NetCDF file (path is relative to the working directory).
satellite_buoy = xr.open_dataset('../../data/satelliteBuoy_clean.nc4')

KeyboardInterrupt: 

In [None]:
# Display the dataset's summary representation.
satellite_buoy

In [None]:
# Display only the data variables held by the dataset.
satellite_buoy.data_vars

We need to make sure the timesteps are in order.

In [None]:
# Check that the time coordinate is non-decreasing, and sort the dataset when it is not.
# Comparing each value with its successor is vectorised and avoids building a
# Python list with sorted(); it is also trivially True for zero or one time step.
time_values = satellite_buoy['time'].values
is_sorted = bool((time_values[:-1] <= time_values[1:]).all())

if is_sorted:
    print("The time steps are already in order.")
else:
    print("The time steps are not in order.")
    # Later cells split and feed the data along the time axis in storage order,
    # so put the dataset into chronological order instead of only reporting it.
    satellite_buoy = satellite_buoy.sortby('time')
    print("Sorted the dataset by time.")


We want to predict the chlorophyll measurements, using Air Temperature, Air pressure, Humidity, Wind speed, and Wind Direction as features. Let's create the target and feature tensors, then save them to files for later use on the GPU. Since `xarray` runs on the CPU, this step is MUCH faster on CPU.

In [None]:
# Predictor variables; each one becomes a channel of the feature tensor
variables_to_expand = ['Air Temperature', 'Air pressure', 'Humidity', 'Wind speed', 'Wind Direction']

# Stack the predictors along a new leading 'variable' axis, then copy the values
# into a float32 PyTorch tensor on the CPU
features = xr.concat([satellite_buoy[name] for name in variables_to_expand], dim='variable')
features_tensor = torch.tensor(features.values, dtype=torch.float32)

# Swap the first two axes so the layout is (time_steps, features, depth, lat, lon)
features_tensor = features_tensor.transpose(0, 1)

# Target: 'chlor_a' as a float32 tensor, with missing values marked by the sentinel -1
chlorophyll_tensor = torch.tensor(satellite_buoy['chlor_a'].values, dtype=torch.float32)
chlorophyll_tensor = chlorophyll_tensor.masked_fill(torch.isnan(chlorophyll_tensor), -1)

# Cache both tensors on disk so later sessions can skip the xarray step
torch.save(features_tensor, '../../data/features_tensor.pt')
torch.save(chlorophyll_tensor, '../../data/chlorophyll_tensor.pt')

# Shape report for a quick visual check
print(f"Features tensor shape: {features_tensor.shape}")
print(f"Chlorophyll tensor shape: {chlorophyll_tensor.shape}")


In [4]:
# Load the cached tensors written by the tensor-building cell.
# weights_only=True restricts unpickling to plain tensor data (no arbitrary
# Python objects) and silences the FutureWarning that torch.load emits when the
# argument is left unset.
features_tensor = torch.load('../../data/features_tensor.pt', weights_only=True)
chlorophyll_tensor = torch.load('../../data/chlorophyll_tensor.pt', weights_only=True)

# Features and targets are split along dimension 0 (time) later on, so the two
# files must describe the same number of time steps.
assert features_tensor.shape[0] == chlorophyll_tensor.shape[0], \
    "features and chlorophyll tensors have different numbers of time steps"

# Optional: Print shapes to confirm
print(f"Loaded Features tensor shape: {features_tensor.shape}")
print(f"Loaded Chlorophyll tensor shape: {chlorophyll_tensor.shape}")

  features_tensor = torch.load('../../data/features_tensor.pt')
  chlorophyll_tensor = torch.load('../../data/chlorophyll_tensor.pt')


Loaded Features tensor shape: torch.Size([2767, 5, 1, 358, 243])
Loaded Chlorophyll tensor shape: torch.Size([2767, 1, 358, 243])


# Model

## Defining the classes

In [5]:
class ConvLSTMCell(nn.Module):
    """ConvLSTM cell that advances the recurrent state by one time slice.

    The input for a time slice has shape
    (batch_size, features, depth, latitude, longitude).
    """

    def __init__(self, input_channels, hidden_channels, kernel_size):
        super().__init__()
        # A single 3D convolution over [input, hidden] produces the
        # pre-activations of all four gates (i, f, o, g) in one pass.
        self.conv = nn.Conv3d(
            in_channels=input_channels + hidden_channels,
            out_channels=4 * hidden_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )

    def forward(self, input_tensor, hidden_state):
        """Return ``(h_next, c_next)`` given the input slice and ``(h_cur, c_cur)``."""
        prev_hidden, prev_cell = hidden_state

        # Join input and previous hidden state along the channel axis
        stacked = torch.cat((input_tensor, prev_hidden), dim=1).contiguous()
        gate_preacts = self.conv(stacked)

        # Four equal channel groups, in the order input, forget, output, candidate
        in_pre, forget_pre, out_pre, cand_pre = gate_preacts.chunk(4, dim=1)

        in_gate = torch.sigmoid(in_pre)
        forget_gate = torch.sigmoid(forget_pre)
        out_gate = torch.sigmoid(out_pre)
        candidate = torch.tanh(cand_pre)

        next_cell = forget_gate * prev_cell + in_gate * candidate
        next_hidden = out_gate * torch.tanh(next_cell)
        return next_hidden, next_cell

In [6]:
class ConvLSTM(nn.Module):
    """Stack of ConvLSTM cells followed by a 1x1x1 output convolution.

    Args:
        input_channels: Channels of the input fed to the first cell.
        hidden_channels: Channels of every cell's hidden and cell state.
        kernel_size: Kernel size passed to each ``ConvLSTMCell``.
        num_layers: Number of stacked cells.
        output_channels: Channels produced by the output convolution
            (default 1).
    """

    def __init__(self, input_channels, hidden_channels, kernel_size, num_layers, output_channels=1):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_channels = hidden_channels
        self.output_channels = output_channels

        # First layer consumes the input channels; deeper layers consume the
        # hidden state of the layer below.
        self.lstm_cells = nn.ModuleList([
            ConvLSTMCell(input_channels if i == 0 else hidden_channels,
                         hidden_channels,
                         kernel_size)
            for i in range(num_layers)
        ])

        # Projects the last layer's hidden state to the prediction. The number
        # of output channels follows ``output_channels`` instead of a fixed 1.
        self.conv = nn.Conv3d(in_channels=hidden_channels, out_channels=output_channels, kernel_size=1)

    def forward(self, input_tensor, time_step_batch_size=10):
        """Run the whole sequence through the stack and project every step.

        Args:
            input_tensor: ``(time, channels, depth, height, width)`` or
                ``(batch, time, channels, depth, height, width)``.
            time_step_batch_size: Accepted for backward compatibility. The
                recurrence is evaluated one step at a time in order, so the
                value does not influence the result.

        Returns:
            ``(time, output_channels, depth, height, width)`` when the batch
            size is 1 (including un-batched input), otherwise
            ``(batch, time, output_channels, depth, height, width)``.

        Raises:
            ValueError: If the input is not 5-D/6-D or has no time steps.
        """
        # if no batch_size dimension, add one
        if input_tensor.dim() == 5:
            input_tensor = input_tensor.unsqueeze(0)
        if input_tensor.dim() != 6:
            raise ValueError(
                f"expected a 5-D or 6-D input tensor, got {input_tensor.dim()} dimensions"
            )

        batch_size, time_steps, channels, depth, height, width = input_tensor.size()
        if time_steps == 0:
            raise ValueError("input_tensor must contain at least one time step")

        h, c = self.init_hidden(batch_size, depth, height, width, input_tensor.device)

        step_outputs = []
        for t in range(time_steps):
            x = input_tensor[:, t]
            for i, cell in enumerate(self.lstm_cells):
                h[i], c[i] = cell(x, (h[i], c[i]))
                x = h[i]  # output of this layer is the input of the next one
            step_outputs.append(h[-1])  # hidden state of the top layer

        # (batch, time, hidden_channels, depth, height, width)
        output = torch.stack(step_outputs, dim=1)

        # Conv3d only accepts 5-D input, so fold batch and time together for the
        # projection and unfold afterwards. This also works for batch sizes > 1,
        # where squeezing the batch axis alone would leave a 6-D tensor.
        folded = output.reshape(batch_size * time_steps, self.hidden_channels, depth, height, width)
        projected = self.conv(folded)
        final_output = projected.reshape(
            batch_size, time_steps, self.conv.out_channels, depth, height, width
        )

        # Keep the batch-free layout for a single sequence
        if batch_size == 1:
            final_output = final_output.squeeze(0)

        return final_output

    def init_hidden(self, batch_size, depth, height, width, device):
        """Return zero-filled hidden and cell state lists, one entry per layer."""
        state_shape = (batch_size, self.hidden_channels, depth, height, width)
        # Allocate directly on the target device instead of creating on the
        # default device and copying.
        h = [torch.zeros(state_shape, device=device) for _ in range(self.num_layers)]
        c = [torch.zeros(state_shape, device=device) for _ in range(self.num_layers)]
        return h, c


In [7]:
class HyperparameterTuner:
    """Grid search over ConvLSTM hyperparameters on a fixed train/validation split.

    Args:
        input_channels: Number of input channels passed to ``ConvLSTM``.
        train_data: ``(features, targets)`` pair used for optimisation.
        val_data: ``(features, targets)`` pair used for model selection.
        criterion: Loss callable invoked as ``criterion(prediction, targets)``.
        device: ``torch.device`` or device string on which models are built and
            batches are evaluated.
    """

    def __init__(self, input_channels, train_data, val_data, criterion, device):
        self.input_channels = input_channels
        self.train_features, self.train_targets = train_data
        self.val_features, self.val_targets = val_data
        self.criterion = criterion
        # Normalise so that ``self.device.type`` is available for strings too
        self.device = torch.device(device)

    def build_model(self, hidden_channels, kernel_size, num_layers):
        """Create a ``ConvLSTM`` with the given settings on ``self.device``."""
        model = ConvLSTM(
            input_channels=self.input_channels,
            hidden_channels=hidden_channels,
            kernel_size=kernel_size,
            num_layers=num_layers
        ).to(self.device)
        return model

    def _run_one_epoch(self, model, features, targets, optimizer, scaler, training=True):
        """Run one full-sequence pass and return the loss as a Python float.

        With ``training=True`` the pass is followed by a single optimiser step;
        otherwise gradients are disabled and the model is left unchanged.
        """
        if training:
            model.train()
        else:
            model.eval()

        # The model lives on self.device; make sure its inputs do too
        # (``.to`` returns the same tensor when nothing has to move).
        features = features.to(self.device)
        targets = targets.to(self.device)

        # Mixed precision follows the device actually in use. Keying it on
        # torch.cuda.is_available() would send a CPU loss through GradScaler
        # whenever a GPU merely exists on the machine.
        use_amp = self.device.type == "cuda"
        amp_context = torch.autocast(device_type="cuda") if use_amp else contextlib.nullcontext()

        with torch.set_grad_enabled(training), amp_context:
            output = model(features)
            predicted_output = output[:, 0, :, :, :]  # first output channel
            loss = self.criterion(predicted_output, targets)

        # Backward pass and optimization (only during training)
        if training:
            optimizer.zero_grad()
            if use_amp and scaler is not None:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()

        return loss.item()

    def _train_single_config(self, hidden_channels, kernel_size, num_layers, lr, epochs, pbar=None):
        """Train one configuration.

        Returns:
            ``(val_loss, hidden_channels, kernel_size, num_layers, lr, model)``
            where ``val_loss`` is the validation loss of the last epoch run,
            i.e. the loss of the returned model state.
        """
        # build_model already places the model on self.device
        model = self.build_model(hidden_channels, kernel_size, num_layers)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        train_losses, val_losses = self.train(model, optimizer, epochs)

        # Hand cached blocks back to the driver once this run is finished
        if self.device.type == "cuda":
            torch.cuda.empty_cache()

        # Optional progress reporting for callers that pass a bar
        if pbar is not None:
            pbar.update(1)

        return val_losses[-1], hidden_channels, kernel_size, num_layers, lr, model

    def train(self, model, optimizer, epochs, early_stopping_patience=5):
        """Train for up to ``epochs`` epochs with early stopping on validation loss.

        Training stops once the validation loss has failed to improve for
        ``early_stopping_patience`` consecutive epochs. The model keeps the
        weights of the last epoch run; no earlier state is restored.

        Returns:
            ``(train_losses, val_losses)``, one float per completed epoch.
        """
        train_losses = []
        val_losses = []
        best_val_loss = float('inf')
        patience_counter = 0

        # Gradient scaling is only meaningful when training on a CUDA device
        scaler = torch.cuda.amp.GradScaler() if self.device.type == "cuda" else None

        for epoch in range(epochs):
            # TRAINING PHASE
            train_loss = self._run_one_epoch(model, self.train_features, self.train_targets, optimizer, scaler, training=True)
            train_losses.append(train_loss)

            # VALIDATION PHASE
            val_loss = self._run_one_epoch(model, self.val_features, self.val_targets, optimizer, scaler, training=False)
            val_losses.append(val_loss)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= early_stopping_patience:
                print("Early stopping triggered.")
                break

        return train_losses, val_losses

    def tune(self, hidden_channels_list, kernel_size_list, num_layers_list, lr_list, epochs=5, max_workers=4):
        """Train every combination of the given values and keep the best model.

        Args:
            hidden_channels_list, kernel_size_list, num_layers_list, lr_list:
                Candidate values; the full Cartesian product is evaluated.
            epochs: Maximum epochs per configuration (at least 1).
            max_workers: Number of configurations trained concurrently. Every
                worker builds its own model on ``self.device``, so lower this
                (e.g. to 1) when device memory is tight.

        Returns:
            ``(best_model, best_config)`` with ``best_config`` equal to
            ``(hidden_channels, kernel_size, num_layers, lr)``.

        Raises:
            ValueError: If a candidate list is empty or ``epochs`` < 1.
            RuntimeError: If no configuration yields a finite validation loss.
        """
        # Create the product of all hyperparameter combinations
        hyperparameter_combinations = list(itertools.product(
            hidden_channels_list, kernel_size_list, num_layers_list, lr_list
        ))
        if not hyperparameter_combinations:
            raise ValueError("Every hyperparameter list must contain at least one value.")
        if epochs < 1:
            raise ValueError("epochs must be at least 1.")

        best_val_loss = float('inf')
        best_config = None
        best_model = None

        with tqdm(total=len(hyperparameter_combinations), desc="Hyperparameter Tuning", leave=True) as pbar:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [
                    executor.submit(self._train_single_config, hidden_channels, kernel_size, num_layers, lr, epochs)
                    for hidden_channels, kernel_size, num_layers, lr in hyperparameter_combinations
                ]

                # Process each future as it completes
                for future in as_completed(futures):
                    val_loss, hidden_channels, kernel_size, num_layers, lr, model = future.result()

                    # The bar is advanced here, on the calling thread, rather
                    # than concurrently from the workers.
                    pbar.update(1)
                    logging.info(
                        "hidden_channels=%s kernel_size=%s num_layers=%s lr=%s val_loss=%s",
                        hidden_channels, kernel_size, num_layers, lr, val_loss,
                    )

                    # Check for the best configuration
                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        best_config = (hidden_channels, kernel_size, num_layers, lr)
                        best_model = model

        # A NaN or infinite loss never compares below infinity, so this is
        # reached when every run diverged or the data contains NaN values.
        if best_config is None:
            raise RuntimeError(
                "No configuration produced a finite validation loss; "
                "check the features and targets for NaN or infinite values."
            )

        # Report the best configuration
        print(f"Best config: hidden_channels={best_config[0]}, kernel_size={best_config[1]}, num_layers={best_config[2]}, lr={best_config[3]}")
        print(f"Best validation loss: {best_val_loss}")
        logging.info("Best config %s with validation loss %s", best_config, best_val_loss)

        return best_model, best_config


We also define a function to evaluate the model on the test data.

In [8]:
def test_model(model, test_features, test_targets, criterion):
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():  # Disable gradient calculations for testing
        test_output = model(test_features)  # Forward pass on the test data

        # Ensure we are selecting the correct dimensions from the model output
        # Assuming that the output is of shape (time, 1, depth, lat, lon), so we squeeze to remove the singleton dimension
        predicted_chlorophyll_test = test_output.squeeze(1)  # Shape should be (time, depth, lat, lon)

        # Calculate the test loss
        test_loss = criterion(predicted_chlorophyll_test, test_targets)

    print(f"Test Loss: {test_loss.item()}")

    return predicted_chlorophyll_test


## Small test on CPU

In [None]:
# Extent of the miniature dataset used for a quick sanity check
test_time_steps = 15            # leading time steps to keep
test_lat_range = slice(0, 50)   # first 50 latitude indices
test_lon_range = slice(0, 50)   # first 50 longitude indices

# Crop both tensors; the ellipsis keeps every axis between time and lat/lon whole
features_test_tensor = features_tensor[:test_time_steps, ..., test_lat_range, test_lon_range]
chlorophyll_test_tensor = chlorophyll_tensor[:test_time_steps, ..., test_lat_range, test_lon_range]

# Shape report
print(f"Chlorophyll test tensor shape: {chlorophyll_test_tensor.shape}")
print(f"Features test tensor shape: {features_test_tensor.shape}")

In [None]:
# Chronological 70% / 15% / 15% split along the time axis (dimension 0)
total_steps = features_test_tensor.shape[0]
train_size = int(0.7 * total_steps)
val_size = int(0.15 * total_steps)
test_size = total_steps - train_size - val_size  # whatever is left over
val_end = train_size + val_size                  # first index of the test part

# Features: train / validation / test
train_features = features_test_tensor[:train_size]
val_features = features_test_tensor[train_size:val_end]
test_features = features_test_tensor[val_end:]

# Chlorophyll targets, cut at the same boundaries
train_targets = chlorophyll_test_tensor[:train_size]
val_targets = chlorophyll_test_tensor[train_size:val_end]
test_targets = chlorophyll_test_tensor[val_end:]


Testing without the tuner

In [None]:
# ConvLSTM settings for the small test
batch_size = 1
input_channels = features_test_tensor.size(1)  # size of the feature (channel) axis
hidden_channels = 16                           # deliberately small
kernel_size = 3
num_layers = 2                                 # two stacked ConvLSTM layers

# Build the model from the settings above
conv_lstm = ConvLSTM(input_channels, hidden_channels, kernel_size, num_layers)

In [None]:
# Mean squared error loss, Adam optimiser, and a handful of epochs
criterion = nn.MSELoss()
optimizer = optim.Adam(conv_lstm.parameters(), lr=0.001)
epochs = 5

for epoch in range(epochs):
    # --- training: one optimiser step on the whole training sequence ---
    conv_lstm.train()
    optimizer.zero_grad()

    train_output = conv_lstm(train_features)
    predicted_chlorophyll_train = train_output[:, 0]  # first output channel
    train_loss = criterion(predicted_chlorophyll_train, train_targets)

    train_loss.backward()
    optimizer.step()

    # --- validation: forward pass only, gradients switched off ---
    conv_lstm.eval()
    with torch.no_grad():
        val_output = conv_lstm(val_features)
        predicted_chlorophyll_val = val_output[:, 0]  # first output channel
        val_loss = criterion(predicted_chlorophyll_val, val_targets)

    # Per-epoch report
    print(f"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss.item()}, Val Loss: {val_loss.item()}")

print("Training and validation complete!")


Now we can apply our hyperparameter tuning.

In [None]:
# Prepare your hyperparameter ranges
hidden_channels_list = [8, 16, 32]
kernel_size_list = [3, 5]
num_layers_list = [1, 2]
lr_list = [0.001, 0.0005]

# The small-test tensors were never moved off the CPU, so the models must be
# built on the CPU as well. Selecting CUDA here would place the model and its
# inputs on different devices, both while tuning and in the evaluation below.
device = torch.device("cpu")


# Create an instance of HyperparameterTuner with your prepared data
tuner = HyperparameterTuner(
    input_channels=train_features.shape[1],  # size of the feature (channel) axis
    train_data=(train_features, train_targets),  # Training features and targets
    val_data=(val_features, val_targets),  # Validation features and targets
    criterion=nn.MSELoss(),  # Loss function
    device=device  # CPU for the small test
)

# Call the tuning method
best_model, best_config = tuner.tune(
    hidden_channels_list=hidden_channels_list,
    kernel_size_list=kernel_size_list,
    num_layers_list=num_layers_list,
    lr_list=lr_list,
    epochs=5  # Number of epochs for each tuning iteration
)

In [None]:
# Display the module structure of the best model returned by the tuner.
best_model

In [None]:
# Evaluate the tuned model on the held-out part of the small test data;
# `criterion` is the MSE loss created in the manual training cell.
predicted_chlorophyll_test = test_model(best_model, test_features, test_targets, criterion)


In [None]:
# First test time step only, flattened over the remaining axes
true_values = test_targets[0].flatten().cpu().numpy()
predicted_values = predicted_chlorophyll_test[0].flatten().cpu().numpy()

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(true_values, label='True Values')
ax.plot(predicted_values, label='Predicted Values')
ax.legend()
ax.set_title('Predicted vs. Actual Chlorophyll Concentrations (Test Data)')
plt.show()


## Split the data

In [None]:
# Split fractions (chronological): 70% train, 20% validation, remainder test.
# The indices below are derived from these values so that the declared
# fractions and the actual split cannot drift apart.
train_split = 0.7
val_split = 0.2
test_split = 1 - train_split - val_split

# Number of time steps
n_time_steps = features_tensor.shape[0]

# Boundaries of the three consecutive blocks along the time axis
train_idx = int(n_time_steps * train_split)
val_idx = int(n_time_steps * (train_split + val_split))

# Split the features and target tensors along the time dimension
train_features = features_tensor[:train_idx, :, :, :, :]
train_target = chlorophyll_tensor[:train_idx, :, :, :]

val_features = features_tensor[train_idx:val_idx, :, :, :, :]
val_target = chlorophyll_tensor[train_idx:val_idx, :, :, :]

test_features = features_tensor[val_idx:, :, :, :, :]
test_target = chlorophyll_tensor[val_idx:, :, :, :]

# Every time step must land in exactly one of the three parts
assert train_features.shape[0] + val_features.shape[0] + test_features.shape[0] == n_time_steps

# Print shapes to verify correctness
print(f"Train features shape: {train_features.shape}")
print(f"Train target shape: {train_target.shape}")
print(f"Validation features shape: {val_features.shape}")
print(f"Validation target shape: {val_target.shape}")
print(f"Test features shape: {test_features.shape}")
print(f"Test target shape: {test_target.shape}")


Now move to the GPU.

In [None]:
# Use the GPU when one is present, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Transfer each features/target pair to the selected device
train_features, train_target = train_features.to(device), train_target.to(device)
val_features, val_target = val_features.to(device), val_target.to(device)
test_features, test_target = test_features.to(device), test_target.to(device)


In [None]:
!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

In [None]:
# Device on which the models are built (matches where the split tensors were moved)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Search grid: every combination of these values is trained
hidden_channels_list = [8, 16, 32]
kernel_size_list = [3, 5]
num_layers_list = [1, 2]
lr_list = [0.001, 0.0005]

# Tuner over the full-data train/validation split
tuner = HyperparameterTuner(
    train_features.shape[1],        # input_channels: size of the feature axis
    (train_features, train_target), # train_data
    (val_features, val_target),     # val_data
    nn.MSELoss(),                   # criterion
    device,                         # device
)

# Run the grid search, up to 5 epochs per configuration
best_model, best_config = tuner.tune(
    hidden_channels_list,
    kernel_size_list,
    num_layers_list,
    lr_list,
    epochs=5,
)

In [None]:
# Evaluate on the full-data test split. This uses `test_target` (the tensor
# created by the full split and moved to `device`) rather than `test_targets`,
# which is the CPU tensor left over from the small test, and takes the loss
# from the tuner so the cell does not depend on the small-test `criterion`.
predicted_chlorophyll_test = test_model(best_model, test_features, test_target, tuner.criterion)

# Compare truth and prediction for the first test time step, flattened over
# the remaining axes
plt.figure(figsize=(10, 5))
plt.plot(test_target[0, :, :, :].flatten().cpu().numpy(), label='True Values')
plt.plot(predicted_chlorophyll_test[0, :, :, :].flatten().cpu().numpy(), label='Predicted Values')
plt.xlabel('Flattened grid cell index')
plt.ylabel('chlor_a')
plt.legend()
plt.title('Predicted vs. Actual Chlorophyll Concentrations (Test Data)')
plt.show()