# Setup

## Git Repository

### Cloning

In [None]:
# !kill -9 -1

In [1]:
# Clone repository
!rm -rf /content/CSML
!git clone https://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@github.com/TylerAnderton/CSML.git
%cd CSML/Final-Project/EE-Estimation/

Cloning into 'CSML'...
remote: Enumerating objects: 922, done.[K
remote: Counting objects: 100% (313/313), done.[K
remote: Compressing objects: 100% (265/265), done.[K
remote: Total 922 (delta 54), reused 297 (delta 43), pack-reused 609 (from 1)[K
Receiving objects: 100% (922/922), 476.04 MiB | 26.00 MiB/s, done.
Resolving deltas: 100% (177/177), done.
Updating files: 100% (753/753), done.
/content/CSML/Final-Project/EE-Estimation


### Saving

In [32]:
%cd Final-Project/EE-Estimation/

/content/CSML/Final-Project/EE-Estimation


In [23]:
!pwd

/content/CSML/Final-Project/EE-Estimation


In [24]:
!ls

data		 generate_requirements.py  model_training_dist.ipynb   references
eda.ipynb	 logs			   model_training_local.ipynb  requirements_full.txt
environment.yml  models			   preprocessing.ipynb	       requirements.txt


In [22]:
# Commit & save changes
!git config --global user.name "Tyler Anderton"
!git config --global user.email "22508406+TylerAnderton@users.noreply.github.com"

!git add .
!git status

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [None]:
!git commit -m "Your commit message"

!git push

## Notebook Setup

In [3]:
# !pip install -r requirements.txt
!pip install -U "ray[data,train,tune,serve]"

Collecting ray[data,serve,train,tune]
  Downloading ray-2.39.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (17 kB)
Collecting tensorboardX>=1.9 (from ray[data,serve,train,tune])
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting opencensus (from ray[data,serve,train,tune])
  Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)
Collecting colorful (from ray[data,serve,train,tune])
  Downloading colorful-0.5.6-py2.py3-none-any.whl.metadata (16 kB)
Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[data,serve,train,tune])
  Downloading virtualenv-20.28.0-py3-none-any.whl.metadata (4.4 kB)
Collecting starlette (from ray[data,serve,train,tune])
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting watchfiles (from ray[data,serve,train,tune])
  Downloading watchfiles-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting aiohttp-cors (from ray[data,serve,train,tune])
  Downloading

In [19]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.utils.tensorboard as tb

import ray
from ray import tune, train
from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
ray.init(ignore_reinit_error=True)

import os
import pickle

2024-11-26 20:03:05,771	INFO worker.py:1810 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [5]:
## SAVE FEATURE STATISTICS FROM PREPROCESSING ##
X_MEAN = 21.234056
Y_MEAN = -16.93773
Z_MEAN = 24.948428

HR_MEAN = 81.085869 # bpm
WEIGHT_MEAN = 73.272422 # kg
HEIGHT_MEAN = 172.332927 # cm
GENDER_MEAN = 0.721742 # 0:female, 1:male -- represents class imbalance
AGE_MEAN = 30.236566 # yrs

EE_MEAN = 3481.625380 # kcal/min

X_STD = 35.144448
Y_STD = 37.25645
Z_STD = 23.991521

HR_STD = 29.852576
WEIGHT_STD = 11.258137
HEIGHT_STD = 6.570272
GENDER_STD = 0.448141
AGE_STD = 5.199545

EE_STD = 2752.199323

# Model designs

## CNN feature generator

In [6]:
class CNNFeatureGenerator(nn.Module):
    def __init__(self, num_features, num_conv_layers=3):
        super(CNNFeatureGenerator, self).__init__()
        layers = []
        in_channels = num_features

        for i in range(num_conv_layers):
            out_channels = 16 * (2 ** i)  # Double channels with each layer
            layers.extend([
                nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU()
            ])

            if i < num_conv_layers - 1:
                layers.append(nn.MaxPool1d(kernel_size=2, stride=2))

            in_channels = out_channels

        layers.append(nn.AdaptiveAvgPool1d(1))  # Final pooling to reduce to 1D
        self.feature_extractor = nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward pass through the CNN feature extractor.

        Args:
            x: Input tensor of shape (batch_size, num_features, window_size).

        Returns:
            Tensor: Extracted features of shape (batch_size, out_channels).
        """
        x = self.feature_extractor(x)
        return x.view(x.size(0), -1)  # Flatten for FCN

## FCN Regressor

In [7]:
class FCNRegressor(nn.Module):
    def __init__(self, feature_size=64, num_fcn_layers=3, layer_width=128):
        super(FCNRegressor, self).__init__()
        layers = []
        for _ in range(num_fcn_layers - 1):
            layers.extend([
                nn.Linear(feature_size, layer_width),
                nn.ReLU(),
                nn.Dropout(0.5)
            ])
            feature_size = layer_width  # Update input size for next layer
            layer_width //= 2

        layers.append(nn.Linear(feature_size, 1))  # Final regression layer
        self.regressor = nn.Sequential(*layers)

    def forward(self, x):
        return self.regressor(x)

## CNN to FCN Model

In [8]:
class CNNFCNRegressor(nn.Module):
    def __init__(self, num_features=8, num_conv_layers=3, num_fcn_layers=3, fcn_width=128):
        """
        Combines a CNN feature generator with an FCN regressor.

        Args:
            num_features: Number of input features (e.g., channels in the input tensor).
            num_conv_layers: Number of convolutional layers in the CNN.
            num_fcn_layers: Number of fully connected layers in the FCN.
            fcn_width: Width of the hidden layers in the FCN.
        """
        super(CNNFCNRegressor, self).__init__()

        self.cnn = CNNFeatureGenerator(num_features=num_features, num_conv_layers=num_conv_layers)
        conv_output_size = 16 * (2 ** (num_conv_layers - 1))  # Compute feature size based on CNN output channels
        self.fcn = FCNRegressor(feature_size=conv_output_size, num_fcn_layers=num_fcn_layers, layer_width=fcn_width)

    def forward(self, all_features):
        """
        Forward pass through the combined model.

        Args:
            all_features: Tensor of shape (batch_size, num_features, window_size)

        Returns:
            Tensor: Predicted energy expenditure (batch_size, 1)
        """
        cnn_features = self.cnn(all_features)  # Extract features with CNN
        output = self.fcn(cnn_features)  # Regress with FCN
        return output

## LSTM Module

In [9]:
class LSTMModule(nn.Module):
    def __init__(self, input_size=64, hidden_size=64, num_layers=2):
        super(LSTMModule, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.5
        )

    def forward(self, cnn_features):
        """
        Forward pass through the LSTM module.

        Args:
            cnn_features: Tensor of shape (batch_size, seq_len, input_size)

        Returns:
            Tensor: Output features from the LSTM (batch_size, hidden_size)
        """
        lstm_out, _ = self.lstm(cnn_features)  # lstm_out: (batch_size, seq_len, hidden_size)
        lstm_features = lstm_out[:, -1, :]  # Take the last timestep (batch_size, hidden_size)
        return lstm_features

## CNN to LSTM to FCN Model

In [10]:
class CNNLSTMFCNRegressor(nn.Module):
    def __init__(self, cnn_input_features=8, num_conv_layers=3, lstm_hidden_size=64, lstm_num_layers=2, num_fcn_layers=3, fcn_width=128):
        super(CNNLSTMFCNRegressor, self).__init__()

        self.cnn = CNNFeatureGenerator(num_features=cnn_input_features, num_conv_layers=num_conv_layers)
        conv_output_size = 16 * (2 ** (num_conv_layers - 1))  # Compute feature size based on CNN output channels
        self.lstm = LSTMModule(input_size=conv_output_size, hidden_size=lstm_hidden_size, num_layers=lstm_num_layers)
        self.fcn = FCNRegressor(feature_size=lstm_hidden_size, num_fcn_layers=num_fcn_layers, layer_width=fcn_width)  # Feature size matches LSTM hidden size

    def forward(self, all_features):
        """
        Forward pass through the CNN-LSTM-FCN regressor.

        Args:
            all_features: Tensor of shape (batch_size, num_features, window_size)

        Returns:
            Tensor: Predicted energy expenditure (batch_size, 1)
        """
        cnn_features = self.cnn(all_features)  # Extract features from CNN (batch_size, feature_size)
        lstm_input = cnn_features.unsqueeze(1)  # Add temporal dimension (batch_size, seq_len=1, feature_size)
        lstm_features = self.lstm(lstm_input)  # Pass through LSTM (batch_size, lstm_hidden_size)
        output = self.fcn(lstm_features)  # Pass through FCN regressor (batch_size, 1)
        return output

# Load and prep data

## Load data

In [11]:
def display_windows(subject_windows_list):
        display(subject_windows_list[0])
        display(subject_windows_list[-1])

In [12]:
windows_path = 'data/WEEE Dataset/processed/windows.pkl'
with open(windows_path, 'rb') as f:
    windows_dict = pickle.load(f)

windows_dict.keys()

dict_keys(['P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15', 'P16', 'P17'])

In [13]:
display_windows(windows_dict['P01'])

Unnamed: 0,X,Y,Z,heart_rate,Weight,Height,Gender,Age,EE[kcal/min]
0,0.220972,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
1,0.220972,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
2,0.220972,0.454626,1.252592,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
3,0.249426,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
4,0.249426,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
...,...,...,...,...,...,...,...,...,...
187,0.249426,0.454626,1.252592,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
188,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
189,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
190,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203


Unnamed: 0,X,Y,Z,heart_rate,Weight,Height,Gender,Age,EE[kcal/min]
0,-0.063568,-0.780061,0.794096,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
1,-0.205838,-0.806902,0.544008,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
2,-0.120476,-0.780061,0.877459,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
3,-0.262746,-0.860584,0.544008,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
4,-0.006660,-0.887425,0.335601,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
...,...,...,...,...,...,...,...,...,...
187,-0.376562,-0.806902,0.627371,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
188,0.021794,-0.833743,0.669052,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
189,-0.177384,-0.967948,0.168875,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
190,0.050248,-0.941107,0.377282,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886


## Extract features

In [14]:
def extract_features(windows_dict, label_column='EE[kcal/min]'):
    """
    Prepares the features and labels as PyTorch tensors for leave-one-subject-out cross-validation.

    Args:
        windows_dict: Dictionary of preprocessed windows.
        label_column: The column to extract as labels.

    Returns:
        data_dict: A dictionary with subject-wise splits for features and labels as PyTorch tensors.
    """
    data_dict = {}
    for subject_id, windows in windows_dict.items():
        features = []
        labels = []
        for window in windows:
            # Drop the label column and convert to NumPy
            feature_array = window.drop(columns=[label_column]).values
            features.append(feature_array.T)

            # Compute the mean of the label for each window
            label_mean = window[label_column].mean()
            labels.append(label_mean)

        # Convert to PyTorch tensors and store
        data_dict[subject_id] = {
            'features': torch.tensor(np.stack(features), dtype=torch.float32),  # Shape: (num_windows, num_features, window_size)
            'labels': torch.tensor(np.array(labels), dtype=torch.float32)       # Shape: (num_windows,)
        }
    return data_dict

In [15]:
data_dict = extract_features(windows_dict)
data_dict

{'P01': {'features': tensor([[[ 0.2210,  0.2210,  0.2210,  ...,  0.2494,  0.2494,  0.2210],
           [ 0.4546,  0.4546,  0.4546,  ...,  0.4546,  0.4546,  0.4546],
           [ 1.2943,  1.2943,  1.2526,  ...,  1.2943,  1.2943,  1.2943],
           ...,
           [-0.9639, -0.9639, -0.9639,  ..., -0.9639, -0.9639, -0.9639],
           [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
           [-0.2378, -0.2378, -0.2378,  ..., -0.2378, -0.2378, -0.2378]],
  
          [[ 0.2494,  0.2494,  0.2494,  ...,  0.2494,  0.2494,  0.2210],
           [ 0.4546,  0.4546,  0.4546,  ...,  0.4546,  0.4546,  0.4546],
           [ 1.2943,  1.2943,  1.2943,  ...,  1.2943,  1.2943,  1.2943],
           ...,
           [-0.9639, -0.9639, -0.9639,  ..., -0.9639, -0.9639, -0.9639],
           [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
           [-0.2378, -0.2378, -0.2378,  ..., -0.2378, -0.2378, -0.2378]],
  
          [[ 0.2494,  0.2494,  0.2494,  ...,  0.2494,  0.2494,  0

## LOO-CV Split

In [16]:
def leave_one_out_split(data_dict):
    """
    Generator for leave-one-subject-out cross-validation splits.

    Args:
        data_dict: Dictionary with subject-wise features and labels as PyTorch tensors.

    Yields:
        train_data: Training features and labels as PyTorch tensors.
        test_data: Test features and labels as PyTorch tensors for the left-out subject.
    """
    subjects = list(data_dict.keys())
    for test_subject in subjects:
        train_features = []
        train_labels = []
        for subject, data in data_dict.items():
            if subject == test_subject:
                test_features = data['features']
                test_labels = data['labels']
            else:
                train_features.append(data['features'])
                train_labels.append(data['labels'])

        # Combine training data using PyTorch tensors
        train_features = torch.cat(train_features, dim=0)
        train_labels = torch.cat(train_labels, dim=0)

        yield (train_features, train_labels), (test_features, test_labels)

In [17]:
loocv_splits = leave_one_out_split(data_dict)
loocv_splits

<generator object leave_one_out_split at 0x7e7bd5142960>

# Train Models

## Training function

### Base

In [None]:
def train_model_base(config, train_data, test_data):
    """
    Training loop compatible with Ray Tune for hyperparameter optimization.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        train_data: Tuple of training features and labels as PyTorch tensors. Includes data from all but one subject.
        - train_features: Training feature tensor (shape: [num_samples, num_features, window_size]).
        - train_labels: Training label tensor (shape: [num_samples]).
        test_data: Tuple of testing features and labels as PyTorch tensors. Includes data from the left-out subject.
        - test_features: Testing feature tensor (shape: [num_samples, num_features, window_size]).
        - test_labels: Testing label tensor (shape: [num_samples]).
    """
    # Device
    device = torch.device('mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu')
    if 'print' in config and config['print']:
        print(f'Using device: {device}')

    # Unpack config
    learning_rate = config['learning_rate']
    batch_size = config['batch_size']
    epochs = config['epochs']
    model_class = config['model']
    model_class_name = model_class.__name__

    CNN_INPUT_FEATURES = 8 # config.get('cnn_input_features', 8)
    num_conv_layers = config.get('num_conv_layers', 3)
    lstm_hidden_size = config.get('lstm_hidden_size', 64)
    lstm_num_layers = config.get('lstm_num_layers', 2)
    num_fcn_layers = config.get('num_fcn_layers', 3)
    fcn_width = config.get('fcn_width', 128)

    # Logging
    model_name = f'{model_class_name}_lr{learning_rate}_batch{batch_size}_epochs{epochs}' # add subject name & device?
    if 'log_dir' in config:
        log_dir = config['log_dir']
        os.makedirs(log_dir, exist_ok=True)
        tb_logger = tb.SummaryWriter(os.path.join(log_dir, model_name))
    else:
        tb_logger = None

    # Model, loss, and optimizer
    model = model_class(
        num_features=CNN_INPUT_FEATURES,
        num_conv_layers=num_conv_layers,
        num_fcn_layers=num_fcn_layers,
        fcn_width=fcn_width
    ).to(device) if model_class_name == 'CNNFCNRegressor' else model_class(
        cnn_input_features=CNN_INPUT_FEATURES,
        num_conv_layers=num_conv_layers,
        lstm_hidden_size=lstm_hidden_size,
        lstm_num_layers=lstm_num_layers,
        num_fcn_layers=num_fcn_layers,
        fcn_width=fcn_width
    ).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Create DataLoaders
    train_loader = DataLoader(TensorDataset(*train_data), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(TensorDataset(*test_data), batch_size=batch_size, shuffle=False)

    # Early stopping
    patience = config.get('early_stop_patience', None)
    best_val_loss = float('inf')
    epochs_no_improve = 0

    global_step = 0
    for epoch in range(epochs):
        if 'print' in config and config['print']:
            print(f'Epoch: {epoch+1}/{epochs}')

        # Training
        model.train()
        train_loss = 0.0
        for train_batch, (inputs, labels) in enumerate(train_loader):
            if 'print' in config and config['print']:
                print(f'Batch: {train_batch+1}')
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            global_step += 1

        train_loss /= len(train_loader)

        if tb_logger:
            tb_logger.add_scalar('train_loss', train_loss, global_step)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
                val_loss += loss.item()

        val_loss /= len(test_loader)

        if tb_logger:
            tb_logger.add_scalar('val_loss', val_loss, global_step)

        if 'print' in config and config['print']:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        if (epoch + 1) % 5 == 0 and 'model_dir' in config:
            model_dir = config['model_dir']
            os.makedirs(model_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(model_dir, model_name+f'_epoch{epoch+1}.pt'))

        if 'tune' in config and not ('loocv' in config and config['loocv']):
            train.report(
                {
                    'train_loss':train_loss,
                    'val_loss':val_loss,
                    'model_name':model_name,
                    'epoch':epoch+1
                }
            )

        # Early stopping
        if patience:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                if 'print' in config and config['print']:
                    print(f"Early stopping triggered at epoch {epoch+1}.")
                    print(f"Best Validation Loss: {best_val_loss} at epoch {epoch+1 - patience}")
                break

    # Final checkpoint
    if 'model_dir' in config:
            model_dir = config['model_dir']
            os.makedirs(model_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(model_dir, model_name+f'_epoch{epoch+1}_final.pth'))

    if 'loocv' in config and config['loocv']:
        tune_report = {
            'train_loss':train_loss,
            'val_loss':val_loss,
            'model_name':model_name,
            'epoch':epoch+1
        }

        return tune_report

### LOO-CV

In [None]:
# def train_model_loocv(config, data_dict):
#     """
#     Train the model with Leave-One-Out Cross-Validation (LOO-CV) and aggregate results for Ray Tune.

#     Args:
#         config: Dictionary of hyperparameters provided by Ray Tune.
#         data_dict: Dictionary with subject-wise features and labels for LOO-CV.

#     Reports:
#         The average validation loss across all folds to Ray Tune.
#     """
#     val_losses = []

#     # Iterate over folds using LOO-CV
#     for train_data, test_data in leave_one_out_split(data_dict):
#         fold_report = train_model_base(config, train_data, test_data)
#         val_losses.append(fold_report['val_loss'])

#     # Compute average validation loss across all folds
#     avg_val_loss = sum(val_losses) / len(val_losses)

#     # Report to Ray Tune
#     train.report(
#         {
#             'avg_val_loss':avg_val_loss,
#             'model_name':fold_report['model_name']
#         }
#     )

@ray.remote
def train_fold(config, train_data, test_data):
    return train_model_base(config, train_data, test_data)

def train_model_loocv(config, data_dict):
    """
    Train the model with Leave-One-Out Cross-Validation (LOO-CV), distributing each fold to a separate Ray task, and aggregate results for Ray Tune.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        data_dict: Dictionary with subject-wise features and labels for LOO-CV.

    Reports:
        The average validation loss across all folds to Ray Tune.
    """
    val_loss_futures = []

    # Iterate over folds using LOO-CV
    for train_data, test_data in leave_one_out_split(data_dict):
        val_loss_futures.append(train_fold.remote(config, train_data, test_data))

    # Gather results from all folds
    fold_results = ray.get(val_loss_futures)
    avg_val_loss = sum(result['val_loss'] for result in fold_results) / len(fold_results)

    # Report to Ray Tune
    train.report(
        {
            'avg_val_loss':avg_val_loss,
            'model_name':fold_results[0]['model_name']
        }
    )

## Tuning Wrapper

### Base

In [None]:
def tune_model_base(search_space, train_data, test_data):
    """
    Wrapper for hyperparameter tuning using Ray Tune.

    Args:
        search_space: Dictionary defining the hyperparameter search space.
        train_data: Tuple of training features and labels as PyTorch tensors.
        test_data: Tuple of testing features and labels as PyTorch tensors.
    """
    search_space['tune'] = True  # Ensure the training function reports to Ray Tune

    # CLI Reporter for better logging
    reporter = CLIReporter(
        parameter_columns=['learning_rate', 'batch_size', 'epochs'],
        metric_columns=['train_loss', 'val_loss', 'epoch']
    )

    # Scheduler (e.g., ASHA for early stopping and prioritization)
    scheduler = ASHAScheduler(
        metric='val_loss',
        mode='min',
        grace_period=search_space['early_stop_patience'],
        reduction_factor=2
    )

    # Run the hyperparameter search
    analysis = tune.run(
        tune.with_parameters(
            train_model_base,
            train_data=train_data,
            test_data=test_data
        ),
        config=search_space,
        scheduler=scheduler,
        progress_reporter=reporter
        # num_samples=search_space.get('num_samples', 10)
    )

    # Get the best trial
    best_trial = analysis.get_best_trial(metric="val_loss", mode="min")
    best_config = best_trial.config
    best_val_loss = best_trial.last_result["val_loss"]
    best_epoch = best_trial.last_result["epoch"]

    print("Best Hyperparameters Found:", best_config)
    print("Best Validation Loss:", best_val_loss)
    print("Best Number of Epochs:", best_epoch)

    # If a model path is specified in the search space, print the model save location
    if 'model_dir' in search_space:
        best_model_path = f"{search_space['model_dir']}/{best_trial.last_result['model_name']}.pth"
        print("Best Model Path:", best_model_path)

### LOO-CV

In [None]:
def tune_model_loocv(search_space, data_dict):
    """
    Wrapper for hyperparameter tuning using Ray Tune with Leave-One-Out Cross-Validation.

    Args:
        search_space: Dictionary defining the hyperparameter search space.
        data_dict: Dictionary with subject-wise features and labels for LOO-CV.
    """
    search_space['tune'] = True  # Ensure the training function reports to Ray Tune

    # CLI Reporter for better logging
    model_class_name = search_space['model'].__name__
    parameter_columns = [
        'learning_rate',
        'batch_size',

        'num_conv_layers',
        'num_fcn_layers',
        'fcn_width'
    ] if model_class_name == 'CNNFCNRegressor' else [
        'learning_rate',
        'batch_size',

        'num_conv_layers',
        'num_fcn_layers',
        'fcn_width',
        'lstm_hidden_size',
        'lstm_num_layers'
    ]

    reporter = CLIReporter(
        parameter_columns=parameter_columns,
        metric_columns=['avg_val_loss', 'model_name']
    )

    # Scheduler (e.g., ASHA for early stopping and prioritization)
    scheduler = ASHAScheduler(
        metric='avg_val_loss', # averaged val_loss across all folds
        mode='min',
        grace_period=search_space['early_stop_patience'],
        reduction_factor=2
    )

    # Run the hyperparameter search
    analysis = tune.run(
        tune.with_parameters(
            train_model_loocv,
            data_dict=data_dict
        ),
        config=search_space,
        scheduler=scheduler,
        progress_reporter=reporter
        # resources_per_trial={"cpu": 2, "gpu": 1}  # Adjust based on your Colab environment
    )

    # Get the best trial
    best_trial = analysis.get_best_trial(metric='avg_val_loss', mode='min')
    best_config = best_trial.config
    best_avg_val_loss = best_trial.last_result['avg_val_loss']

    print('Best Hyperparameters Found:', best_config)
    print('Best Avg Validation Loss:', best_avg_val_loss)

    # If a model path is specified in the search space, print the model save location
    if 'model_dir' in search_space:
        best_model_path = f"{search_space['model_dir']}/{best_trial.last_result['model_name']}.pth"
        print('Best Model Path:', best_model_path)

## Test runs

Tests 1-5 were run locally to establish basic training and LOOCV tuning abilities. Continue with Test 6+ for distributed trainining with Colab.

### Test6 CNNFCN Distributed

In [None]:
LOGS_CNNFCN_TEST6 = os.path.join(os.getcwd(), 'logs/CNNFCN/test6')
MODELS_CNNFCN_TEST6 = os.path.join(os.getcwd(), 'models/CNNFCN/test6')

search_space_CNNFCN_test6 = {
    'learning_rate': tune.grid_search([1e-3]), # 1e-4, 5e-3, 1e-3, 5e-2, 1e-2
    'batch_size': tune.grid_search([16, 32]), # 16, 32, 64
    'epochs': 5,

    'model': CNNLSTMFCNRegressor,
    'num_conv_layers':tune.grid_search([1]), # 1, 2, 3
    'num_fcn_layers':tune.grid_search([1]), # 1, 2, 3
    'fcn_width':tune.grid_search([32]), # , 64, 128

    'lstm_hidden_size':tune.grid_search([32]), # , 64, 128
    'lstm_num_layers':tune.grid_search([1]), # , 2, 3

    'log_dir': LOGS_CNNFCN_TEST6,
    'model_dir': MODELS_CNNFCN_TEST6,

    # 'num_samples': 4,
    'early_stop_patience': 3,

    'print': False,
    'tune': True,
    'loocv': True
}

In [None]:
tune_model_loocv(search_space_CNNFCN_test6, data_dict)

2024-11-25 18:05:54,703	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-11-25 18:05:54 (running for 00:00:00.14)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Logical resource usage: 2.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-11-25_14-14-56_534262_37851/artifacts/2024-11-25_18-05-54/train_model_loocv_2024-11-25_18-05-54/driver_artifacts
Number of trials: 2/2 (2 PENDING)


== Status ==
Current time: 2024-11-25 18:05:59 (running for 00:00:05.19)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Logical resource usage: 2.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-11-25_14-14-56_534262_37851/artifacts/2024-11-25_18-05-54/train_model_loocv_2024-11-25_18-05-54/driver_artifacts
Number of trials: 2/2 (2 RUNNING)


== Status ==
Current time: 2024-11-25 18:06:04 (running for 00:00:10

Trial name,avg_val_loss,model_name
train_model_loocv_348a3_00000,0.78766,CNNLSTMFCNRegressor_lr0.001_batch16_epochs5
train_model_loocv_348a3_00001,0.899806,CNNLSTMFCNRegressor_lr0.001_batch32_epochs5


== Status ==
Current time: 2024-11-25 18:08:42 (running for 00:02:47.59)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Logical resource usage: 1.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-11-25_14-14-56_534262_37851/artifacts/2024-11-25_18-05-54/train_model_loocv_2024-11-25_18-05-54/driver_artifacts
Number of trials: 2/2 (1 RUNNING, 1 TERMINATED)


== Status ==
Current time: 2024-11-25 18:08:47 (running for 00:02:52.67)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Logical resource usage: 1.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-11-25_14-14-56_534262_37851/artifacts/2024-11-25_18-05-54/train_model_loocv_2024-11-25_18-05-54/driver_artifacts
Number of trials: 2/2 (1 RUNNING, 1 TERMINATED)


== Status ==
Current time: 2024-11-25 18

2024-11-25 18:11:02,940	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/tyler/ray_results/train_model_loocv_2024-11-25_18-05-54' in 0.0055s.
2024-11-25 18:11:02,942	INFO tune.py:1041 -- Total run time: 308.24 seconds (308.22 seconds for the tuning loop).


== Status ==
Current time: 2024-11-25 18:11:02 (running for 00:05:08.23)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Logical resource usage: 1.0/10 CPUs, 0/0 GPUs
Result logdir: /tmp/ray/session_2024-11-25_14-14-56_534262_37851/artifacts/2024-11-25_18-05-54/train_model_loocv_2024-11-25_18-05-54/driver_artifacts
Number of trials: 2/2 (2 TERMINATED)
+-------------------------------+------------+-----------------+-----------------+--------------+-------------------+------------------+-------------+--------------------+-------------------+----------------+----------------------+
| Trial name                    | status     | loc             |   learning_rate |   batch_size |   num_conv_layers |   num_fcn_layers |   fcn_width |   lstm_hidden_size |   lstm_num_layers |   avg_val_loss | model_name           |
|-------------------------------+------------+-----------------+-----