# Setup

In [17]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn

import pickle

In [14]:
## FEATURE STATISTICS SAVED FROM PREPROCESSING ##
# Each feature is listed as a (mean, std) pair.
# presumably these are the values used to standardise the windows -- TODO confirm

# Accelerometer axes
X_MEAN, X_STD = 21.234056, 35.144448
Y_MEAN, Y_STD = -16.93773, 37.25645
Z_MEAN, Z_STD = 24.948428, 23.991521

# Heart rate and subject descriptors
HR_MEAN, HR_STD = 81.085869, 29.852576  # bpm
WEIGHT_MEAN, WEIGHT_STD = 73.272422, 11.258137  # kg
HEIGHT_MEAN, HEIGHT_STD = 172.332927, 6.570272  # cm
GENDER_MEAN, GENDER_STD = 0.721742, 0.448141  # 0:female, 1:male -- represents class imbalance
AGE_MEAN, AGE_STD = 30.236566, 5.199545  # yrs

# Regression target (energy expenditure)
# NOTE(review): this magnitude looks large for kcal/min -- confirm the unit.
EE_MEAN, EE_STD = 3481.625380, 2752.199323  # kcal/min

# Model designs

## CNN feature generator

In [4]:
class CNNFeatureGenerator(nn.Module):
    """1-D convolutional encoder that turns a window into a 64-dim vector.

    Three Conv1d(kernel_size=3, padding=1) + ReLU stages widen the channels
    num_features -> 16 -> 32 -> 64. The first two stages are each followed
    by a stride-2 max-pool; the last one is followed by an adaptive average
    pool that collapses the remaining length to 1.
    """

    def __init__(self, num_features):
        super().__init__()

        def conv3(in_ch, out_ch):
            # Length-preserving convolution (kernel 3, stride 1, padding 1).
            return nn.Conv1d(in_channels=in_ch, out_channels=out_ch,
                             kernel_size=3, stride=1, padding=1)

        def halve():
            return nn.MaxPool1d(kernel_size=2, stride=2)

        # Layers are created in execution order so that parameter
        # initialisation and the Sequential indices (0, 3, 6 for the convs)
        # stay stable for saved checkpoints.
        stages = [
            conv3(num_features, 16), nn.ReLU(), halve(),
            conv3(16, 32), nn.ReLU(), halve(),
            conv3(32, 64), nn.ReLU(), nn.AdaptiveAvgPool1d(1),
        ]
        self.feature_extractor = nn.Sequential(*stages)

    def forward(self, all_features):
        """
        Encode a batch of windows.

        Args:
            all_features: Tensor of shape (batch_size, num_features, window_size)

        Returns:
            Tensor: Extracted features of shape (batch_size, 64)
        """
        pooled = self.feature_extractor(all_features)  # (batch_size, 64, 1)
        batch_size = pooled.size(0)
        # Drop the trailing length-1 axis left by the adaptive pool.
        return pooled.view(batch_size, -1)

## FCN Regressor

In [None]:
class FCNRegressor(nn.Module):
    """Fully connected head mapping a feature vector to one scalar.

    Layout: feature_size -> 128 -> 64 -> 1, with ReLU after the two hidden
    layers and dropout (p=0.5) after the first hidden layer only.
    """

    def __init__(self, feature_size=64):
        super().__init__()

        wide, narrow = 128, 64
        self.regressor = nn.Sequential(
            nn.Linear(in_features=feature_size, out_features=wide),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=wide, out_features=narrow),
            nn.ReLU(),
            nn.Linear(in_features=narrow, out_features=1),
        )

    def forward(self, cnn_features):
        """
        Run the regression head.

        Args:
            cnn_features: Tensor of shape (batch_size, feature_size)

        Returns:
            Tensor: Predicted energy expenditure (batch_size, 1)
        """
        return self.regressor(cnn_features)

## CNN to FCN Model

In [8]:
class CNNFCNRegressor(nn.Module):
    """End-to-end regressor: CNN feature generator followed by the FCN head."""

    def __init__(self, num_features=8):
        super().__init__()

        # The FCN head keeps its default feature_size (64), which is the
        # width produced by CNNFeatureGenerator.
        self.cnn_feature_generator = CNNFeatureGenerator(num_features=num_features)
        self.fcn_regressor = FCNRegressor()

    def forward(self, all_features):
        """
        Encode the windows with the CNN, then regress with the FCN.

        Args:
            all_features: Tensor of shape (batch_size, num_features, window_size)

        Returns:
            Tensor: Predicted energy expenditure (batch_size, 1)
        """
        encoded = self.cnn_feature_generator(all_features)
        return self.fcn_regressor(encoded)

## LSTM Module

In [9]:
class LSTMModule(nn.Module):
    """Batch-first stacked LSTM that returns the output at the last timestep.

    Args:
        input_size: Size of each input vector.
        hidden_size: Size of the LSTM hidden state (and of the output).
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked layers.
            Ignored (treated as 0) when num_layers == 1.
    """

    def __init__(self, input_size=64, hidden_size=64, num_layers=2, dropout=0.5):
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers. With a
        # single layer a non-zero value has no effect and triggers a
        # UserWarning, so it is switched off in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

    def forward(self, cnn_features):
        """
        Forward pass through the LSTM module.

        Args:
            cnn_features: Tensor of shape (batch_size, seq_len, input_size)

        Returns:
            Tensor: Output features from the LSTM (batch_size, hidden_size)
        """
        lstm_out, _ = self.lstm(cnn_features)  # lstm_out: (batch_size, seq_len, hidden_size)
        lstm_features = lstm_out[:, -1, :]  # Take the last timestep (batch_size, hidden_size)
        return lstm_features

## CNN to LSTM to FCN Model

In [None]:
class CNNLSTMFCNRegressor(nn.Module):
    """Regressor chaining the CNN encoder, the LSTM module and the FCN head.

    Note: the CNN emits a single vector per window, which is fed to the
    LSTM as a sequence of length 1. lstm_input_size therefore has to equal
    the CNN output width (64).
    """

    def __init__(self, cnn_input_features=8, lstm_input_size=64, hidden_size=64, num_layers=2):
        super().__init__()

        self.cnn = CNNFeatureGenerator(num_features=cnn_input_features)
        self.lstm = LSTMModule(
            input_size=lstm_input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        # The head consumes the LSTM output, so its input width is hidden_size.
        self.fcn = FCNRegressor(feature_size=hidden_size)

    def forward(self, all_features):
        """
        Run CNN -> LSTM -> FCN on a batch of windows.

        Args:
            all_features: Tensor of shape (batch_size, num_features, window_size)

        Returns:
            Tensor: Predicted energy expenditure (batch_size, 1)
        """
        encoded = self.cnn(all_features)           # (batch_size, feature_size)
        as_sequence = encoded.unsqueeze(1)         # (batch_size, seq_len=1, feature_size)
        summarised = self.lstm(as_sequence)        # (batch_size, hidden_size)
        return self.fcn(summarised)                # (batch_size, 1)

# Load and prep data

## Load data

In [10]:
def display_windows(subject_windows_list):
    """Display the first and the last window of a subject's window list.

    Relies on the notebook-provided `display` function.
    """
    for position in (0, -1):
        display(subject_windows_list[position])

In [15]:
# Location of the pickled, preprocessed windows.
windows_path = 'data/WEEE Dataset/processed/windows.pkl'
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# pickles that come from a trusted source.
with open(windows_path, 'rb') as f:
    windows_dict = pickle.load(f)

# Show the top-level keys of the loaded dictionary as the cell output.
windows_dict.keys()

dict_keys(['P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'P07', 'P08', 'P09', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15', 'P16', 'P17'])

In [16]:
# Preview the first and last window stored under key 'P01'.
display_windows(windows_dict['P01'])

Unnamed: 0,X,Y,Z,heart_rate,Weight,Height,Gender,Age,EE[kcal/min]
0,0.220972,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
1,0.220972,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
2,0.220972,0.454626,1.252592,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
3,0.249426,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
4,0.249426,0.454626,1.294273,0.455724,-0.237377,-0.963877,0,-0.237822,-0.422138
...,...,...,...,...,...,...,...,...,...
187,0.249426,0.454626,1.252592,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
188,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
189,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203
190,0.249426,0.454626,1.294273,0.449849,-0.237377,-0.963877,0,-0.237822,-0.384203


Unnamed: 0,X,Y,Z,heart_rate,Weight,Height,Gender,Age,EE[kcal/min]
0,-0.063568,-0.780061,0.794096,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
1,-0.205838,-0.806902,0.544008,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
2,-0.120476,-0.780061,0.877459,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
3,-0.262746,-0.860584,0.544008,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
4,-0.006660,-0.887425,0.335601,2.373159,-0.237377,-0.963877,0,-0.237822,0.846824
...,...,...,...,...,...,...,...,...,...
187,-0.376562,-0.806902,0.627371,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
188,0.021794,-0.833743,0.669052,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
189,-0.177384,-0.967948,0.168875,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886
190,0.050248,-0.941107,0.377282,2.003558,-0.237377,-0.963877,0,-0.237822,0.384886


## Extract features

In [18]:
def extract_features(windows_dict, label_column='EE[kcal/min]', *, channels_first=False):
    """
    Prepares the features and labels as PyTorch tensors for leave-one-subject-out cross-validation.

    Each window is a DataFrame whose rows are timesteps and whose columns are
    the features plus the label column. The label of a window is the mean of
    its label column.

    Args:
        windows_dict: Dictionary mapping subject id -> list of window DataFrames.
            All windows of a subject must have the same shape (np.stack
            requirement), and each subject needs at least one window.
        label_column: The column to extract as labels.
        channels_first: If False (default, the historical behaviour), features
            are laid out as (num_windows, window_size, num_features), i.e.
            exactly as the DataFrames are. If True, they are returned as
            (num_windows, num_features, window_size), the layout nn.Conv1d
            expects.

    Returns:
        data_dict: A dictionary with, per subject, a dict holding
            'features' (float32 tensor, layout per `channels_first`) and
            'labels' (float32 tensor of shape (num_windows,)).
    """
    data_dict = {}
    for subject_id, windows in windows_dict.items():
        # DataFrame.values is (rows, columns) == (window_size, num_features).
        feature_arrays = [window.drop(columns=[label_column]).values for window in windows]
        # One scalar target per window: the mean of the label over the window.
        label_means = [window[label_column].mean() for window in windows]

        features = torch.tensor(np.stack(feature_arrays), dtype=torch.float32)
        if channels_first:
            # (num_windows, window_size, num_features) -> (num_windows, num_features, window_size)
            features = features.permute(0, 2, 1).contiguous()

        data_dict[subject_id] = {
            'features': features,
            'labels': torch.tensor(np.array(label_means), dtype=torch.float32),  # Shape: (num_windows,)
        }
    return data_dict

## LOO-CV Split

In [19]:
def leave_one_out_split(data_dict):
    """
    Lazily produce one train/test split per subject (leave-one-subject-out).

    Args:
        data_dict: Dictionary with subject-wise features and labels as PyTorch tensors.

    Yields:
        train_data: (features, labels) concatenated along dim 0 over all other subjects.
        test_data: (features, labels) of the left-out subject.
    """
    for held_out in list(data_dict):
        held_out_entry = data_dict[held_out]
        # Every subject except the held-out one, in dictionary order.
        remaining = [entry for subject, entry in data_dict.items() if subject != held_out]

        train_data = (
            torch.cat([entry['features'] for entry in remaining], dim=0),
            torch.cat([entry['labels'] for entry in remaining], dim=0),
        )
        test_data = (held_out_entry['features'], held_out_entry['labels'])

        yield train_data, test_data