<a href="https://colab.research.google.com/github/franciscoerramuspe/masters_thesis/blob/main/WHOOP_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! gdown --id 10bbWtKE3n1BpAYitwQxeQdfXYvpXhacZ

Downloading...
From: https://drive.google.com/uc?id=10bbWtKE3n1BpAYitwQxeQdfXYvpXhacZ
To: /content/Final_Labeled_Scored_Data.xlsx
100% 20.3M/20.3M [00:00<00:00, 52.4MB/s]


In [3]:
import pandas as pd


file_path = '/content/Final_Labeled_Scored_Data.xlsx'
df = pd.read_excel(file_path)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Print DataFrame info
print("DataFrame shape:", df.shape)
print("\nNaN values in each column:")
print(df.isna().sum())

# Define numerical columns
numerical_columns = ['Activity Strain', 'Altitude change (meters)', 'Altitude gain (meters)',
                     'Asleep duration (min)_x', 'Asleep duration (min)_y', 'Average HR (bpm)_x',
                     'Average HR (bpm)_y', 'Awake duration (min)_x', 'Awake duration (min)_y',
                     'Blood oxygen %', 'Day Strain', 'Deep (SWS) duration (min)_x',
                     'Deep (SWS) duration (min)_y', 'Distance (meters)', 'Duration (min)',
                     'Energy burned (cal)_x', 'Energy burned (cal)_y', 'HR Zone 1 %', 'HR Zone 2 %',
                     'HR Zone 3 %', 'HR Zone 4 %', 'HR Zone 5 %', 'Heart rate variability (ms)',
                     'In bed duration (min)_x', 'In bed duration (min)_y', 'Light sleep duration (min)_x',
                     'Light sleep duration (min)_y', 'Max HR (bpm)_x', 'Max HR (bpm)_y',
                     'REM duration (min)_x', 'REM duration (min)_y', 'Recovery score %',
                     'Respiratory rate (rpm)_x', 'Respiratory rate (rpm)_y', 'Resting heart rate (bpm)',
                     'Skin temp (celsius)', 'Sleep consistency %_x', 'Sleep consistency %_y',
                     'Sleep debt (min)_x', 'Sleep debt (min)_y', 'Sleep efficiency %_x',
                     'Sleep efficiency %_y', 'Sleep need (min)_x', 'Sleep need (min)_y',
                     'Sleep performance %_x', 'Sleep performance %_y', 'Jump - Jump Height (in)_x',
                     'Jump - Jump Height (in)_y']

# Create a 'physical_capability' column if it doesn't exist
if 'physical_capability' not in df.columns:
    df['physical_capability'] = df['Recovery score %'].fillna(0) * 0.5 + \
                                df['Sleep efficiency %_x'].fillna(0) * 0.3 + \
                                df['Average HR (bpm)_x'].fillna(0).clip(upper=100) * 0.2

# Fill NaN values with column means
for col in numerical_columns + ['physical_capability']:
    df[col] = df[col].fillna(df[col].mean())

# Normalize the data
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Prepare data for MLP (flattened sequence)
sequence_length = 7
input_size = len(numerical_columns) * sequence_length

def create_mlp_sequences(data):
    sequences = []
    targets = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length][numerical_columns].values.flatten()
        target = data.iloc[i+sequence_length]['physical_capability']
        sequences.append(seq)
        targets.append(target)
    return torch.FloatTensor(sequences), torch.FloatTensor(targets)

X, y = create_mlp_sequences(df)

if len(X) == 0 or len(y) == 0:
    raise ValueError(f"No valid sequences created. DataFrame has {len(df)} rows, using sequence length {sequence_length}.")

print(f"Created {len(X)} sequences")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the MLP model
class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, output_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        out = self.sigmoid(self.fc3(out)) * 100  # Scale output to 0-100
        return out

# Initialize the model
hidden_size = 128
output_size = 1

model = MLPModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 100
batch_size = 32

for epoch in range(num_epochs):
    model.train()
    for i in range(0, len(X_train), batch_size):
        batch_X = X_train[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]

        outputs = model(batch_X)
        loss = criterion(outputs, batch_y.unsqueeze(1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test)
        val_loss = criterion(val_outputs, y_test.unsqueeze(1))

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')

# Prediction
model.eval()
with torch.no_grad():
    test_output = model(X_test)
    print("Predicted physical capability percentages:", test_output.numpy().flatten())

DataFrame shape: (82811, 89)

NaN values in each column:
Activity Strain              10087
Activity name                10087
Altitude change (meters)     61839
Altitude gain (meters)       61839
Answered yes                  6620
                             ...  
What is your gender?           725
Scan Date_x                   5559
Jump - Jump Height (in)_x     5748
Scan Date_y                  82694
Jump - Jump Height (in)_y    82694
Length: 89, dtype: int64


  return torch.FloatTensor(sequences), torch.FloatTensor(targets)


Created 82804 sequences
Epoch [1/100], Loss: 0.4385, Val Loss: 1.0696
Epoch [2/100], Loss: 0.3458, Val Loss: 0.8852
Epoch [3/100], Loss: 0.1085, Val Loss: 0.7167
Epoch [4/100], Loss: 0.0555, Val Loss: 0.7081
Epoch [5/100], Loss: 0.0883, Val Loss: 0.7703
Epoch [6/100], Loss: 0.1777, Val Loss: 0.6774
Epoch [7/100], Loss: 0.0342, Val Loss: 0.6856
Epoch [8/100], Loss: 0.0284, Val Loss: 0.6464
Epoch [9/100], Loss: 0.0782, Val Loss: 0.6589
Epoch [10/100], Loss: 0.0235, Val Loss: 0.6396
Epoch [11/100], Loss: 0.0141, Val Loss: 0.6351
Epoch [12/100], Loss: 0.0062, Val Loss: 0.6624
Epoch [13/100], Loss: 0.0507, Val Loss: 0.6352
Epoch [14/100], Loss: 0.0456, Val Loss: 0.6450
Epoch [15/100], Loss: 0.0407, Val Loss: 0.6511
Epoch [16/100], Loss: 0.1148, Val Loss: 0.6407
Epoch [17/100], Loss: 0.0007, Val Loss: 0.6205
Epoch [18/100], Loss: 0.0306, Val Loss: 0.6405
Epoch [19/100], Loss: 0.0124, Val Loss: 0.6267
Epoch [20/100], Loss: 0.0562, Val Loss: 0.6246
Epoch [21/100], Loss: 0.0235, Val Loss: 0.616