# Note: this notebook is designed for Google Colab, but it can easily be adapted to any other Python environment

In [None]:
# Mount Google Drive at /content/drive; later cells load and save the model under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Libraries


In [None]:
# Standard Libraries
import pandas as pd
import numpy as np
import plotly.express as px
import math
from math import sqrt

# Torch
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, mean_absolute_error

# Load Data



In [None]:
# Note: Links might have to be changed
# Each CSV is downloaded from GitHub and parsed into its own DataFrame.
Kaggle_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Kaggle_Data.csv")
Cardio_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Cardio_Data.csv")
Malaga_Data = pd.read_csv("https://raw.githubusercontent.com/amaye15/Data/main/Team%20Project%20(SIT764)/Malaga_Data.csv")

In [None]:
def _clock_to_float(value):
  """Convert one Cardio `Time` entry to a float.

  A string of the form "A:B" becomes the digits of A and B joined together and
  divided by 100 (for example "12:34" -> 12.34).  Any other string contributes
  only the text before its first ":".  Values that are not strings are passed
  through `float`, so re-running this cell after the column has already been
  converted does not fail.
  """
  if not isinstance(value, str):
    return float(value)
  parts = value.split(":")
  if len(parts) == 2:
    return float(parts[0] + parts[1]) / 100
  return float(parts[0])


# NOTE(review): "A:B" is encoded as A.B (hundredths), not A + B/60 - confirm this is the intended time scale.
Cardio_Data["Time"] = [_clock_to_float(value) for value in Cardio_Data.Time.values]

# Drop every Malaga row that has at least one missing value.
Malaga_Data = Malaga_Data.dropna()

# Data Preprocessing

In [None]:
# Build one (features, target) sequence per recording across the three datasets.
# Within a recording every column is min-max scaled on its own, so all values lie in [0, 1].
from itertools import product


def _minmax(values):
  """Min-max scale a pandas Series and return it as a flat NumPy array."""
  return MinMaxScaler().fit_transform(values.values.reshape(-1, 1)).reshape(-1)


def _build_sequences(frame, key_columns, time_col, oxygen_col, hr_col, rf_col, skip_invalid=False):
  """Create scaled feature/target sequences for every recording in `frame`.

  A recording is the set of rows that share one combination of values in
  `key_columns`.  Combinations are visited in the order of a nested loop over
  each column's unique values (first column outermost); combinations with no
  rows are skipped.

  Parameters:
    frame: DataFrame holding the raw measurements.
    key_columns: column names whose value combination identifies a recording.
    time_col, oxygen_col, hr_col, rf_col: names of the time, oxygen,
      heart-rate and breathing-rate columns in `frame`.
    skip_invalid: when True, a recording whose scaling raises ValueError is
      reported and skipped instead of aborting the whole cell.

  Returns:
    (features, targets): two lists of equal length.  Each features entry is an
    array of shape (n_rows, 3) ordered as time, heart rate, breathing rate;
    each targets entry is the scaled oxygen array of length n_rows.
  """
  features, targets = [], []
  unique_values = [frame[column].unique() for column in key_columns]
  for combo in product(*unique_values):
    mask = np.logical_and.reduce([frame[column] == value for column, value in zip(key_columns, combo)])
    group = frame.loc[mask]
    if group.empty:
      # This key combination was never recorded, so there is nothing to scale.
      continue
    try:
      time = _minmax(group[time_col])
      oxygen = _minmax(group[oxygen_col])
      heart_rate = _minmax(group[hr_col])
      breathing_rate = _minmax(group[rf_col])
    except ValueError as error:
      if not skip_invalid:
        raise
      # Report the skipped recording instead of hiding the failure.
      print(f"Skipping {dict(zip(key_columns, combo))}: {error}")
      continue
    features.append(np.column_stack((time, heart_rate, breathing_rate)))
    targets.append(oxygen)
  return features, targets


X = []
Y = []

# Kaggle Data - time, Oxygen, HR, RF; one recording per Participant and Method
kaggle_x, kaggle_y = _build_sequences(Kaggle_Data, ["Participant", "Method"], "time", "Oxygen", "HR", "RF")
X.extend(kaggle_x)
Y.extend(kaggle_y)

# Cardio Data - Time, RF, HR, Oxygen; one recording per Participant, Speed, and Exercise
cardio_x, cardio_y = _build_sequences(Cardio_Data, ["Participant", "Speed", "Exercise"], "Time", "Oxygen", "HR", "RF", skip_invalid=True)
X.extend(cardio_x)
Y.extend(cardio_y)

# Malaga Data - time, HR, VO2, RR; one recording per ID_test
malaga_x, malaga_y = _build_sequences(Malaga_Data, ["ID_test"], "time", "VO2", "HR", "RR")
X.extend(malaga_x)
Y.extend(malaga_y)


# Pytorch Setup 
- Custom dataset
- Custom model
- Custom batch preprocessor (pad collate)
- Custom loss

In [None]:
class CustomDataset(Dataset):
  """Index-addressable pairing of feature sequences with their target sequences.

  Each item is returned as `(index, x[index], y[index])`, so a consumer can
  tell which position a sample came from.
  """

  def __init__(self, x, y):
    # Keep references to both collections; the length is read from x once, up front.
    self.x, self.y = x, y
    self.data_length = len(x)

  def __len__(self):
    """Return the number of samples recorded at construction time."""
    return self.data_length

  def __getitem__(self, index):
    """Return the position together with the feature and target stored there."""
    sample = (index, self.x[index], self.y[index])
    return sample


In [None]:
class Model(nn.Module):
  """Bidirectional LSTM followed by a linear layer that emits one value per time step.

  Parameters:
    input_size: number of features per time step.
    output_size: hidden size of the LSTM in each direction.
    num_layers: number of stacked LSTM layers.
    dropout: dropout probability between stacked LSTM layers (default 0.2).

  The forward pass maps a batch-first tensor of shape (batch, time, input_size)
  to a tensor of shape (batch, time, 1).
  """

  def __init__(self, input_size, output_size, num_layers, dropout=0.2):
    super().__init__()
    # nn.LSTM applies dropout only between stacked layers.  With a single layer it
    # has no effect and PyTorch emits a warning, so it is switched off in that case.
    lstm_dropout = dropout if num_layers > 1 else 0.0
    self.lstm1 = nn.LSTM(
        input_size=input_size,
        hidden_size=output_size,
        num_layers=num_layers,
        batch_first=True,
        bidirectional=True,
        dropout=lstm_dropout,
    )
    # The two directions are concatenated, hence twice the hidden size.
    self.linear = nn.Linear(output_size * 2, 1)

  def forward(self, x):
    # Only the per-step outputs are used; the final hidden and cell states are discarded.
    output, _ = self.lstm1(x)
    return self.linear(output)

In [None]:
def pad_collate(batch):
  """Collate `(index, x, y)` samples into zero-padded, batch-first float64 tensors.

  Returns a tuple `(indexes, padded_x, padded_y)` where `indexes` is the tuple of
  sample indexes and each padded tensor is as long as the longest sequence in
  the batch.
  """
  indexes, features, targets = zip(*batch)

  def _to_padded(sequences):
    # Convert every sequence to a double tensor, then right-pad with zeros.
    tensors = [torch.tensor(sequence, dtype=torch.float64) for sequence in sequences]
    return pad_sequence(tensors, batch_first=True, padding_value=0)

  return indexes, _to_padded(features), _to_padded(targets)

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-square error: the square root of `nn.MSELoss` over all elements."""

    def __init__(self):
        super().__init__()
        # The mean squared error is delegated to PyTorch's built-in criterion.
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return sqrt(MSE(yhat, y)) as a scalar tensor."""
        squared_error = self.mse(yhat, y)
        return squared_error.sqrt()

# Final preparation
- Test-train split
- Dataloaders
- GPUs
- Model, loss function, and optimizer

In [None]:
# Hold out 30% of the recordings for testing.  The seed is fixed so that the same
# recordings end up in the test set on every run; with a fresh random split per
# session, a model reloaded from disk could be scored on recordings it was
# trained on in an earlier session.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(X, Y, shuffle=True, test_size=0.3, random_state=SPLIT_SEED)

In [None]:
# pad_collate pads every batch to the length of its longest sequence.
load_train_data = DataLoader(CustomDataset(x_train, y_train), batch_size=32, shuffle=True, collate_fn=pad_collate)
load_test_data = DataLoader(CustomDataset(x_test, y_test), batch_size=1, shuffle=True, collate_fn=pad_collate)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Here you need to make a choice: do you want to train a new model or an existing model?

# Option A - train a new model (uncomment this line and comment out Option B):
#loaded_model = Model(input_size = 3, output_size= 100, num_layers = 1).to(device).to(torch.double)
# Option B - continue training an existing model.
# map_location places the weights on the device available in this session, so a
# model saved on a GPU runtime still loads on a CPU-only runtime.
# The file holds the whole pickled module rather than a state dict, so
# weights_only=False is required on PyTorch versions where weights_only defaults
# to True (the argument needs PyTorch >= 1.13).  Unpickling can execute
# arbitrary code: only load files you created yourself.
loaded_model = torch.load('/content/drive/MyDrive/Deakin/O2_Model', map_location=device, weights_only=False)

criterion = RMSELoss().to(device)
optimizer = torch.optim.Adam(loaded_model.parameters(), lr=0.00001)

In [None]:
epochs = 50

# NOTE(review): the zero padding added by pad_collate is part of both the
# prediction and the target tensors, so padded time steps contribute to the loss.
for epoch in range(epochs):
  # Training Phase
  loaded_model.train()
  train_loss = []
  for index, x, y in load_train_data:
    x = x.to(device)
    y = y.to(device)
    # Reset gradients from the previous step.  Without this, backward() keeps
    # adding to the stored gradients, so every update uses the running sum of
    # all earlier batches and epochs.
    optimizer.zero_grad()
    predicted = loaded_model(x)
    # squeeze(-1) removes only the trailing size-1 output axis; a bare squeeze()
    # would also remove the batch axis when a batch holds a single sequence.
    loss = criterion(predicted.squeeze(-1), y)
    loss.backward()
    optimizer.step()
    train_loss.append(loss.item())

  # Testing Phase
  loaded_model.eval()
  test_loss = []
  # No gradients are needed for evaluation, so autograd tracking is switched off.
  with torch.no_grad():
    for index, x, y in load_test_data:
      x = x.to(device)
      y = y.to(device)
      predicted = loaded_model(x)
      loss = criterion(predicted.flatten(), y.flatten())
      test_loss.append(loss.item())

  # Print loss
  print(f"Epoch {epoch}")
  print(f"Train Loss - {np.mean(train_loss)}")
  print(f"Test Loss - {np.mean(test_loss)}")
  if epoch % 10 == 0:
    # Plot the last test sequence evaluated in this epoch against its prediction.
    D = pd.DataFrame(data={'Predicted': predicted.cpu().flatten().detach().numpy(), 'Real': y.cpu().flatten().detach().numpy()})
    fig = px.line(data_frame = D, width=800, height=400)
    fig.update_xaxes(title_text='Time')
    fig.update_yaxes(title_text='Min-Max Oxygen Values')
    fig.update_layout(title_text="Oxygen Consumption (VO2) Over Time", title_x=0.5)
    # A figure created inside a loop is not rendered automatically; show it explicitly.
    fig.show()

# Change this to the location you want to save the model
torch.save(loaded_model, '/content/drive/MyDrive/Deakin/O2_Model')

Epoch 0
Train Loss - 0.0814052985223039
Test Loss - 0.11532663127789505


Epoch 1
Train Loss - 0.07796058735131903
Test Loss - 0.11478980046540338
Epoch 2
Train Loss - 0.08002485760298265
Test Loss - 0.11432096815936849
Epoch 3
Train Loss - 0.07353620325520496
Test Loss - 0.11394522059443878
Epoch 4
Train Loss - 0.07473400448173166
Test Loss - 0.11362049736482474
Epoch 5
Train Loss - 0.07292899267426156
Test Loss - 0.11334698244552872
Epoch 6
Train Loss - 0.07665909733547079
Test Loss - 0.11303285100111378
Epoch 7
Train Loss - 0.07681746042437208
Test Loss - 0.11252923736888484
Epoch 8
Train Loss - 0.0782670862353874
Test Loss - 0.11175293877229593
Epoch 9
Train Loss - 0.0758894424532642
Test Loss - 0.11097497138721134
Epoch 10
Train Loss - 0.06983416441922864
Test Loss - 0.11015063807298348


Epoch 11
Train Loss - 0.07083908548764657
Test Loss - 0.10947245137723659
Epoch 12
Train Loss - 0.07200889039508768
Test Loss - 0.1090655590930272
Epoch 13
Train Loss - 0.07314951755884305
Test Loss - 0.1092687725350681
Epoch 14
Train Loss - 0.07108421588898074
Test Loss - 0.11080421017936314
Epoch 15
Train Loss - 0.0716987973938665
Test Loss - 0.1119584701612217
Epoch 16
Train Loss - 0.074294438587695
Test Loss - 0.109750104488101
Epoch 17
Train Loss - 0.07090464885651619
Test Loss - 0.10786221032269167
Epoch 18
Train Loss - 0.0683093579860057
Test Loss - 0.10789158372948567
Epoch 19
Train Loss - 0.0722332330771807
Test Loss - 0.1078506725796071
Epoch 20
Train Loss - 0.07279336688497474
Test Loss - 0.10921259925780502


Epoch 21
Train Loss - 0.07424637655575436
Test Loss - 0.11085783104910049
Epoch 22
Train Loss - 0.0744211481760059
Test Loss - 0.10945941225856387
Epoch 23
Train Loss - 0.07687345742262205
Test Loss - 0.11180412834010806
Epoch 24
Train Loss - 0.07749677972528053
Test Loss - 0.11570944915341562
Epoch 25
Train Loss - 0.07607276925257656
Test Loss - 0.10951028724975376
Epoch 26
Train Loss - 0.07201961482828616
Test Loss - 0.1121161481863647
Epoch 27
Train Loss - 0.07327407983967645
Test Loss - 0.12270360300284724
Epoch 28
Train Loss - 0.0822993064002824
Test Loss - 0.12215193863190779
Epoch 29
Train Loss - 0.07416710711234253
Test Loss - 0.11144711028637101
Epoch 30
Train Loss - 0.07544705762618807
Test Loss - 0.11010749537651303


Epoch 31
Train Loss - 0.08565381129246057
Test Loss - 0.11818923308822121
Epoch 32
Train Loss - 0.08950912445963047
Test Loss - 0.11567038329993364
Epoch 33
Train Loss - 0.07956927400799846
Test Loss - 0.1084059960676955
Epoch 34
Train Loss - 0.07422539163064276
Test Loss - 0.11887942731470942
Epoch 35
Train Loss - 0.08194892800833509
Test Loss - 0.13694691716505197
Epoch 36
Train Loss - 0.09039358749080796
Test Loss - 0.14371743907614318
Epoch 37
Train Loss - 0.09073469547032752
Test Loss - 0.1342184770955741
Epoch 38
Train Loss - 0.083239415522882
Test Loss - 0.11573956233105573
Epoch 39
Train Loss - 0.07254386011490808
Test Loss - 0.10748423525312151
Epoch 40
Train Loss - 0.07853464040425429
Test Loss - 0.12226558405643785


Epoch 41
Train Loss - 0.09261062608366064
Test Loss - 0.1422984406863216
Epoch 42
Train Loss - 0.10341159260681422
Test Loss - 0.14603137326397184
Epoch 43
Train Loss - 0.09624941077548219
Test Loss - 0.12989887178842133
Epoch 44
Train Loss - 0.08634041501810218
Test Loss - 0.11090057504147947
Epoch 45
Train Loss - 0.07266148643951294
Test Loss - 0.10935579306047337
Epoch 46
Train Loss - 0.08023794142330079
Test Loss - 0.12370106798539837
Epoch 47
Train Loss - 0.08661614026649467
Test Loss - 0.13970103337937723
Epoch 48
Train Loss - 0.098570459840384
Test Loss - 0.14965122391023972
Epoch 49
Train Loss - 0.10474318051606782
Test Loss - 0.1513110547431185


# Check Model
 This part just loads an existing model and produces some visualisations.

In [None]:
# map_location places the weights on the device available in this session.
# The file holds the whole pickled module, so weights_only=False is required on
# PyTorch versions where weights_only defaults to True (the argument needs
# PyTorch >= 1.13).  Unpickling can execute arbitrary code: only load files you
# created yourself.
model = torch.load('/content/drive/MyDrive/Deakin/O2_Model', map_location=device, weights_only=False)
model.eval()
test_loss = []
# Evaluation only: no gradients are needed, so autograd tracking is switched off.
with torch.no_grad():
  for index, x, y in load_test_data:
    x = x.to(device)
    y = y.to(device)
    predicted = model(x)
    loss = criterion(predicted.flatten(), y.flatten())
    test_loss.append(loss.item())


print(f"Test Loss - {np.mean(test_loss)}")

# `predicted` and `y` hold the last test sequence produced by the loop above;
# the plot below compares the two.
D = pd.DataFrame(data={'Predicted': predicted.cpu().flatten().detach().numpy(), 'Real': y.cpu().flatten().detach().numpy()})
fig = px.line(data_frame = D, width=800, height=400)
fig.update_xaxes(title_text='Time')
fig.update_yaxes(title_text='Min-Max Oxygen Values')
fig.update_layout(title_text="Oxygen Consumption (VO2) Over Time", title_x=0.5)

Test Loss - 0.1116806464634718


# Metrics 

In [None]:
# scikit-learn metrics expect (y_true, y_pred) in that order.  R^2 and explained
# variance are not symmetric, so passing the prediction first gives different
# (incorrect) scores.
# `predicted` and `y` are the tensors left over from the last iteration of the
# preceding evaluation loop, so these scores describe a single test sequence.
y_true = y.cpu().flatten().detach().numpy()
y_pred = predicted.cpu().flatten().detach().numpy()

mse = mean_squared_error(y_true, y_pred)

print(r2_score(y_true, y_pred))
print(explained_variance_score(y_true, y_pred))
print(mse)
print(sqrt(mse))
print(mean_absolute_error(y_true, y_pred))

0.8949349408089011
0.8971629175548614
0.005435460498719772
0.07372557560792437
0.06139860134780832
