In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import os
import warnings

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

from sklearn.model_selection import KFold

In [2]:
class EE_LSTM(nn.Module):
    """Stacked LSTM followed by a linear read-out applied at every timestep.

    Inputs are batch-first: ``(batch, seq_len, input_dim)``. Outputs have
    shape ``(batch, seq_len, output_dim)``.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout, output_dim = 1):
        """Build the recurrent stack and the output projection.

        Args:
            input_dim: number of features per timestep.
            hidden_dim: hidden size of every LSTM layer.
            num_layers: number of stacked LSTM layers.
            dropout: dropout probability handed to ``nn.LSTM``.
            output_dim: number of values predicted per timestep.
        """
        super().__init__()

        # Hyper-parameters are kept on the instance for later inspection.
        self.input_dim, self.hidden_dim, self.output_dim = input_dim, hidden_dim, output_dim
        self.num_layers = num_layers
        self.dropout = dropout

        # Submodule names (`lstm`, `dense`) define the state_dict keys.
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.dense = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the whole sequence through the LSTM in one call."""
        sequence_features, _ = self.lstm(x)
        return self.dense(sequence_features)

    def test(self, x):
        """Predict one timestep at a time, carrying the LSTM state forward.

        The first step starts from the default (zero) state; every later step
        receives the state returned by the previous one. Per-step outputs are
        concatenated along the time axis.
        """
        state = None  # None makes nn.LSTM start from its default initial state
        step_outputs = []
        for t in range(x.size(1)):
            frame = x[:, t, :].unsqueeze(1)  # (batch, 1, input_dim)
            frame_features, state = self.lstm(frame, state)
            step_outputs.append(self.dense(frame_features))
        return torch.cat(step_outputs, 1)

In [3]:
class Energy_Expenditure(Dataset):
    """Dataset pairing each entry of ``X`` with the same-index entry of ``y``.

    Items are converted to tensors lazily, on access.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        # The dataset length is taken from X only; y is not length-checked.
        self.len = len(X)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        sample, target = self.X[index], self.y[index]
        return torch.tensor(sample), torch.tensor(target)

In [4]:
def getSourceList(keyword):
    """Return the sensor-name substrings associated with a sensor-set keyword.

    Args:
        keyword: one of 'All', 'IMU' or 'HR'.

    Returns:
        A new list of source names (e.g. ``['ANKLE', 'THIGH']`` for 'IMU').

    Raises:
        ValueError: if ``keyword`` is not a recognised sensor set. Previously
            this case fell through every branch and surfaced as an
            UnboundLocalError on the return statement.
    """
    sources_by_keyword = {
        'All': ['ANKLE', 'THIGH', 'HR'],
        'IMU': ['ANKLE', 'THIGH'],
        'HR': ['HR'],
    }
    try:
        return sources_by_keyword[keyword]
    except (KeyError, TypeError):
        raise ValueError(
            'Unknown source keyword %r; expected one of %s'
            % (keyword, sorted(sources_by_keyword))
        ) from None

In [5]:
def activity_data_convert(activity_name,data_len):
    """Build a constant column holding the numeric code of an activity.

    Args:
        activity_name: 'Walk', 'Run', 'Stairclimb' or 'Cycle'.
        data_len: number of rows the column should have.

    Returns:
        A float array of shape ``(data_len, 1)`` filled with the activity code
        (Walk=1, Run=2, Stairclimb=3, Cycle=4).

    Raises:
        ValueError: if ``activity_name`` is not one of the four known
            activities. Previously this surfaced as an UnboundLocalError.
    """
    activity_codes = {'Walk': 1, 'Run': 2, 'Stairclimb': 3, 'Cycle': 4}
    try:
        activity_num = activity_codes[activity_name]
    except (KeyError, TypeError):
        raise ValueError(
            'Unknown activity %r; expected one of %s'
            % (activity_name, sorted(activity_codes))
        ) from None

    # Same result as code * np.ones(data_len) expanded to a column vector.
    return np.full((data_len, 1), activity_num, dtype=float)

In [6]:
def loadData(jsi_data, slade_data, source_list, activity_flag):
    """Read every JSI and Slade recording into parallel lists.

    Args:
        jsi_data: directory holding JSI CSVs; only files whose name contains
            'Person' are read, and the activity is the second '_'-separated
            token of the file name with '.csv' removed.
        slade_data: directory holding Slade CSVs; only files whose name
            contains '.csv' are read, and the activity is derived from the
            condition code (C02..C09) found in the file name.
        source_list: substrings; a column is used as an input feature once for
            every entry of this list that occurs in its name.
        activity_flag: when equal to True, a constant activity-code column
            (from ``activity_data_convert``) is appended to the features.

    Returns:
        ``(data_list, target_list, activity_list)`` with one entry per file,
        JSI files first, each group in ``os.listdir`` order.
    """
    # Slade condition codes:
    #   1 - quiet standing
    #   2 - walking at 1.0 m/s            3 - walking at 1.5 m/s
    #   4 - running at 2.5 m/s            5 - running at 3.0 m/s
    #   6 - climbing stairs, 50 steps/min 7 - climbing stairs, 70 steps/min
    #   8 - biking, 50 W resistance       9 - biking, 120 W resistance
    slade_conditions = (
        (('C02', 'C03'), 'Walk'),
        (('C04', 'C05'), 'Run'),
        (('C06', 'C07'), 'Stairclimb'),
        (('C08', 'C09'), 'Cycle'),
    )
    include_activity = (activity_flag == True)

    def _feature_values(frame):
        # One entry per (column, matching source) pair, in column order.
        selected = []
        for column in frame.columns:
            for source in source_list:
                if source in column:
                    selected.append(column)
        return frame[selected].values

    def _with_activity(features, name):
        if not include_activity:
            return features
        code_column = activity_data_convert(name, len(features))
        return np.append(features, code_column, 1)

    data_list, target_list, activity_list = [], [], []

    # NOTE(review): os.listdir order is filesystem-dependent, so sample order
    # is not guaranteed to be stable between machines.

    # JSI Data
    for filename in os.listdir(jsi_data):
        if 'Person' not in filename:
            continue
        frame = pd.read_csv(jsi_data+'/'+filename)
        cosmed_columns = [column for column in frame.columns if 'COSMED' in column]
        target_list.append(frame[cosmed_columns].values)
        features = _feature_values(frame)
        activity_name = filename.split(sep='_')[1].replace(".csv","")
        data_list.append(_with_activity(features, activity_name))
        activity_list.append(activity_name)

    # Slade Data
    for filename in os.listdir(slade_data):
        if '.csv' not in filename:
            continue
        frame = pd.read_csv(slade_data+'/'+filename)
        met_columns = [column for column in frame.columns
                       if 'MET' in column and 'MET HR' not in column]
        target_list.append(frame[met_columns].values)
        features = _feature_values(frame)

        # NOTE(review): a file matching none of the codes (e.g. C01, quiet
        # standing) keeps the activity_name left over from the previous file.
        for codes, label in slade_conditions:
            if any(code in filename for code in codes):
                activity_name = label
                break

        features = _with_activity(features, activity_name)
        activity_list.append(activity_name)
        data_list.append(features)

    return data_list, target_list, activity_list

In [7]:
def pad_collate(batch):
    """Collate variable-length (input, target) pairs into zero-padded batches.

    Args:
        batch: iterable of ``(x, y)`` tensor pairs.

    Returns:
        ``(x_padded, y_padded, x_lengths, y_lengths)`` where the padded
        tensors are batch-first and the lengths are the original first-axis
        sizes of every sequence.
    """
    inputs, targets = zip(*batch)

    def _pad(sequences):
        return pad_sequence(sequences, batch_first=True, padding_value=0)

    input_lengths = [len(sequence) for sequence in inputs]
    target_lengths = [len(sequence) for sequence in targets]

    return _pad(inputs), _pad(targets), input_lengths, target_lengths

In [8]:
def activityListToNumeric(activity_list):
    """Map activity names to zero-based integer codes.

    'Walk' -> 0, 'Run' -> 1, 'Stairclimb' -> 2; every other value (intended
    for 'Cycle') -> 3.
    """
    known_codes = (('Walk', 0), ('Run', 1), ('Stairclimb', 2))
    fallback_code = 3  # 'Cycle', and anything unrecognised
    return [
        next((code for name, code in known_codes if act == name), fallback_code)
        for act in activity_list
    ]

In [9]:
def appendCNNactivity(data_list, activity_list):
    """Return copies of the recordings with the activity column replaced.

    The last feature of every timestep is overwritten with the CNN-predicted
    activity for that recording. The CNN emits labels in {0, 1, 2, 3} whereas
    the LSTM models expect {1, 2, 3, 4}, hence the +1 shift.

    The input recordings are NOT modified. The previous implementation wrote
    into ``data_list`` in place and returned the same list object, so calling
    it several times with different label sets left every returned "variant"
    aliasing one list that held only the last labels written.

    Args:
        data_list: sequence of 2-D arrays (timesteps x features), one per
            recording, whose last column holds the activity code.
        activity_list: one predicted label per recording, index-aligned with
            ``data_list``.

    Returns:
        A new list of new arrays with the last column relabelled.

    Raises:
        IndexError: if ``activity_list`` has fewer entries than ``data_list``.
    """
    relabelled_list = []
    for i, recording in enumerate(data_list):
        shifted_label = activity_list[i] + 1
        relabelled = np.array(recording, copy=True)
        if len(relabelled):  # nothing to overwrite in an empty recording
            relabelled[:, -1] = shifted_label
        relabelled_list.append(relabelled)
    return relabelled_list

In [10]:
def evalModel(keyword, activity_flag, test_loader, device):
    """Evaluate one pre-trained LSTM on a padded test loader.

    Args:
        keyword: 'HR' or 'IMU'; any other value selects the 'All' model.
        activity_flag: when equal to True, the "+ activity" variant of the
            model is loaded and feature column 7 is fed to it as well.
        test_loader: yields ``(inputs, labels, input_lengths, label_lengths)``
            batches, e.g. from a DataLoader using ``pad_collate``. Inputs are
            indexed with column 0 for 'HR', columns 1-6 for 'IMU' and column 7
            for the activity code, which assumes the 8-column 'All' + activity
            layout - TODO confirm against the loader's data.
        device: torch device (or device string) to evaluate on.

    Returns:
        ``(mae, mse)`` as Python floats, averaged over every real (unpadded)
        label element in the loader; ``(0.0, 0.0)`` for an empty loader.

    Changes from the previous implementation:
        * checkpoints are loaded with ``map_location=device`` so GPU-saved
          files also load on a CPU-only runtime;
        * zero-padded timesteps are masked out of both metrics;
        * errors are averaged over all valid elements instead of summing the
          per-batch means, so the result no longer scales with batch count.
    """
    lstm_model_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/Trained Models/Saved .pkl Files/LSTM/Formatted'

    use_activity = (activity_flag == True)

    # Pick the feature columns and checkpoint. The HR and IMU checkpoints hold
    # whole pickled models; the 'All' checkpoints hold state_dicts.
    if keyword == 'HR':
        features = [0]
        checkpoint = '/HR_Act.pkl' if use_activity else '/HR.pkl'
        is_state_dict = False
    elif keyword == 'IMU':
        features = [1,2,3,4,5,6]
        checkpoint = '/IMU_Act5.pkl' if use_activity else '/IMU.pkl'
        is_state_dict = False
    else: # keyword == 'All'
        features = [0,1,2,3,4,5,6]
        checkpoint = '/All_Act.pkl' if use_activity else '/All.pkl'
        is_state_dict = True
    if use_activity:
        features = features + [7]

    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints from
    # a trusted location.
    # NOTE(review): newer PyTorch releases default to weights_only=True, which
    # rejects whole pickled models - confirm against the installed version.
    loaded = torch.load(lstm_model_path + checkpoint, map_location=device)
    if is_state_dict:
        model = EE_LSTM(len(features), 128, 4, 0.1)
        model.load_state_dict(loaded)
    else:
        model = loaded

    model.to(device)
    model.to(torch.double)

    #################################

    # Accumulate summed errors so padding can be excluded and the final mean
    # is taken over the whole loader.
    l1_loss = nn.L1Loss(reduction='sum')
    mse_loss = nn.MSELoss(reduction='sum')

    l1_sum = 0.0
    mse_sum = 0.0
    n_valid = 0

    model.eval()
    with torch.no_grad():
        for in_batch, label, in_len, label_len in test_loader:
            in_batch, label = in_batch.to(device), label.to(device)
            # Only consider the model features
            in_batch = in_batch[:,:,features]
            # Get prediction
            pred = model.test(in_batch)

            # valid[b, t] is True where timestep t of sample b is real data.
            # Assumes inputs and labels share the same time axis.
            lengths = torch.as_tensor(label_len, device=label.device)
            steps = torch.arange(label.size(1), device=label.device)
            valid = steps.unsqueeze(0) < lengths.unsqueeze(1)

            pred_valid, label_valid = pred[valid], label[valid]
            l1_sum += l1_loss(pred_valid, label_valid).item()
            mse_sum += mse_loss(pred_valid, label_valid).item()
            n_valid += label_valid.numel()

    if n_valid == 0:
        return 0.0, 0.0

    l1_err = l1_sum / n_valid
    mse_err = mse_sum / n_valid

    return l1_err, mse_err

In [11]:
# USE THIS ASSUMING CNN PREDICTED ACTIVITY LABELS

# Main

# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

# Check if cuda available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('*' * 50)
if torch.cuda.is_available():
    print('CUDA is found! Training on %s...' %torch.cuda.get_device_name(0))
else:
    warnings.warn('CUDA not found! Training may be slow...')

# Path to files
jsi_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/JSI Preprocessing/Aligned & Interpolated/1Hz'
slade_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/Slade Preprocessing/Aligned & Interpolated/1 Hz'
CNN_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/Saved Variables'
CNN_HR_list = np.load(CNN_path + '/labels_hr.npy')
CNN_IMU_list = np.load(CNN_path + '/labels_imus.npy')
CNN_All_list = np.load(CNN_path + '/labels_all.npy')

source_list = getSourceList('All')
activity_flag = True

data_list, target_list, activity_list = loadData(jsi_path, slade_path, source_list, activity_flag)

# Build one independently relabelled dataset per CNN variant.
# Each call gets its own copy of the recordings: appendCNNactivity writes the
# labels into the arrays it is given, so passing data_list itself three times
# made data_list_hr / data_list_imu / data_list_all aliases of one list that
# only held the last ("all") labels.
# NOTE(review): the CNN label arrays are matched to recordings by position, so
# they must have been saved in the same order loadData returns - confirm.
data_list_hr = appendCNNactivity([recording.copy() for recording in data_list], CNN_HR_list)
data_list_imu = appendCNNactivity([recording.copy() for recording in data_list], CNN_IMU_list)
data_list_all = appendCNNactivity([recording.copy() for recording in data_list], CNN_All_list)

# N KFold Passes
# Because of the size of our dataset, even 5 fold cross validation can return varying results based on the random state of the fold splits
# Thus, we run our kfold cross val N times for different random states, and average the results

N = 10

l1_crossval_hr = []; mse_crossval_hr = []
l1_crossval_hr_act = []; mse_crossval_hr_act = []
l1_crossval_imu = []; mse_crossval_imu = []
l1_crossval_imu_act = []; mse_crossval_imu_act = []
l1_crossval_all = []; mse_crossval_all = []
l1_crossval_all_act = []; mse_crossval_all_act = []

for state in range(N):
    kfold = KFold(n_splits = 5, shuffle = True, random_state = state)
    for fold, (_, test_idx) in enumerate(kfold.split(data_list)):
        print('Working on state #', state, ', fold #', fold, '...')

        test_target = [target_list[i] for i in test_idx]

        # A data loader is needed for each set of CNN activity-label predictions

        # CNN HR Only Activity Labels
        test_data_hr = [data_list_hr[i] for i in test_idx]
        test_hr = Energy_Expenditure(test_data_hr, test_target)
        test_loader_hr = DataLoader(dataset=test_hr, batch_size=50, shuffle=False, num_workers=2, collate_fn=pad_collate)

        # CNN IMU Only Activity Labels
        test_data_imu = [data_list_imu[i] for i in test_idx]
        test_imu = Energy_Expenditure(test_data_imu, test_target)
        test_loader_imu = DataLoader(dataset=test_imu, batch_size=50, shuffle=False, num_workers=2, collate_fn=pad_collate)

        # CNN All Activity Labels
        test_data_all = [data_list_all[i] for i in test_idx]
        test_all = Energy_Expenditure(test_data_all, test_target)
        test_loader_all = DataLoader(dataset=test_all, batch_size=50, shuffle=False, num_workers=2, collate_fn=pad_collate)

        # Run all the models: (keyword, use activity, loader, MAE store, MSE store)
        evaluations = [
            ('HR', False, test_loader_hr, l1_crossval_hr, mse_crossval_hr),
            ('HR', True, test_loader_hr, l1_crossval_hr_act, mse_crossval_hr_act),
            ('IMU', False, test_loader_imu, l1_crossval_imu, mse_crossval_imu),
            ('IMU', True, test_loader_imu, l1_crossval_imu_act, mse_crossval_imu_act),
            ('All', False, test_loader_all, l1_crossval_all, mse_crossval_all),
            ('All', True, test_loader_all, l1_crossval_all_act, mse_crossval_all_act),
        ]
        for eval_keyword, eval_with_activity, eval_loader, l1_store, mse_store in evaluations:
            l1, mse = evalModel(eval_keyword, eval_with_activity, eval_loader, device)
            l1_store.append(l1); mse_store.append(mse)

print('-' * 35)
print('WITH CNN PREDICTED ACTIVITY LABELS')

# Mean and spread of every metric across all states and folds
report_sections = [
    ('HR Only', mse_crossval_hr, l1_crossval_hr),
    ('HR and Activity', mse_crossval_hr_act, l1_crossval_hr_act),
    ('IMU Only', mse_crossval_imu, l1_crossval_imu),
    ('IMU and Activity', mse_crossval_imu_act, l1_crossval_imu_act),
    ('All Only', mse_crossval_all, l1_crossval_all),
    ('All and Activity', mse_crossval_all_act, l1_crossval_all_act),
]
for section_title, mse_values, l1_values in report_sections:
    print('-' * 35)
    print(section_title)
    print('MSE Error: %.4f, MAE Error: %.4f' %(np.mean(mse_values), np.mean(l1_values)))
    print('MSE St Dev: %.4f, MAE St Dev: %.4f' %(np.std(mse_values), np.std(l1_values)))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
**************************************************
CUDA is found! Training on Tesla K80...
Working on state # 0 , fold # 0 ...
Working on state # 0 , fold # 1 ...
Working on state # 0 , fold # 2 ...
Working on state # 0 , fold # 3 ...
Working on state # 0 , fold # 4 ...
Working on state # 1 , fold # 0 ...
Working on state # 1 , fold # 1 ...
Working on state # 1 , fold # 2 ...
Working on state # 1 , fold # 3 ...
Working on state # 1 , fold # 4 ...
Working on state # 2 , fold # 0 ...
Working on state # 2 , fold # 1 ...
Working on state # 2 , fold # 2 ...
Working on state # 2 , fold # 3 ...
Working on state # 2 , fold # 4 ...
Working on state # 3 , fold # 0 ...
Working on state # 3 , fold # 1 ...
Working on state # 3 , fold # 2 ...
Working on state # 3 , fold # 3 ...
Working on state # 3 , fold # 4 ...
Working on state # 4 , fold # 0 ...
Working on state # 4 , f

In [12]:
# USE THIS ASSUMING CORRECT ACTIVITY LABELS

# Main

# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

# Check if cuda available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('*' * 50)
if not torch.cuda.is_available():
    warnings.warn('CUDA not found! Training may be slow...')
else:
    print('CUDA is found! Training on %s...' %torch.cuda.get_device_name(0))

# Path to files
jsi_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/JSI Preprocessing/Aligned & Interpolated/1Hz'
slade_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/Slade Preprocessing/Aligned & Interpolated/1 Hz'
CNN_path = '/content/drive/MyDrive/CMU/Spring 2022/24789/24789 EE Project/Saved Variables'
# The CNN label arrays are loaded here but not used by this cell.
CNN_HR_list = np.load(CNN_path + '/labels_hr.npy')
CNN_IMU_list = np.load(CNN_path + '/labels_imus.npy')
CNN_All_list = np.load(CNN_path + '/labels_all.npy')

source_list = getSourceList('All')
activity_flag = True

# Ground-truth activity codes are appended by loadData (activity_flag is True)
data_list, target_list, activity_list = loadData(jsi_path, slade_path, source_list, activity_flag)

# N KFold Passes
# With a dataset this small, even 5-fold cross validation varies with the
# random state of the split, so the 5-fold evaluation is repeated for N random
# states and the results are averaged.

N = 10

l1_crossval_hr, mse_crossval_hr = [], []
l1_crossval_hr_act, mse_crossval_hr_act = [], []
l1_crossval_imu, mse_crossval_imu = [], []
l1_crossval_imu_act, mse_crossval_imu_act = [], []
l1_crossval_all, mse_crossval_all = [], []
l1_crossval_all_act, mse_crossval_all_act = [], []

# Evaluation order: (keyword, use activity, MAE store, MSE store)
evaluations = [
    ('HR', False, l1_crossval_hr, mse_crossval_hr),
    ('HR', True, l1_crossval_hr_act, mse_crossval_hr_act),
    ('IMU', False, l1_crossval_imu, mse_crossval_imu),
    ('IMU', True, l1_crossval_imu_act, mse_crossval_imu_act),
    ('All', False, l1_crossval_all, mse_crossval_all),
    ('All', True, l1_crossval_all_act, mse_crossval_all_act),
]

for state in range(N):
    kfold = KFold(n_splits = 5, shuffle = True, random_state = state)
    for fold, (_, test_idx) in enumerate(kfold.split(data_list)):
        print('Working on state #', state, ', fold #', fold, '...')

        test_data = [data_list[i] for i in test_idx]
        test_target = [target_list[i] for i in test_idx]

        test = Energy_Expenditure(test_data, test_target)
        test_loader = DataLoader(dataset=test, batch_size=50, shuffle=False, num_workers=2, collate_fn=pad_collate)

        # Run all the models on the same fold
        for eval_keyword, eval_with_activity, l1_store, mse_store in evaluations:
            l1, mse = evalModel(eval_keyword, eval_with_activity, test_loader, device)
            l1_store.append(l1)
            mse_store.append(mse)

print('-' * 35)
print('WITH GROUND TRUTH ACTIVITY LABELS')

# Mean and spread of every metric across all states and folds
report_sections = [
    ('HR Only', mse_crossval_hr, l1_crossval_hr),
    ('HR and Activity', mse_crossval_hr_act, l1_crossval_hr_act),
    ('IMU Only', mse_crossval_imu, l1_crossval_imu),
    ('IMU and Activity', mse_crossval_imu_act, l1_crossval_imu_act),
    ('All Only', mse_crossval_all, l1_crossval_all),
    ('All and Activity', mse_crossval_all_act, l1_crossval_all_act),
]
for section_title, mse_values, l1_values in report_sections:
    print('-' * 35)
    print(section_title)
    print('MSE Error: %.4f, MAE Error: %.4f' %(np.mean(mse_values), np.mean(l1_values)))
    print('MSE St Dev: %.4f, MAE St Dev: %.4f' %(np.std(mse_values), np.std(l1_values)))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
**************************************************
CUDA is found! Training on Tesla K80...
Working on state # 0 , fold # 0 ...
Working on state # 0 , fold # 1 ...
Working on state # 0 , fold # 2 ...
Working on state # 0 , fold # 3 ...
Working on state # 0 , fold # 4 ...
Working on state # 1 , fold # 0 ...
Working on state # 1 , fold # 1 ...
Working on state # 1 , fold # 2 ...
Working on state # 1 , fold # 3 ...
Working on state # 1 , fold # 4 ...
Working on state # 2 , fold # 0 ...
Working on state # 2 , fold # 1 ...
Working on state # 2 , fold # 2 ...
Working on state # 2 , fold # 3 ...
Working on state # 2 , fold # 4 ...
Working on state # 3 , fold # 0 ...
Working on state # 3 , fold # 1 ...
Working on state # 3 , fold # 2 ...
Working on state # 3 , fold # 3 ...
Working on state # 3 , fold # 4 ...
Working on state # 4 , fold # 0 ...
Working on state # 4 , f