# Task 2 - Time Series
Define, train, and test an ML model (preferably a neural network) to recognize the activity being performed in the following human activity recognition dataset: https://www.kaggle.com/datasets/uciml/human-activity-recognition-with-smartphones

## Labels
['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS']

In [2]:
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch.nn as nn
import torch
import torch.nn.functional as F
import numpy as np

Helper Functions

In [3]:
def get_path(task_number, *args):
    notebook_path = os.path.abspath(f"Task_{task_number}.ipynb")
    return os.path.join(os.path.dirname(notebook_path), *args)

def preprocess_data(df, column="Activity"):
    le = LabelEncoder()
    return  df.drop(column, axis=1), le.fit_transform(df[column])

# Convert numpy array of output lables to one hot encoded tensor
def get_output_tensor(array):
    tensor = torch.zeros((len(array), 6), dtype=torch.float32)
    for i, val in enumerate(array):
        tensor[i][val] = 1
    return tensor

## Load The Data Into Pandas


In [4]:
MODEL_PATH = get_path(2, "model", "activity_loc_rnn.pt")

train_data = pd.read_csv(get_path(2, "data", "train.csv"))
test_data = pd.read_csv(get_path(2, "data", "test.csv"))

# Split into label and features
train_data, train_labels = preprocess_data(train_data)
test_data, test_labels = preprocess_data(test_data)

train_data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1


In [56]:

class TimeSeriesDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return np.array(self.X.values[idx], dtype=np.float32), np.array(self.Y[idx], dtype=np.float32)

train_dataset = TimeSeriesDataset(train_data, train_labels)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)


## Define the Neural Network
In this case we want an MLP

In [126]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Initialize hidden and cell states 
        h0 = torch.zeros(self.num_layers, self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  
        
        out = self.fc(out)

        # Decode the hidden state of the last time step
        #out = self.fc(out[:, -1, :])
        return out

## Set Network Parameters


In [159]:
input_size = 562
hidden_size = 256
num_layers = 2
num_classes = 6
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Reset The Model

In [None]:
model = LSTM(input_size, hidden_size, num_layers, num_classes)

## Train the LSTM Model

In [184]:
lr = 1e-4
num_epochs = 50
batch_size = 64
save_steps = 5

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD (model.parameters(), lr=lr, momentum=0.9)

dataset = TimeSeriesDataset(train_data, train_labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# Iterate through number of epochs and determine next step
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        
        # Get input and labels
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(inputs.float())
        loss = criterion(outputs, labels.long())

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print loss
        print (f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{train_data.shape[0]//batch_size}], Loss: {loss:.4f}")
        
      
        # Save the model here, in case of interruption or if I'm bored cause my cpu too slow  
        # Save every [save_steps] steps
        if i % save_steps == 0:
            torch.save(model.state_dict(), MODEL_PATH)
            print("Model saved at", MODEL_PATH)


print("Training finished")

Epoch [1/50], Step [1/114], Loss: 1.1203
Model saved at c:\Users\Admin\Desktop\Uni\Thesis\Preliminary Tasks\Task 2\model\activity_loc_rnn.pt
Epoch [1/50], Step [2/114], Loss: 1.0185
Epoch [1/50], Step [3/114], Loss: 1.1029
Epoch [1/50], Step [4/114], Loss: 1.0604
Epoch [1/50], Step [5/114], Loss: 1.0102
Epoch [1/50], Step [6/114], Loss: 1.1228
Model saved at c:\Users\Admin\Desktop\Uni\Thesis\Preliminary Tasks\Task 2\model\activity_loc_rnn.pt
Epoch [1/50], Step [7/114], Loss: 1.0435
Epoch [1/50], Step [8/114], Loss: 1.0655
Epoch [1/50], Step [9/114], Loss: 1.1438
Epoch [1/50], Step [10/114], Loss: 1.0011
Epoch [1/50], Step [11/114], Loss: 1.1627
Model saved at c:\Users\Admin\Desktop\Uni\Thesis\Preliminary Tasks\Task 2\model\activity_loc_rnn.pt
Epoch [1/50], Step [12/114], Loss: 1.0987
Epoch [1/50], Step [13/114], Loss: 0.9999
Epoch [1/50], Step [14/114], Loss: 1.1469
Epoch [1/50], Step [15/114], Loss: 1.0042
Epoch [1/50], Step [16/114], Loss: 1.1412
Model saved at c:\Users\Admin\Desktop

KeyboardInterrupt: 

# Save the Model


In [None]:
torch.save(model.state_dict(), MODEL_PATH)

# Load the Model


In [185]:
model = LSTM(input_size, hidden_size, num_layers, num_classes)
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

LSTM(
  (lstm): LSTM(562, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=6, bias=True)
)

# Test The Model

In [186]:
correct = 0
total = 0

with torch.no_grad():
    for i in range(0, test_data.shape[0], batch_size):
                
        inputs = torch.tensor(test_data.values[i:i+batch_size], dtype=torch.float32).to(device)
        true_labels = torch.tensor(test_labels[i:i+batch_size]).to(device) #get_output_tensor(test_labels[i:i+batch_size]).to(device)
        
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += true_labels.size(0)
        correct += (predicted == true_labels).sum().item()
        
        print(f"Accuracy of the network on test frames {i+1}-{i+batch_size}: {round(100 * correct / total, 4)} %")
        

Accuracy of the network on test frames 1-64: 15.625 %
Accuracy of the network on test frames 65-128: 48.4375 %
Accuracy of the network on test frames 129-192: 33.8542 %
Accuracy of the network on test frames 193-256: 46.0938 %
Accuracy of the network on test frames 257-320: 43.4375 %
Accuracy of the network on test frames 321-384: 44.2708 %
Accuracy of the network on test frames 385-448: 44.6429 %
Accuracy of the network on test frames 449-512: 39.4531 %
Accuracy of the network on test frames 513-576: 44.2708 %
Accuracy of the network on test frames 577-640: 40.1562 %
Accuracy of the network on test frames 641-704: 42.0455 %
Accuracy of the network on test frames 705-768: 40.7552 %
Accuracy of the network on test frames 769-832: 40.2644 %
Accuracy of the network on test frames 833-896: 41.8527 %
Accuracy of the network on test frames 897-960: 40.2083 %
Accuracy of the network on test frames 961-1024: 43.8477 %
Accuracy of the network on test frames 1025-1088: 41.8199 %
Accuracy of the 