# Task 2 - Time Series
Define, train, and test an ML model (preferably a neural network) to recognize the activity being performed in the following human activity recognition dataset: https://www.kaggle.com/datasets/uciml/human-activity-recognition-with-smartphones

## Labels
['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS']

In [151]:
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch.nn as nn
import torch
import torch.nn.functional as F
import numpy as np

Helper Functions

In [102]:
def get_path(task_number, *args):
    notebook_path = os.path.abspath(f"Task_{task_number}.ipynb")
    return os.path.join(os.path.dirname(notebook_path), *args)

def preprocess_data(df, column="Activity"):
    le = LabelEncoder()
    return  df.drop(column, axis=1), le.fit_transform(df[column])

# Convert numpy array of output lables to one hot encoded tensor
def get_output_tensor(array):
    tensor = torch.zeros((len(array), 6), dtype=torch.float32)
    for i, val in enumerate(array):
        tensor[i][val] = 1
    return tensor

## Load The Data Into Pandas


In [167]:
model_path = get_path(2, "model", "fashion_mnist_cnn.pt")

train_data = pd.read_csv(get_path(2, "data", "train.csv"))
test_data = pd.read_csv(get_path(2, "data", "test.csv"))

# Split into label and features
train_data, train_labels = preprocess_data(train_data)
test_data, test_labels = preprocess_data(test_data)

train_data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1


In [182]:

class TimeSeriesDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

# Assuming X_train, y_train, X_valid, y_valid are your training and validation data
train_dataset = TimeSeriesDataset(train_data, train_labels)

# Define the data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)


## Define the Neural Network
In this case we want an MLP

In [183]:
sequence_length = 50

class NeuralNetTask2(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=sequence_length):
        super(NeuralNetTask2, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size*sequence_length, num_classes)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(self.device)
        
        # If using unbatched input, squeeze the second dimension
        if x.size(0) == 1:
            h0 = h0.squeeze(1)
        
        # Forward propagate RNN
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out
    

## Set Network and Training Parameters


In [184]:
lr = 1e-2
num_epochs = 10

input_size = 562
hidden_size = 128
num_layers = 2
num_classes = 6
model = NeuralNetTask2(input_size, hidden_size, num_layers, num_classes)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum = 0.9)


## Train the RNN

In [185]:
losses = []

# Iterate through number of epochs and determine next step
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader): # in range(0, train_data.shape[0], batch_size):
        
        # Get Input batch and lables
        inputs = torch.tensor(data).to(device)
        labels = torch.tensor(train_labels).to(device)
        
        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print loss
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, (i+1)//batch_size, train_data.shape[0]//batch_size, loss.item()))
        
        # Draw plot of loss
        # losses.append(loss.item())
        # pl.plot(losses, 'b')
        # display.clear_output(wait=True)
        # display.display(pl.gcf())
      
    # Save the model here, in case of interruption or if I'm bored cause my cpu too slow  
    torch.save(model.state_dict(), model_path)


print("Training finished")

KeyError: 0

In [None]:
# Save the Model
torch.save(model.state_dict(), model_path)

In [150]:
# Load the Model
model = NeuralNetTask2(input_size, hidden_size, num_layers, num_classes)
model.load_state_dict(torch.load(model_path))
model.eval()

RuntimeError: Error(s) in loading state_dict for NeuralNetTask2:
	Missing key(s) in state_dict: "rnn.weight_ih_l0", "rnn.weight_hh_l0", "rnn.bias_ih_l0", "rnn.bias_hh_l0", "rnn.weight_ih_l1", "rnn.weight_hh_l1", "rnn.bias_ih_l1", "rnn.bias_hh_l1", "fc.weight", "fc.bias". 
	Unexpected key(s) in state_dict: "fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias". 

In [118]:
# Test the saved model on the Test-Data

# Only relevant for the plot
# size_constraint = 100
# figsize = (size_constraint//batch_size, size_constraint//batch_size)

correct = 0
total = 0

with torch.no_grad():
    for i in range(0, test_data.shape[0], batch_size):
                
        inputs = torch.tensor(test_data.values[i:i+batch_size], dtype=torch.float32).to(device)
        true_labels = torch.tensor(test_labels[i:i+batch_size]).to(device) #get_output_tensor(test_labels[i:i+batch_size]).to(device)
        
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += true_labels.size(0)
        correct += (predicted == true_labels).sum().item()
        
        # Print the accuracy
        print(f"Accuracy of the network on test frames {i+1}-{i+batch_size}: {round(100 * correct / total, 4)} %")
        
        # Plot the images
        # f, axarr = plt.subplots(1, batch_size, figsize=figsize)
        # for k in range(batch_size):   
        #     axarr[k].imshow(test_data[i:i+batch_size][k], interpolation='nearest')
        #     axarr[k].set_title(f"Predicted: {labels[predicted[k]]},\nActual: {labels[true_labels[k]]}")
        # plt.show()

Accuracy of the network on test frames 1-5: 0.0 %
Accuracy of the network on test frames 6-10: 0.0 %
Accuracy of the network on test frames 11-15: 0.0 %
Accuracy of the network on test frames 16-20: 0.0 %
Accuracy of the network on test frames 21-25: 0.0 %
Accuracy of the network on test frames 26-30: 0.0 %
Accuracy of the network on test frames 31-35: 0.0 %
Accuracy of the network on test frames 36-40: 0.0 %
Accuracy of the network on test frames 41-45: 0.0 %
Accuracy of the network on test frames 46-50: 0.0 %
Accuracy of the network on test frames 51-55: 0.0 %
Accuracy of the network on test frames 56-60: 8.3333 %
Accuracy of the network on test frames 61-65: 15.3846 %
Accuracy of the network on test frames 66-70: 21.4286 %
Accuracy of the network on test frames 71-75: 26.6667 %
Accuracy of the network on test frames 76-80: 30.0 %
Accuracy of the network on test frames 81-85: 28.2353 %
Accuracy of the network on test frames 86-90: 26.6667 %
Accuracy of the network on test frames 91-9