## Using MoVi data in Python

The MoVi dataset is originally provided in `.mat` format.  
We provide some utility functions that make it easy to read MoVi files in a Python environment.

In [None]:
# Import Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import time
import nltk
import random
import collections
import glob


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from pathlib import Path
from numpy.random import randint
from sklearn.model_selection import train_test_split

# Enable IPython's autoreload extension in mode 2 so edited modules are re-imported automatically
%load_ext autoreload
%autoreload 2

# Mount Google Drive at /content/drive; the data paths used below live under it (Colab only)
from google.colab import drive
drive.mount('/content/drive')


**SUBJECT 10**

In [None]:
# Load the preprocessed recording of a single participant (subject 10)
data = pd.read_csv('/content/drive/MyDrive/Neuromatch/preproc/PP_10_I1.csv')
# Show every available column name, one per line
print(*data.columns, sep='\n')

In [None]:
# Model input: the right-forearm signals only — rotation entries 0..8
# followed by acceleration components 0..2 (12 columns in total).
# NOTE: the name `input` shadows the Python builtin; it is kept because later cells use it.
input = data[
    [f'RightForeArm_rot_{i}' for i in range(9)]
    + [f'RightForeArm_acc_{i}' for i in range(3)]
]

input.head()

In [None]:
# Model target: rotation entries 0..8 of the right shoulder, arm and hand
# (27 columns), followed by the three right-forearm acceleration components
# (30 columns in total, in that order).
output = data[
    [f'{segment}_rot_{i}'
     for segment in ('RightShoulder', 'RightArm', 'RightHand')
     for i in range(9)]
    + [f'RightForeArm_acc_{i}' for i in range(3)]
]

output.head()

In [None]:
# Convert the subject-10 input/target frames into torch tensors
x, y = torch.tensor(input.values), torch.tensor(output.values)

# Report the shape and contents of each tensor
for caption, tensor in (("input data:", x), ("output data:", y)):
    print(caption, tensor.shape, "\n\n", tensor)

In [None]:
# Hyper-parameters for the single-subject RNN shape check

# --- data layout ---
# The subject-10 input has 16962 rows, viewed as 6 sequences of 2827 time steps
# (6 * 2827 = 16962).
BATCH_SIZE = 6
SEQ_LENGTH = 2827
# Number of input features per time step (number of input columns)
INPUT_SIZE = 12
# Number of target features per time step (number of output columns)
OUTPUT_SIZE = 30

# --- network ---
# Number of features in the recurrent hidden state
HIDDEN_SIZE = 15
# Number of stacked recurrent layers
NUM_LAYERS = 1
# Dropout probability
DROPOUT_PROB = 0.2

# --- optimisation ---
# Learning rate
LEARNING_RATE = 0.003

In [None]:
# Build a bidirectional vanilla RNN and switch its parameters to float64
rnn = nn.RNN(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    batch_first=True,
    bidirectional=True,
)
rnn = rnn.double()

# With batch_first=True the RNN expects (batch, seq_len, input_size)
inputs = x.view(BATCH_SIZE, SEQ_LENGTH, INPUT_SIZE)
print("input data:", inputs.shape)

In [None]:
# Run the RNN once over the reshaped subject-10 input.
#   out    : outputs of the last RNN layer at every time step,
#            shape (batch, seq_len, num_directions * hidden_size)
#   hidden : final hidden state of every layer and direction,
#            shape (num_layers * num_directions, batch, hidden_size)
out, hidden = rnn(inputs.double())

for caption, tensor in (('\nOutput: ', out), ('\nHidden: ', hidden)):
    print(caption, tensor.shape, '\n', tensor)

**DATA LOADER**

Train | Validation | Test Split

In [None]:
# Fixed seed so the train / validation / test assignment is reproducible across runs
SPLIT_SEED = 42

# glob returns paths in arbitrary order; sort them so the seeded split is stable
filenames = sorted(glob.glob("/content/drive/MyDrive/Neuromatch/MoVi Data/IMU_Subjects/preproc/*.csv"))
if not filenames:
    # Fail early with a clear message instead of an obscure error from train_test_split
    raise FileNotFoundError("No preprocessed CSV files matched the glob pattern; is Google Drive mounted?")

# Split at file level: first hold out the test files, then carve a validation
# set out of the remaining training files (default split fraction both times).
train, test = train_test_split(filenames, random_state=SPLIT_SEED)
train, val = train_test_split(train, random_state=SPLIT_SEED)

print(f' train size: {len(train)} files | validation size: {len(val)} files | test size: {len(test)} files')

In [None]:

# Column names fed to the model: right-forearm rotation entries 0..8
# followed by right-forearm acceleration components 0..2.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells use it.
input = (
    [f'RightForeArm_rot_{i}' for i in range(9)]
    + [f'RightForeArm_acc_{i}' for i in range(3)]
)

# Column names the model predicts: rotation entries 0..8 of the right shoulder,
# arm and hand, followed by the right-forearm acceleration components.
output = (
    [f'{segment}_rot_{i}'
     for segment in ('RightShoulder', 'RightArm', 'RightHand')
     for i in range(9)]
    + [f'RightForeArm_acc_{i}' for i in range(3)]
)

In [None]:
class IMU_Dataset(Dataset):
    """Map-style dataset that yields one random fixed-length window per CSV file.

    All files are read into memory once at construction. Each item is a pair
    ``(x, y)`` of tensors cut from the same randomly positioned window of
    ``windowSize`` consecutive rows of one file, so repeated access to the same
    index generally returns different windows.

    Args:
        dataDir: iterable of CSV file paths (despite the name, not a directory).
        xCols: column names used as model input.
        yCols: column names used as model target.
        windowSize: number of consecutive rows per sample.
        transform: optional callable applied to the input tensor.
        target_transform: optional callable applied to the target tensor.
    """

    def __init__(self, dataDir,xCols, yCols, windowSize = 512, transform=None, target_transform=None):
        self.xCols = xCols
        self.yCols = yCols
        self.dataDir = dataDir
        self.windowSize = windowSize
        self.transform = transform
        self.target_transform = target_transform
        # Materialise the paths so __len__ works for any iterable of paths
        self.files = list(dataDir)
        # Pre-load every file; the first CSV column ('Unnamed: 0') is the saved row index
        self.dataSets = [pd.read_csv(path, index_col='Unnamed: 0') for path in self.files]

    def __len__(self):
        """Return the number of files (one sample is drawn per file)."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return a random ``(x, y)`` window from file ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range (raised by the list lookup).
            ValueError: if the file has fewer rows than ``windowSize``.
        """
        # Out-of-range indices raise IndexError here, before any other work is done
        dat = self.dataSets[idx]

        n_rows = dat.shape[0]
        if n_rows < self.windowSize:
            raise ValueError(
                f"file {self.files[idx]!r} has {n_rows} rows, fewer than windowSize={self.windowSize}"
            )

        # randint's upper bound is exclusive: +1 makes the last full window reachable
        # and lets a file of exactly windowSize rows be used (start index 0).
        starti = randint(0, n_rows - self.windowSize + 1)
        endi = starti + self.windowSize
        window = dat.iloc[starti:endi]

        # Select input / target variables
        x = torch.tensor(window[self.xCols].values)
        y = torch.tensor(window[self.yCols].values)

        # Optional transforms
        if self.transform:
            x = self.transform(x)
        if self.target_transform:
            y = self.target_transform(y)

        return x, y

# One dataset per split; all three share the same input / target columns
train_dataset = IMU_Dataset(dataDir=train, xCols=input, yCols=output)
val_dataset = IMU_Dataset(dataDir=val, xCols=input, yCols=output)
test_dataset = IMU_Dataset(dataDir=test, xCols=input, yCols=output)

In [None]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

**Bidirectional Long Short-Term Memory Model**

In [None]:
# Bidirectional LSTM 

class BiLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear read-out applied at every time step.

    Args:
        input_size: number of features per time step in the input.
        hidden_size: number of features in the hidden state of each direction.
        num_layers: number of stacked LSTM layers.
        output_size: number of features predicted per time step.
        dropout_prob: dropout probability between stacked LSTM layers; it is
            ignored when ``num_layers`` is 1 because there is no layer boundary
            to apply it to.

    Shapes:
        input  ``x``: ``(batch_size, seq_length, input_size)``
        output      : ``(batch_size, seq_length, output_size)``
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super(BiLSTM, self).__init__()

        # Number of layers and hidden units per direction (needed to size the initial states)
        self.hidden_size = hidden_size
        self.num_layers= num_layers

        # nn.LSTM only applies dropout between stacked layers; passing a non-zero
        # value with a single layer has no effect and just emits a warning.
        lstm_dropout = dropout_prob if num_layers > 1 else 0.0

        # LSTM layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout, bidirectional=True)

        # Fully connected layer; *2 because forward and backward outputs are concatenated
        self.fc = nn.Linear(hidden_size*2, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_size) to (batch, seq, output_size)."""
        # Zero initial hidden and cell states, created with the dtype and device
        # of the input so the module works for float32/float64 and CPU/GPU alike.
        state_shape = (self.num_layers*2, x.size(0), self.hidden_size)  # *2 for bidirection
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # out: (batch_size, seq_length, hidden_size*2); the final states are not needed
        out, _ = self.lstm(x, (h0, c0))

        # The linear layer acts on the last dimension, i.e. independently per time step
        return self.fc(out)

## **Training the Model**

In [None]:
# Hyper-parameters for training the BiLSTM

# --- data layout ---
# Number of input features per time step (number of input columns)
INPUT_SIZE = 12
# Number of target features per time step (number of output columns)
OUTPUT_SIZE = 30
# Time steps per sample; used to reshape each batch before the forward pass
SEQUENCE_LENGTH = 512
# Number of samples per DataLoader batch
BATCH_SIZE = 6

# --- network ---
# Number of features in the LSTM hidden state (per direction)
HIDDEN_SIZE = 15
# Number of stacked LSTM layers
NUM_LAYERS = 1
# Dropout probability
DROPOUT_PROB = 0.2

# --- optimisation ---
# Learning rate
LEARNING_RATE = 0.003
# Number of passes over the training loader
NUM_EPOCHS = 10

In [None]:
# Batched iterators over the three splits; only the test loader keeps a fixed order
trainLoader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
valLoader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
testLoader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:

# Build the network, switch its parameters to float64 and move it to the selected device
model = BiLSTM(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
    dropout_prob=DROPOUT_PROB,
)
model = model.double().to(device)

# Mean-squared-error loss, optimised with Adam
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Train the model
total_step = len(trainLoader)
losses = []  # mean per-sample training loss, one entry per epoch
model.train()  # make sure training-time behaviour (e.g. dropout) is active
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    n_samples = 0
    for xBatch, yBatch in trainLoader:
        # The model was converted to float64, so inputs and targets must match
        x_batch = xBatch.reshape(-1, SEQUENCE_LENGTH, INPUT_SIZE).to(device).double()
        target = yBatch.to(device).double()

        # Forward pass
        outputs = model(x_batch)
        loss = criterion(outputs, target)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so that
        # a smaller final batch does not distort the epoch average
        running_loss += loss.item() * x_batch.size(0)
        n_samples += x_batch.size(0)

    # Divide by the number of samples seen (not the number of batches) so the
    # value is a true per-sample mean, matching the weighting above
    epoch_loss = running_loss / max(n_samples, 1)
    losses.append(epoch_loss)
  

In [None]:
  # Training loss per epoch; the legend call is what makes the label visible
  plt.plot(np.array(losses), label="Training loss")
  plt.xlabel("Epoch")
  plt.ylabel("MSE loss")
  plt.legend();

In [None]:
# Evaluate the model on the held-out test files
predictions = []   # per-batch model outputs as numpy arrays
values = []        # per-batch ground-truth targets as numpy arrays
total = 0          # number of test samples seen
test_loss = 0.0    # sum of per-sample MSE, normalised below

was_training = model.training
model.eval()  # inference behaviour for layers such as dropout
with torch.no_grad():
    for x_test, y_test in testLoader:
        # Same preprocessing as in training: fixed window shape, float64, model device
        x_test = x_test.reshape(-1, SEQUENCE_LENGTH, INPUT_SIZE).to(device).double()
        y_test = y_test.to(device).double()

        yhat = model(x_test)

        # Tensors must be moved to the CPU before conversion to numpy
        predictions.append(yhat.cpu().numpy())
        values.append(y_test.cpu().numpy())

        test_loss += criterion(yhat, y_test).item() * y_test.size(0)
        total += y_test.size(0)
model.train(was_training)  # restore the mode the model was in before evaluation

# This is a regression task, so report the mean squared error rather than an accuracy
if total:
    print('Test MSE of the model on the IMU data: {:.6f}'.format(test_loss / total))
