# Data cleaning and reformatting

In [118]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset\


import pandas as pd
import numpy as np
import os

from random import shuffle, seed

### Hyperparameters
window_size: The number of timesteps in one window (e.g. how many rows in one window).

channel: The number of features in one window. Similar to image channels (RGB).

batch_size: The number of windows in one batch.

learning_rate: How fast the model learns.

In [119]:
# Rows (timesteps) grouped into one window.
window_size = 180
# Input channels per window; used as the in_channels of the network's first conv layer.
channel = 1
# Windows per mini-batch handed out by the DataLoaders.
batch_size = 32
# Learning rate passed to the Adam optimizer.
learning_rate = 0.001

---
### Go to
<a href="#bookmark">Run all cells above</a>

In [120]:
# Activity name -> integer class index (15 classes, indices 0-14).
# The evaluation cell looks names up by position in this dict, so the insertion
# order here must stay aligned with the index values.
index_dict = {'Pulling_OneH': 0, 'Overhead': 1, 'Pulling': 2, 'Sitting': 3, 'Lifting': 4, 
              'Crawling': 5, 'Standing': 6, 'Carrying': 7, 'Walking': 8, 'Pushing': 9, 
              'Reaching': 10, 'Static_Stoop': 11, 'Kneeling': 12, 'Lifting_OneH': 13, 'Crouching': 14}

In [121]:
# Echo the activity -> class-index mapping for reference.
print(index_dict)

{'Pulling_OneH': 0, 'Overhead': 1, 'Pulling': 2, 'Sitting': 3, 'Lifting': 4, 'Crawling': 5, 'Standing': 6, 'Carrying': 7, 'Walking': 8, 'Pushing': 9, 'Reaching': 10, 'Static_Stoop': 11, 'Kneeling': 12, 'Lifting_OneH': 13, 'Crouching': 14}


In [122]:
# Load the raw sensor recording. Malformed CSV rows are skipped (with a
# warning) instead of aborting the whole load.
csv_path = './calibration_readone_data.csv'
try:
    # Current spelling: `error_bad_lines` was deprecated in pandas 1.3 and
    # removed in 2.0. 'warn' matches the old error_bad_lines=False behaviour
    # (skip the row, emit a warning).
    data_df = pd.read_csv(csv_path, on_bad_lines='warn')
except TypeError:
    # pandas < 1.3 does not accept `on_bad_lines`; use the legacy flag there.
    data_df = pd.read_csv(csv_path, error_bad_lines=False)
data_df

Unnamed: 0,TimeSec,Sensor,Quatx,Quaty,Quatz,Quat0,Heading,Pitch,Roll,LinAccx,LinAccy,LinAccz,Vbat,Accx,Accy,Accz,Gyrox,Gyroy,Gyroz,activity
0,0.0362,2,0.822,0.01,0.26,-0.51,306.93,25.57,-16.37,-0.03,-0.03,-0.02,3.94,-19.84,-11.14,16.33,-0.47,-0.28,0.81,7
1,0.0377,3,0.625,-0.21,0.26,0.71,113.60,38.21,7.42,0.06,-0.08,-0.08,3.80,-37.84,-4.27,53.71,-0.51,0.03,0.77,7
2,0.0961,0,0.008,0.08,0.02,1.00,192.97,-9.34,2.16,0.11,-0.13,-0.02,3.92,2.75,-1.68,36.01,0.07,-0.10,0.98,7
3,0.0978,6,-0.978,0.03,-0.00,-0.21,38.13,1.13,-2.73,0.08,0.36,0.33,3.81,-18.92,-64.70,118.26,-0.13,0.07,1.20,7
4,0.1018,5,0.448,0.44,0.49,0.61,116.18,-5.60,82.09,0.26,-0.13,-0.21,3.87,5.19,45.62,-2.90,0.15,0.67,0.35,7
5,0.1032,1,0.105,-0.09,0.01,-0.99,206.37,-9.81,-2.63,-0.01,0.04,-0.05,3.91,-3.51,18.31,61.04,0.20,-0.02,0.84,7
6,0.1069,4,0.540,-0.59,0.39,-0.45,300.68,-6.56,-90.39,0.10,0.06,-0.26,3.79,8.85,-50.35,-32.35,0.15,-1.09,0.06,7
7,0.1086,7,0.955,0.11,0.16,0.23,42.78,15.05,16.65,0.11,0.29,-0.22,0.00,-14.34,-24.11,85.60,-0.28,-0.09,0.82,7
8,0.1283,2,0.838,-0.01,0.26,-0.48,310.55,24.85,-16.97,-0.12,0.06,-0.06,3.94,-31.13,-19.23,33.87,-0.37,-0.39,0.77,7
9,0.1301,3,0.586,-0.22,0.23,0.74,119.48,37.14,5.69,-0.01,0.01,-0.16,3.80,-56.00,1.37,29.91,-0.50,0.10,0.68,7


In [123]:
# result_df = alex_data_df
# result_df
# Use the loaded frame as the input of the windowing step.
# This binds a second name to the same DataFrame object; no copy is made.
result_df = data_df

In [124]:
# # the sensor label index range from 0 to 7 
# sensor_data = []
# for i in range(0, 8):
#     df = alex_data_df.where(alex_data_df['Sensor'] == i).dropna()
#     sensor_data.append(df)
# result_df = pd.concat(sensor_data).reset_index(drop=True)
# result_df

---
# Data processing and deep learning model

In [125]:
# Cut the recording into consecutive, non-overlapping windows of `window_size`
# rows. Each window is stored as a 2-D NumPy array (rows x columns); a trailing
# partial window is discarded.
data = []
n_rows = len(result_df)
# The last valid start offset is `n_rows - window_size`, inclusive. The previous
# `window*window_size < len(result_df)` test was off by one and dropped the
# final complete window whenever the row count was an exact multiple of
# `window_size`.
for start in range(0, n_rows - window_size + 1, window_size):
    data.append(result_df.iloc[start:start + window_size].values)
#data

In [126]:
# Number of windows produced by the windowing step.
len(data)

5478

In [127]:
# Keep only windows whose rows all carry the same activity label; a window that
# straddles two activities has no single correct label and is dropped.
# (Windows are NOT filtered by sensor id: rows from several sensors may share a
# window.)
activity_col = 19  # position of the 'activity' column within a window row
cleaned_data = []
for window_rows in data:
    window_activities = window_rows[:, activity_col]
    # Compare every row against the first one directly. This replaces the old
    # row-by-row scan, which used -1 as a "not set yet" sentinel and tracked a
    # sensor id that was never checked.
    if (window_activities != window_activities[0]).any():
        print("data contains different activities! Window droped")
    else:
        cleaned_data.append(window_rows)

data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


In [128]:
# Number of windows left after dropping mixed-activity windows.
len(cleaned_data)

5370

In [129]:
# Shape of one window: (rows per window, columns including the activity column).
cleaned_data[0].shape

(180, 20)

In [130]:
# shuffle the data
# Seeding Python's global RNG first makes the shuffled order repeatable on a
# fresh run.
# NOTE(review): shuffle() reorders cleaned_data in place, so re-running only
# this cell shuffles the already-shuffled list and yields a different order.
seed(101)
shuffle(cleaned_data)
#cleaned_data

In [131]:
# One label per window: every row of a cleaned window carries the same
# activity, so the value in column 19 of the first row stands for the window.
labels = np.array([window_rows[0][19] for window_rows in cleaned_data])
#labels

In [132]:
# Model inputs: each window with its activity column (index 19) removed,
# stacked into a single array with one entry per window.
features = np.array([np.delete(window_rows, 19, axis=1) for window_rows in cleaned_data])
#features

In [133]:
# Shape of one feature window: (rows per window, feature columns).
features[0].shape

(180, 19)

In [134]:
# Pair every feature window with its label: one DataFrame row per window.
activity_data = pd.DataFrame(list(zip(features, labels)), columns=['features', 'labels'])
activity_data

Unnamed: 0,features,labels
0,"[[1.689, 0.0, -0.189, -0.14, -0.18, -0.96, 169...",4.0
1,"[[22.592, 6.0, 0.14400000000000002, 0.14, -0.0...",7.0
2,"[[33.924, 2.0, 0.209, 0.38, 0.78, -0.44, 160.5...",1.0
3,"[[2.4442, 5.0, 0.644, 0.19, 0.3, 0.68, 109.8, ...",4.0
4,"[[19.282, 6.0, 0.8759999999999999, -0.05, -0.0...",1.0
5,"[[1.013, 0.0, 0.125, 0.48, 0.12, -0.86, 205.11...",4.0
6,"[[1.26, 5.0, 0.931, 0.02, 0.08, 0.36, 56.3, 7....",13.0
7,"[[3.2235, 2.0, 0.18600000000000005, 0.19, 0.2,...",14.0
8,"[[39.567, 7.0, 0.269, -0.26, 0.1, 0.92, 162.54...",0.0
9,"[[1.14, 5.0, -0.121, -0.78, -0.57, -0.23, 89.2...",4.0


In [135]:
# The classifier head needs one output per class index, i.e. the largest label
# found in the data plus one.
max_index = activity_data['labels'].max()
label_size = int(max_index) + 1

---
## Load data

In [136]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU...")
# Display the index of the active CUDA device. current_device() raises when no
# CUDA runtime is usable, so it is only queried when one is present; on a
# CPU-only machine the expression is None and the cell shows no output.
torch.cuda.current_device() if train_on_gpu else None

CUDA is available! Training on GPU.


0

In [137]:
# Positional split of the (already shuffled) windows into three consecutive
# slices: train first, then validation, then test. The validation and test
# fractions are set below; training receives whatever remains.
valid_size = 0.2
test_size = 0.2
activity_data.columns = ["features", "labels"]

n_windows = len(activity_data)
train_end = int(n_windows * (1 - valid_size - test_size))
valid_end = int(n_windows * (1 - test_size))

activity_data_train = activity_data[:train_end]
activity_data_valid = activity_data[train_end:valid_end]
activity_data_test = activity_data[valid_end:]
# activity_data_train.to_csv("./activity_data_train.csv", encoding='utf-8-sig')
# activity_data_valid.to_csv("./activity_data_valid.csv", encoding='utf-8-sig')
# activity_data_train.to_csv("./activity_data_test.csv", encoding='utf-8-sig')

In [138]:
# define our dataset in pytorch
class DatasetSpineTrack(Dataset):
    """Dataset of fixed-length sensor windows held in a DataFrame.

    Every row of the frame is one sample: the ``"features"`` column holds the
    window's 2-D feature array and the ``"labels"`` column its class index.

    Args:
        file: DataFrame with ``"features"`` and ``"labels"`` columns. The
            parameter name is historical; nothing is read from disk here.
        transform: Optional callable applied to the reshaped feature tensor
            before it is returned. ``None`` (the default) returns the tensor
            unchanged.
    """

    def __init__(self, file, transform=None):
        self.data = file
        self.transform = transform

    def __len__(self):
        """Return the number of windows in the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the window at position ``index``.

        ``features`` is reshaped to ``(channel, window_size, n_features)`` using
        the module-level ``channel`` and ``window_size`` settings; ``label`` is
        a scalar ``torch.long`` tensor.
        """
        features = torch.tensor(self.data["features"].iloc[index])
        # -1 lets torch infer the per-row feature count from the data instead
        # of hard-coding 19, so the class keeps working if columns change.
        features = features.view(channel, window_size, -1)
        labels = torch.tensor(self.data["labels"].iloc[index], dtype=torch.long)

        # Previously `transform` was stored but never applied.
        if self.transform is not None:
            features = self.transform(features)

        return features, labels

In [139]:
# construct training and testing dataset in csv
# train_dataset = DatasetSpineTrack("./activity_data_train.csv")
# valid_dataset = DatasetSpineTrack("./activity_data_valid.csv")
# test_dataset = DatasetSpineTrack("./activity_data_test.csv")
# Wrap each in-memory split in a Dataset.
train_dataset, valid_dataset, test_dataset = (
    DatasetSpineTrack(split)
    for split in (activity_data_train, activity_data_valid, activity_data_test)
)
# Fetch one sample as a quick smoke test of __getitem__.
feature, label = train_dataset[0]
#feature
#label

In [140]:
# Batch iterators over the three splits. Train and validation batches are
# reshuffled on every pass; the test loader keeps the dataset order.
trainloader, validloader = (
    DataLoader(split_dataset, batch_size=batch_size, shuffle=True)
    for split_dataset in (train_dataset, valid_dataset)
)
testloader = DataLoader(test_dataset, batch_size=batch_size)

---
## Network Architecture

In [141]:
# train_on_gpu = False
# train_on_gpu = True

In [148]:
# Start from an untrained ResNet-18 and adapt both ends to this task.
model = models.resnet18(pretrained=False)
# Stem: take `channel` input planes (a window is fed as a 1-channel "image").
model.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Head: one logit per activity class.
model.fc = nn.Linear(512, label_size)
# NOTE(review): add_module only registers the layer under the name "dropout";
# nothing in this notebook calls it, and torchvision's ResNet.forward presumably
# does not either, so this dropout likely has no effect on training - confirm.
model.add_module("dropout", nn.Dropout(p=0.5))
# Run in double precision - presumably to match the float64 feature tensors
# built from the NumPy windows; confirm against the dataset dtype.
model = model.double()

# move the model to the GPU if CUDA is available
if train_on_gpu:
    model = model.cuda()
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

---
### Run all above 
<a name='bookmark' />

---
## Training the Network

Remember to look at how the training and validation losses decrease over time; if the validation loss starts to increase, it indicates possible overfitting.

In [143]:
import time
start_time = time.time()

# Loss and optimizer. CrossEntropyLoss takes the raw logits from model.fc.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# number of epochs to train the model
n_epochs = 30

# Best (lowest) validation loss seen so far; a checkpoint is written whenever
# it improves. float('inf') replaces np.Inf, which NumPy 2.0 removed.
valid_loss_min = float('inf')

for epoch in range(1, n_epochs+1):

    # per-epoch running sums of (batch loss * batch size)
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train()
    for features, labels in trainloader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            features, labels = features.cuda(), labels.cuda()
        # clear gradients left over from the previous step
        optimizer.zero_grad()
        # forward pass
        output = model(features)
        loss = criterion(output, labels)
        # backward pass and parameter update
        loss.backward()
        optimizer.step()
        # weight by batch size so the epoch average is per sample
        train_loss += loss.item()*features.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    # No gradients are needed for validation; disabling autograd avoids
    # building a graph for every batch (less memory, less compute).
    with torch.no_grad():
        for features, labels in validloader:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                features, labels = features.cuda(), labels.cuda()
            output = model(features)
            loss = criterion(output, labels)
            valid_loss += loss.item()*features.size(0)

    # calculate average per-sample losses
    train_loss = train_loss/len(trainloader.sampler)
    valid_loss = valid_loss/len(validloader.sampler)

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model_Spinetrack_data.pt')
        valid_loss_min = valid_loss

# output running time
running_time = time.time() - start_time
minutes, seconds = divmod(running_time, 60)
print("Training time: {} min {} sec".format(int(minutes), int(seconds)))

Epoch: 1 	Training Loss: 1.192740 	Validation Loss: 1.376116
Validation loss decreased (inf --> 1.376116).  Saving model ...
Epoch: 2 	Training Loss: 0.653408 	Validation Loss: 0.992900
Validation loss decreased (1.376116 --> 0.992900).  Saving model ...
Epoch: 3 	Training Loss: 0.563575 	Validation Loss: 1.880785
Epoch: 4 	Training Loss: 0.397641 	Validation Loss: 8.896060
Epoch: 5 	Training Loss: 0.312598 	Validation Loss: 2.344358
Epoch: 6 	Training Loss: 0.343913 	Validation Loss: 0.491321
Validation loss decreased (0.992900 --> 0.491321).  Saving model ...
Epoch: 7 	Training Loss: 0.259472 	Validation Loss: 0.472596
Validation loss decreased (0.491321 --> 0.472596).  Saving model ...
Epoch: 8 	Training Loss: 0.215603 	Validation Loss: 2.101978
Epoch: 9 	Training Loss: 0.191101 	Validation Loss: 0.380959
Validation loss decreased (0.472596 --> 0.380959).  Saving model ...
Epoch: 10 	Training Loss: 0.168735 	Validation Loss: 0.331388
Validation loss decreased (0.380959 --> 0.331388)

###  Load the Model with the Lowest Validation Loss

In [149]:
# Restore saved weights into `model`. map_location='cpu' remaps every stored
# tensor to CPU storage, so a checkpoint written on a GPU machine can be read
# without CUDA.
# NOTE(review): the training cell saves to 'model_Spinetrack_data.pt', but this
# cell loads 'model_Spinetrack_6.pt' - confirm this is the intended checkpoint.
model.load_state_dict(torch.load('model_Spinetrack_6.pt', map_location='cpu'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

---
## Test the Trained Network

Test your trained model on previously unseen data! A "good" result will be a result that gets more than 70% accuracy on these test data.

In [150]:
# Turn the GPU flag off so the evaluation cells stop moving input batches to CUDA.
# NOTE(review): this overrides the value set by the CUDA availability check and
# does not move `model` itself - confirm the model is on the CPU at this point.
train_on_gpu = False

In [151]:
# Release all the GPU memory cache that can be freed
# (asks PyTorch's CUDA allocator to hand its unused cached blocks back).
torch.cuda.empty_cache() 

### Overall accuracy and each class accuracy

In [152]:
# Evaluate on the held-out test split: average loss, per-class accuracy and
# overall accuracy.
n_classes = len(index_dict)
# Invert name -> index so that class names are looked up by their index value
# instead of by their position in the dict.
class_names = {idx: name for name, idx in index_dict.items()}

# running sum of (batch loss * batch size); averaged after the loop
test_loss = 0.0
class_correct = [0.] * n_classes
class_total = [0.] * n_classes

criterion = nn.CrossEntropyLoss()
# (No optimizer is created here: evaluation never updates parameters.)

# Feed inputs on whatever device the model's weights live on, so the cell does
# not depend on a manually toggled flag matching the model's actual placement.
device = next(model.parameters()).device

model.eval()
# `torch.no_grad()` only disables autograd while used as a context manager;
# calling it as a bare statement (as before) had no effect.
with torch.no_grad():
    # iterate over test data
    for features, labels in testloader:
        features, labels = features.to(device), labels.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(features)
        # calculate the batch loss
        loss = criterion(output, labels)
        # update test loss
        test_loss += loss.item()*features.size(0)
        # predicted class = index of the largest logit
        _, pred = torch.max(output, 1)
        # compare predictions to true label
        hits = pred.eq(labels.view_as(pred)).cpu().tolist()
        # Iterate over the samples actually present in the batch. The old
        # `range(batch_size)` + bare `except: pass` relied on swallowed
        # IndexErrors for the short final batch and silently lost batches of
        # size 1.
        for label, hit in zip(labels.cpu().tolist(), hits):
            class_correct[label] += float(hit)
            class_total[label] += 1

# average test loss
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(n_classes):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            class_names[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        # these counts come from the test split, so say so
        print('Test Accuracy of %5s: N/A (no test examples)' % (class_names[i]))

total_correct = sum(class_correct)
total_seen = sum(class_total)
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * total_correct / total_seen,
    total_correct, total_seen))

Test Loss: 0.252560

Test Accuracy of Pulling_OneH: 95% (58/61)
Test Accuracy of Overhead: 95% (64/67)
Test Accuracy of Pulling: 95% (40/42)
Test Accuracy of Sitting: 100% (24/24)
Test Accuracy of Lifting: 96% (328/339)
Test Accuracy of Crawling: 94% (34/36)
Test Accuracy of Standing: 100% (31/31)
Test Accuracy of Carrying: 93% (83/89)
Test Accuracy of Walking: 100% (42/42)
Test Accuracy of Pushing: 86% (58/67)
Test Accuracy of Reaching: 92% (82/89)
Test Accuracy of Static_Stoop: 96% (27/28)
Test Accuracy of Kneeling: 100% (26/26)
Test Accuracy of Lifting_OneH: 75% (72/95)
Test Accuracy of Crouching: 71% (27/38)

Test Accuracy (Overall): 92% (996/1074)


### Overall accuracy (different calculation method)

In [None]:
# Cross-check of the overall test accuracy using top-1 predictions from topk().
# Feed inputs on the device that holds the model's weights.
device = next(model.parameters()).device

n_correct = 0
n_seen = 0

model.eval()
# Turn off gradients for evaluation, saves memory and computations. no_grad()
# must be entered as a context manager; a bare call does nothing.
with torch.no_grad():
    for features, labels in testloader:
        features, labels = features.to(device), labels.to(device)
        logits = model(features)

        top_p, top_class = logits.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        # Count hits per sample instead of averaging per-batch means, which
        # over-weights a short final batch.
        n_correct += equals.sum().item()
        n_seen += labels.size(0)

# The loss is intentionally not accumulated here: the previous version added
# loss tensors onto the already-averaged `test_loss` from the cell above and
# never reported the result.
accuracy = n_correct / n_seen
print("Test Accuracy: {:.3f}".format(accuracy))

In [None]:
# TODO: Figure out pulling_OneH