### Data Testing
After training the model and saving it locally, it is time to test it. First apply the same data reformatting and processing steps to the task dataset, then feed the processed data to the model to calculate its accuracy.

In [51]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset

import pandas as pd
import numpy as np
import os

from random import shuffle, seed

### Hyperparameter
window_size: The number of timesteps in one window (e.g. how many rows in one window).

channel: The number of features in one window. Similar to image channels (RGB).

batch_size: The number of windows in one batch.

learning_rate: How fast the model learns.

In [137]:
# Hyperparameters read by the windowing, dataset and model cells further down.
window_size = 60  # rows (timesteps) per window
channel = 1  # input channels given to the model's first convolution
batch_size = 32  # windows per DataLoader batch
learning_rate = 0.001  # NOTE(review): not referenced by any cell visible in this section

## Data Reformatting

In [138]:
# Activity name -> integer class label (15 classes, 0-14). Names that are not
# listed here are tagged 999 by the parsing cell and excluded afterwards.
index_dict = {'Pulling_OneH': 0, 'Overhead': 1, 'Pulling': 2, 'Sitting': 3, 
              'Lifting': 4, 'Crawling': 5, 'Standing': 6, 'Carrying': 7, 
              'Walking': 8, 'Pushing': 9, 'Reaching': 10, 'Static_Stoop': 11, 
              'Kneeling': 12, 'Lifting_OneH': 13, 'Crouching': 14}

# The 19 column names assigned to the sensor recording after it is loaded.
header = ['TimeSec', 'Sensor', 'Quatx', 'Quaty', 'Quatz', 'Quat0', 'Heading',
       'Pitch', 'Roll', 'LinAccx', 'LinAccy', 'LinAccz', 'Vbat', 'Accx',
       'Accy', 'Accz', 'Gyrox', 'Gyroy', 'Gyroz']

In [139]:
# Parse the tab-separated timestamp file into one row per annotated event:
# [Frame, Time(s), Task, Index], where Index is the class label from index_dict
# (999 when the task name is unknown).
# The context manager closes the file handle as soon as the lines are read.
with open('./Spinetrack Data/task/Alex_task/timeStamps_Everything.txt', 'r') as index_file:
    index_text = index_file.readlines()

act_index = []
for line in index_text[1:]:  # the first line is skipped (presumably a header row)
    row = line.rstrip().split('\t')
    task_parts = row[2].split('_')
    if len(task_parts) > 1 and task_parts[1] == 'OneH':
        # One-handed variants keep their suffix, e.g. 'Lifting_OneH'.
        row[2] = '_'.join(task_parts[:2])
    else:
        # Everything after the first underscore is discarded.
        # NOTE(review): this rule can never yield 'Static_Stoop', although that
        # key exists in index_dict - confirm against the raw task names.
        row[2] = task_parts[0]

    # 999 marks a task that has no class label; such segments are skipped later.
    row.append(index_dict.get(row[2], 999))
    act_index.append(row)

index_df = pd.DataFrame(act_index, columns=['Frame', 'Time(s)', 'Task', 'Index'])
# index_df

In [140]:
# Load the raw sensor recording; error_bad_lines=False asks pandas to drop
# malformed rows instead of raising.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in
# 2.0, where the equivalent is `on_bad_lines='skip'` - confirm the pandas
# version this notebook targets before changing the call.
recording_df = pd.read_csv('./Spinetrack Data/task/Alex_task/Everything.csv', error_bad_lines=False)
# Overwrite the column labels read from the file with the names in `header`.
recording_df.columns = header
# recording_df

In [141]:
# Attach an activity label to every recording row.
# Consecutive rows of index_df delimit a segment: the earlier row supplies the
# start time and the class label, the later row supplies the end time. The last
# row of index_df therefore only acts as an upper bound.
frame = []
for index in range(1, len(index_df)):
    segment_start = index_df.iloc[index - 1]
    activity_index = segment_start['Index']
    if activity_index == 999:
        # Task name without a class label - segment is not used.
        continue

    time_lower_bound = float(segment_start['Time(s)'])
    time_upper_bound = float(index_df.iloc[index]['Time(s)'])
    in_segment = (recording_df['TimeSec'] >= time_lower_bound) & (recording_df['TimeSec'] < time_upper_bound)

    # .assign() returns a new DataFrame, so the label column is never written
    # onto a slice of recording_df (the cause of SettingWithCopyWarning).
    df_part = recording_df.loc[in_segment].assign(activity=activity_index)
    frame.append(df_part)
frame

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


[        TimeSec  Sensor  Quatx  Quaty  Quatz  Quat0  Heading  Pitch    Roll  \
 3      0.033613       4 -0.960   0.07  -0.21   0.17   351.15  22.39  -12.56   
 4      0.035664       6  0.992  -0.01  -0.11  -0.07     6.32 -12.22    0.09   
 5      0.087600       5 -0.807   0.06  -0.18  -0.56    84.68  20.55    6.07   
 6      0.089361       3 -0.777   0.07  -0.04  -0.62    91.38   8.52   -3.52   
 7      0.117070       0  0.119  -0.06  -0.04  -0.99   207.53  -7.88    3.25   
 8      0.119090       7 -0.973  -0.07   0.06  -0.21    37.95  -8.50    6.78   
 9      0.123040       2 -0.759  -0.06  -0.04   0.65   293.28   7.46    2.43   
 10     0.124540       1 -0.066  -0.02  -0.00   1.00   201.60   2.80    0.15   
 11     0.128380       4 -0.960   0.07  -0.21   0.17   351.30  22.42  -12.53   
 12     0.130690       6  0.992  -0.01  -0.11  -0.07     6.26 -12.18    0.12   
 13     0.170590       5 -0.806   0.06  -0.17  -0.56    84.91  20.46    6.11   
 14     0.178780       3 -0.777   0.07  

In [142]:
# Stack the labelled segments into one table; the original row index of
# recording_df is kept (no ignore_index / reset_index is applied).
data_df = pd.concat(frame)
data_df

Unnamed: 0,TimeSec,Sensor,Quatx,Quaty,Quatz,Quat0,Heading,Pitch,Roll,LinAccx,LinAccy,LinAccz,Vbat,Accx,Accy,Accz,Gyrox,Gyroy,Gyroz,activity
3,0.033613,4,-0.960,0.07,-0.21,0.17,351.15,22.39,-12.56,-0.01,0.02,0.01,3.64,-0.31,-0.15,2.29,-0.39,-0.22,0.91,6
4,0.035664,6,0.992,-0.01,-0.11,-0.07,6.32,-12.22,0.09,-0.01,0.00,0.00,3.66,0.00,0.00,-0.31,0.20,-0.00,0.98,6
5,0.087600,5,-0.807,0.06,-0.18,-0.56,84.68,20.55,6.07,-0.05,-0.03,0.01,3.71,-0.15,0.31,0.92,-0.35,0.16,0.94,6
6,0.089361,3,-0.777,0.07,-0.04,-0.62,91.38,8.52,-3.52,-0.01,0.00,0.01,3.65,-0.92,0.00,0.92,-0.15,-0.05,1.00,6
7,0.117070,0,0.119,-0.06,-0.04,-0.99,207.53,-7.88,3.25,0.02,-0.00,0.00,3.77,0.15,0.31,0.61,0.12,0.06,0.99,6
8,0.119090,7,-0.973,-0.07,0.06,-0.21,37.95,-8.50,6.78,-0.03,0.01,-0.00,0.00,0.15,0.00,0.31,0.12,0.12,0.98,6
9,0.123040,2,-0.759,-0.06,-0.04,0.65,293.28,7.46,2.43,-0.02,0.00,0.00,3.79,0.00,0.00,1.53,-0.13,0.02,0.99,6
10,0.124540,1,-0.066,-0.02,-0.00,1.00,201.60,2.80,0.15,0.01,-0.00,0.01,3.77,0.00,0.46,0.15,-0.06,0.00,1.00,6
11,0.128380,4,-0.960,0.07,-0.21,0.17,351.30,22.42,-12.53,0.00,0.00,0.00,3.64,0.00,-0.61,0.31,-0.38,-0.20,0.91,6
12,0.130690,6,0.992,-0.01,-0.11,-0.07,6.26,-12.18,0.12,-0.02,-0.00,0.00,3.66,0.31,-0.15,0.61,0.19,0.00,0.98,6


In [143]:
# # the sensor label index range from 0 to 7 
# sensor_data = []
# for i in range(0, 8):
#     df = data_df.where(data_df['Sensor'] == i).dropna()
#     sensor_data.append(df)
# result_df = pd.concat(sensor_data).reset_index(drop=True)
# result_df

In [144]:
# The per-sensor regrouping in the cell above is commented out, so the windows
# are built directly from data_df. This is an alias, not a copy.
result_df = data_df

In [145]:
# Cut the labelled table into consecutive, non-overlapping windows of
# `window_size` rows each (the original note puts one window at about 1 second).
# Only complete windows are kept: a trailing remainder shorter than window_size
# is discarded, but a final window that fits exactly is retained.
data = [
    result_df.iloc[window_start:window_start + window_size].values
    for window_start in range(0, len(result_df) - window_size + 1, window_size)
]
#data

In [146]:
# Number of windows produced by the windowing step.
len(data)

313

In [147]:
# Keep only the windows whose rows all carry the same activity label
# (column 19). A window that straddles two activities is reported and left out.
# Sensor ids are not checked: rows from different sensors may share a window.
cleaned_data = []
for window_rows in data:
    window_activities = [sample_row[19] for sample_row in window_rows]
    single_activity = all(
        earlier == later
        for earlier, later in zip(window_activities, window_activities[1:])
    )
    if single_activity:
        cleaned_data.append(window_rows)
    else:
        print("data contains different activities! Window droped")

data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


In [148]:
# Number of windows left after dropping the mixed-activity ones.
len(cleaned_data)

290

In [149]:
# Shape of one window: (rows, columns), where the columns are the 19 sensor
# fields followed by the activity label.
cleaned_data[0].shape

(60, 20)

In [150]:
# Shuffle the windows in place; seeding Python's `random` module first makes
# the resulting order repeatable from run to run.
seed(101)
shuffle(cleaned_data)
#cleaned_data

In [151]:
# One label per window, read from column 19 (the activity column) of the
# window's first row.
labels = np.array([window_rows[0][19] for window_rows in cleaned_data])
#labels

In [152]:
# Feature matrix of each window: every column except the activity label
# (column 19), stacked into a single array.
features = np.array(
    [np.delete(window_rows, 19, 1) for window_rows in cleaned_data]
)
#features

In [153]:
# Shape of one feature window once the label column has been removed.
features[0].shape

(60, 19)

In [154]:
# Pair every feature window with its label and hold the pairs in a
# two-column table: one window per row.
paired_windows = [
    (window_features, window_label)
    for window_features, window_label in zip(features, labels)
]
activity_data = pd.DataFrame(paired_windows)
activity_data.columns = ['features', 'labels']
activity_data

Unnamed: 0,features,labels
0,"[[71.9, 2.0, -0.309, -0.04, 0.15, 0.94, 230.42...",14.0
1,"[[105.46, 0.0, -0.442, -0.16, 0.04, -0.88, 140...",10.0
2,"[[46.923, 6.0, 0.9840000000000001, 0.03, -0.02...",12.0
3,"[[85.271, 0.0, -0.489, -0.12, 0.04, -0.86, 134...",10.0
4,"[[157.44, 6.0, 0.149, 0.02, -0.04, 0.99, 176.9...",5.0
5,"[[130.88, 7.0, -0.982, -0.05, 0.04, 0.18, 353....",1.0
6,"[[119.86, 2.0, -0.071, -0.42, -0.77, 0.48, 151...",10.0
7,"[[272.43, 4.0, 0.488, -0.22, 0.61, -0.58, 257....",6.0
8,"[[27.011999999999997, 1.0, 0.235, 0.22, -0.01,...",3.0
9,"[[313.04, 5.0, 0.7440000000000001, 0.2, 0.56, ...",6.0


In [155]:
# Size of the network's output layer: one unit per known activity class.
# It is taken from index_dict rather than from the largest label present in
# this test set, so the architecture still matches the saved checkpoint when
# the test data happens not to contain the highest-numbered class.
max_index = activity_data['labels'].max()
label_size = len(index_dict)
assert max_index < label_size, "test data contains a label outside index_dict"

## Load Data

In [156]:
# Decide whether the model and batches are moved to the GPU.
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU...")
# Show the active CUDA device index. The query is guarded because
# torch.cuda.current_device() raises when no CUDA device is available.
torch.cuda.current_device() if train_on_gpu else None

CUDA is available! Training on GPU.


0

In [157]:
# PyTorch dataset over the table of (feature window, label) pairs
class DatasetSpineTrack(Dataset):
    """Serve (features, label) samples from an in-memory table.

    The object passed as ``file`` must support ``len()`` and expose
    ``"features"`` and ``"labels"`` columns that can be indexed with ``.iloc``.
    """

    def __init__(self, file, transform=None):
        """Keep a reference to the table; ``transform`` is stored but never applied."""
        self.transform = transform
        self.data = file

    def __len__(self):
        """Number of samples, i.e. ``len()`` of the wrapped table."""
        return len(self.data)

    def __getitem__(self, index):
        """Return sample ``index`` as a ``(features, label)`` pair of tensors.

        The feature window is viewed as ``(channel, window_size, 19)``, using the
        notebook-level ``channel`` and ``window_size`` settings; 19 is the number
        of feature columns in a window. The label is converted to ``torch.long``.
        """
        sample = torch.tensor(self.data["features"].iloc[index]).view(channel, window_size, 19)
        target = torch.tensor(self.data["labels"].iloc[index], dtype=torch.long)
        return sample, target

In [158]:
# Every window built in this notebook is used for testing - no split is made here.
activity_data_test = activity_data
test_dataset = DatasetSpineTrack(activity_data_test)

# Serve the test windows in batches of `batch_size`; shuffle=True re-orders the
# samples on each pass over the loader.
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [159]:
# Build a ResNet-18 with randomly initialised weights, adapted to this data.
model = models.resnet18(pretrained=False)
# Replace the first convolution so it accepts `channel` input channels
model.conv1 = torch.nn.Conv2d(channel, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# Replace the classifier head so it emits one score per activity class.
model.fc = torch.nn.Linear(512, label_size, bias=True)
# NOTE(review): add_module only registers the layer under the name "dropout";
# presumably torchvision's ResNet.forward never calls it, in which case it has
# no effect on the output - confirm.
model.add_module("dropout", torch.nn.Dropout(p=0.5))
# Use float64 parameters.
model = model.double()

# move the model to the GPU if CUDA is available
if train_on_gpu:
    model.cuda()
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [160]:
# Load the previously trained weights into the model.
# map_location='cpu' lets a checkpoint that was saved from a GPU be read on a
# machine without CUDA; load_state_dict then copies the values into the model's
# existing parameters, on whichever device the model already lives.
model.load_state_dict(torch.load('model_Spinetrack_3.pt', map_location='cpu'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [161]:
# Release all the GPU memory cache that can be freed
# (frees cached blocks held by PyTorch's allocator; live tensors are unaffected).
torch.cuda.empty_cache() 

### Overall accuracy and each class accuracy

In [162]:
# Evaluate the loaded model on the test windows: sample-weighted average loss,
# per-class accuracy and overall accuracy.
num_classes = len(index_dict)
test_loss = 0.0
class_correct = [0.0] * num_classes
class_total = [0.0] * num_classes
# Class index -> activity name, taken from the mapping itself instead of
# relying on the insertion order of index_dict's keys.
class_names = {class_index: name for name, class_index in index_dict.items()}

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
# Not used during evaluation; kept so the names defined by this cell are unchanged.
optimizer = optim.Adam(model.parameters(), lr=0.003)

model.eval()
# torch.no_grad() only disables gradient tracking when used as a context
# manager (or decorator); calling it as a bare statement has no effect.
with torch.no_grad():
    for features, labels in testloader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            features, labels = features.cuda(), labels.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(features)
        # batch loss, weighted by batch size so the final average is per sample
        loss = criterion(output, labels)
        test_loss += loss.item() * features.size(0)
        # predicted class = index of the highest score
        pred = torch.max(output, 1)[1]
        # Per-sample hit/miss as a plain list. Not squeezing keeps a batch of
        # size 1 iterable, so the last partial batch is always counted.
        hits = pred.eq(labels.view_as(pred)).cpu().tolist()
        # Iterate over the samples actually present in this batch instead of
        # range(batch_size) guarded by a bare except.
        for label, hit in zip(labels.cpu().tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

# average test loss
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(num_classes):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            class_names[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        # No window of this class exists in the test set.
        print('Test Accuracy of %5s: N/A (no test examples)' % (class_names[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 5.986851

Test Accuracy of Pulling_OneH: N/A (no training examples)
Test Accuracy of Overhead: 91% (34/37)
Test Accuracy of Pulling: 55% ( 5/ 9)
Test Accuracy of Sitting:  0% ( 0/17)
Test Accuracy of Lifting:  0% ( 0/13)
Test Accuracy of Crawling: 75% (12/16)
Test Accuracy of Standing:  0% ( 0/76)
Test Accuracy of Carrying: N/A (no training examples)
Test Accuracy of Walking: 14% ( 2/14)
Test Accuracy of Pushing: 20% ( 2/10)
Test Accuracy of Reaching: 38% (19/50)
Test Accuracy of Static_Stoop: N/A (no training examples)
Test Accuracy of Kneeling:  0% ( 0/16)
Test Accuracy of Lifting_OneH:  0% ( 0/ 5)
Test Accuracy of Crouching: 51% (14/27)

Test Accuracy (Overall): 30% (88/290)
