### Data Testing
After training and saving the model to local, now it is time to test it! First apply the same data reformatting and processing method on the task dataset, then feed the processed data to model to calculate accuracy.

In [46]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset

import pandas as pd
import numpy as np
import os

from random import shuffle, seed

### Hyperparameter
window_size: The number of timesteps in one window (e.g. how many rows in one window).

channel: The number of features in one window. Similar to image channels (RGB).

batch_size: The numebr of windows in one batch.

learning_rate: How fast the model learns.

In [75]:
window_size = 420
channel = 1
batch_size = 32
learning_rate = 0.001

## Data Reformatting

In [76]:
index_dict = {'Pulling_OneH': 0, 'Overhead': 1, 'Pulling': 2, 'Sitting': 3, 
              'Lifting': 4, 'Crawling': 5, 'Standing': 6, 'Carrying': 7, 
              'Walking': 8, 'Pushing': 9, 'Reaching': 10, 'Static_Stoop': 11, 
              'Kneeling': 12, 'Lifting_OneH': 13, 'Crouching': 14}

header = ['TimeSec', 'Sensor', 'Quatx', 'Quaty', 'Quatz', 'Quat0', 'Heading',
       'Pitch', 'Roll', 'LinAccx', 'LinAccy', 'LinAccz', 'Vbat', 'Accx',
       'Accy', 'Accz', 'Gyrox', 'Gyroy', 'Gyroz']

In [77]:
directory = "./Spinetrack Data/task/"
name_dir = []

for d in sorted(os.listdir(directory)):
    name_dir.append(d)
#name_dir

In [78]:
total_data = []
for d in name_dir:
    try:
        # parsing timestep txt file
        index_text = open('./Spinetrack Data/task/{0}/timeStamps_Everything.txt'.format(d), 'r').readlines()
        act_index = []
        for line in index_text[1:]:
            row = line.rstrip().split('\t')
            if len(row[2].split('_')) > 1 and row[2].split('_')[1] == 'OneH':
                row[2] = '_'.join(row[2].split('_')[:2])
            else:
                row[2] = row[2].split('_')[0]

            if row[2] not in index_dict.keys():
                row.append(999)
            else:
                row.append(index_dict[row[2]])

            act_index.append(row)
        index_df = pd.DataFrame(act_index)
        index_df.columns = ['Frame', 'Time(s)', 'Task', 'Index']
        
        # parsing everything csv file
        recording_df = pd.read_csv('./Spinetrack Data/task/{0}/Everything.csv'.format(d), error_bad_lines=False).dropna()
        recording_df.columns = header
#         print(recording_df)
        
        # adding activity columns
        frame = []
        for index in range(1, len(index_df)):
            activity_index = index_df.iloc[index - 1]['Index']
            if activity_index != 999:
                time_lower_bound = float(index_df.iloc[index - 1]['Time(s)'])
                time_upper_bound = float(index_df.iloc[index]['Time(s)'])
                df_part = recording_df.loc[(recording_df['TimeSec'] >= (time_lower_bound + 0.5)) & (recording_df['TimeSec'] < (time_upper_bound - 0.5))]
                df_part['activity'] = [int(activity_index) for i in range(len(df_part))]
                frame.append(df_part)
        data_df = pd.concat(frame)
        total_data.append(data_df)
    except:
        pass

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
b'Skipping line 399: expected 19 fields, saw 23\nSkipping line 531: expected 19 fields, saw 25\nSkipping line 2239: expected 19 fields, saw 26\nSkipping line 2461: expected 19 fields, saw 20\nSkipping line 2829: expected 19 fields, saw 23\nSkipping line 3031: expected 19 fields, saw 22\nSkipping line 3229: expected 19 fields, saw 20\nSkipping line 3253: expected 19 fields, saw 21\nSkipping line 3535: expected 19 fields, saw 21\nSkipping line 3997: expected 19 fields, saw 21\nSkipping line 4989: expected 19 fields, saw 20\nSkipping line 5317: expected 19 fields, saw 26\nSkipping line 5693: expected 19 fields, saw 21\nSkipping line 5963: expected 19 fields, saw 24\nSkipping line 6091: expected 19 fields, saw 23\nSkipping line 6155: expected 19 fi

b'Skipping line 3: expected 16 fields, saw 19\nSkipping line 4: expected 16 fields, saw 19\nSkipping line 5: expected 16 fields, saw 19\nSkipping line 6: expected 16 fields, saw 19\nSkipping line 7: expected 16 fields, saw 19\nSkipping line 8: expected 16 fields, saw 19\nSkipping line 9: expected 16 fields, saw 19\nSkipping line 10: expected 16 fields, saw 19\nSkipping line 11: expected 16 fields, saw 19\nSkipping line 12: expected 16 fields, saw 19\nSkipping line 13: expected 16 fields, saw 19\nSkipping line 14: expected 16 fields, saw 19\nSkipping line 15: expected 16 fields, saw 19\nSkipping line 16: expected 16 fields, saw 19\nSkipping line 17: expected 16 fields, saw 19\nSkipping line 18: expected 16 fields, saw 19\nSkipping line 19: expected 16 fields, saw 19\nSkipping line 20: expected 16 fields, saw 19\nSkipping line 21: expected 16 fields, saw 19\nSkipping line 22: expected 16 fields, saw 19\nSkipping line 23: expected 16 fields, saw 19\nSkipping line 24: expected 16 fields, s

b'Skipping line 15103: expected 19 fields, saw 21\nSkipping line 15955: expected 19 fields, saw 29\nSkipping line 16021: expected 19 fields, saw 21\nSkipping line 17483: expected 19 fields, saw 21\nSkipping line 17833: expected 19 fields, saw 20\n'
b'Skipping line 17639: expected 19 fields, saw 21\nSkipping line 17667: expected 19 fields, saw 25\nSkipping line 21813: expected 19 fields, saw 21\nSkipping line 24837: expected 19 fields, saw 24\n'
b'Skipping line 11936: expected 19 fields, saw 27\nSkipping line 13584: expected 19 fields, saw 23\nSkipping line 13956: expected 19 fields, saw 20\nSkipping line 14034: expected 19 fields, saw 20\nSkipping line 14062: expected 19 fields, saw 25\nSkipping line 17772: expected 19 fields, saw 22\nSkipping line 17814: expected 19 fields, saw 25\nSkipping line 23176: expected 19 fields, saw 23\n'
b'Skipping line 5519: expected 19 fields, saw 26\nSkipping line 6793: expected 19 fields, saw 23\nSkipping line 9533: expected 19 fields, saw 21\nSkipping 

b'Skipping line 3: expected 10 fields, saw 19\nSkipping line 4: expected 10 fields, saw 19\nSkipping line 5: expected 10 fields, saw 19\nSkipping line 6: expected 10 fields, saw 19\nSkipping line 7: expected 10 fields, saw 19\nSkipping line 8: expected 10 fields, saw 19\nSkipping line 9: expected 10 fields, saw 19\nSkipping line 10: expected 10 fields, saw 19\nSkipping line 11: expected 10 fields, saw 19\nSkipping line 12: expected 10 fields, saw 19\nSkipping line 13: expected 10 fields, saw 19\nSkipping line 14: expected 10 fields, saw 19\nSkipping line 15: expected 10 fields, saw 19\nSkipping line 16: expected 10 fields, saw 19\nSkipping line 17: expected 10 fields, saw 19\nSkipping line 18: expected 10 fields, saw 19\nSkipping line 19: expected 10 fields, saw 19\nSkipping line 20: expected 10 fields, saw 19\nSkipping line 21: expected 10 fields, saw 19\nSkipping line 22: expected 10 fields, saw 19\nSkipping line 23: expected 10 fields, saw 19\nSkipping line 24: expected 10 fields, s

In [79]:
result_df = pd.concat(total_data)
# result_df

In [80]:
# single test set


# index_text = open('./Spinetrack Data/task/Alex_task/timeStamps_Everything.txt', 'r').readlines()
# act_index = []
# for line in index_text[1:]:
#     row = line.rstrip().split('\t')
#     if len(row[2].split('_')) > 1 and row[2].split('_')[1] == 'OneH':
#         row[2] = '_'.join(row[2].split('_')[:2])
#     else:
#         row[2] = row[2].split('_')[0]
    
#     if row[2] not in index_dict.keys():
#         row.append(999)
#     else:
#         row.append(index_dict[row[2]])
    
#     act_index.append(row)
# index_df = pd.DataFrame(act_index)
# index_df.columns = ['Frame', 'Time(s)', 'Task', 'Index']

# recording_df = pd.read_csv('./Spinetrack Data/task/Alex_task/Everything.csv', error_bad_lines=False).dropna()
# recording_df.columns = header
# print(recording_df)

# frame = []
# for index in range(1, len(index_df)):
#     activity_index = index_df.iloc[index - 1]['Index']
#     if activity_index != 999:
#         time_lower_bound = float(index_df.iloc[index - 1]['Time(s)'])
#         time_upper_bound = float(index_df.iloc[index]['Time(s)'])
#         df_part = recording_df.loc[(recording_df['TimeSec'] >= (time_lower_bound + 2)) & (recording_df['TimeSec'] < (time_upper_bound - 2))]
#         df_part['activity'] = activity_index
#         frame.append(df_part)

# data_df = pd.concat(frame)
# data_df

In [81]:
# result_df = data_df

In [82]:
# Process original dataset, create windows (window_size samples(rows), about 1 second)
data = []
window = 1
while window*window_size < len(result_df):
    data_window = result_df[(window - 1)*window_size:window*window_size]
    data.append(data_window.values)
    window += 1
#data

In [83]:
len(data)

268

In [84]:
# delete window if multiple activities and sensors presents
cleaned_data = []
for i in data:
    previous_activity = -1
    previous_sensor = -1
    for j in i:
        current_activity = j[19]
        current_sensor = j[1]
        if (previous_activity != -1) and (current_activity != previous_activity):
            print("data contains different activities! Window droped")
            break
#         elif (previous_sensor != -1) and (current_sensor != previous_sensor):
#             print("data contains different sensors! Window droped")
#             break
        else:
            previous_activity = current_activity
            previous_sensor = current_sensor
    else:
        cleaned_data.append(i)

data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


In [85]:
len(cleaned_data)

152

In [86]:
cleaned_data[0].shape

(420, 20)

In [87]:
# # shuffle the data
# seed(101)
# shuffle(cleaned_data)
# #cleaned_data

In [88]:
# extract label from each window
labels = []
for i in cleaned_data:
    label = i[0][19]
    labels.append(label)
labels = np.array(labels)
#labels

In [89]:
# extract features from each window
features = []
for i in cleaned_data:
    new = np.delete(i, 19, 1)
    features.append(new)
features = np.array(features)
#features

In [90]:
features[0].shape

(420, 19)

In [91]:
# combine the features and labels
k = list(zip(features, labels))
activity_data = pd.DataFrame(k)
activity_data.columns = ['features', 'labels']
activity_data

Unnamed: 0,features,labels
0,"[[0.53401, 4.0, -0.96, 0.07, -0.21, 0.17, 351....",6.0
1,"[[5.8864, 0.0, 0.08900000000000001, -0.1, -0.0...",6.0
2,"[[11.155, 5.0, -0.78, 0.05, -0.17, -0.6, 90.46...",6.0
3,"[[27.017, 2.0, -0.731, -0.0, -0.06, 0.68, 288....",3.0
4,"[[43.382, 2.0, -0.727, -0.05, -0.05, 0.68, 287...",12.0
5,"[[48.722, 5.0, -0.815, 0.02, -0.2, -0.55, 83.6...",12.0
6,"[[63.318000000000005, 4.0, 0.9840000000000001,...",14.0
7,"[[68.59899999999999, 0.0, -0.787, 0.05, -0.61,...",14.0
8,"[[82.228, 0.0, -0.483, -0.12, 0.04, -0.87, 135...",10.0
9,"[[87.569, 4.0, -0.556, -0.27, -0.74, 0.27, 170...",10.0


In [92]:
# check the size of activity. The final output of neural net 
# has to have max_index + 1 output
max_index = activity_data['labels'].max()
label_size = int(max_index + 1)

In [93]:
set(activity_data['labels'])

{1.0, 2.0, 3.0, 5.0, 6.0, 8.0, 9.0, 10.0, 12.0, 14.0}

## Load Data

In [94]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU...")
torch.cuda.current_device()

CUDA is available! Training on GPU.


0

In [95]:
# define our dataset in pytorch
class DatasetSpineTrack(Dataset):
    
    def __init__(self, file, transform=None):
        #self.data = pd.read_csv(file_path)
        self.data = file
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        # load image as ndarray type (Height * Width * Channels)
        # be carefull for converting dtype to np.uint8 [Unsigned integer (0 to 255)]
        # in this example, i don't use ToTensor() method of torchvision.transforms
        # so you can convert numpy ndarray shape to tensor in PyTorch (H, W, C) --> (C, H, W)
        
        features = torch.tensor(self.data["features"].iloc[index])
        features = features.view(channel, window_size, 19) 
        labels = torch.tensor(self.data["labels"].iloc[index], dtype=torch.long)
        #print(labels.type())
        
#         if self.transform is not None:
#             image = self.transform(image)
            
        return features, labels

In [96]:
# create test data set
activity_data_test = activity_data
test_dataset = DatasetSpineTrack(activity_data_test)

# load data
testloader = DataLoader(test_dataset, batch_size=batch_size)

In [97]:
train_on_gpu = False

In [98]:
# define the model
model = models.resnet18(pretrained=False)
# window_size channels
model.conv1 = torch.nn.Conv2d(channel, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.fc = torch.nn.Linear(512, label_size, bias=True)
model.add_module("dropout", torch.nn.Dropout(p=0.5))
model = model.double()

# move tensors to GPU is CUDA is available
if train_on_gpu:
    model.cuda()
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [99]:
# load model
# model.load_state_dict(torch.load('model_Spinetrack_3.pt'))

# if wanty to run in cpu...
model.load_state_dict(torch.load('model_Spinetrack_3.pt', map_location=lambda storage, loc: storage))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [100]:
# Release all the GPU memory cache that can be freed
torch.cuda.empty_cache() 

### Overall accuracy and each class accuracy

In [101]:
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(len(index_dict)))
class_total = list(0. for i in range(len(index_dict)))

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

model.eval()
torch.no_grad()
# iterate over test data
for features, labels in testloader:
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        features, labels = features.cuda(), labels.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(features)
    # calculate the batch loss
    loss = criterion(output, labels)
    # update test loss 
    test_loss += loss.item()*features.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)    
    # compare predictions to true label
    correct_tensor = pred.eq(labels.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())

    # calculate test accuracy for each object class
    for i in range(batch_size):
        try:
            label = labels.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1
        except:
            pass

# average test loss
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(len(index_dict)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            list(index_dict.keys())[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (list(index_dict.keys())[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 2.597238

Test Accuracy of Pulling_OneH: N/A (no training examples)
Test Accuracy of Overhead: 85% (23/27)
Test Accuracy of Pulling: 66% ( 2/ 3)
Test Accuracy of Sitting: 62% ( 5/ 8)
Test Accuracy of Lifting: N/A (no training examples)
Test Accuracy of Crawling: 100% (10/10)
Test Accuracy of Standing: 44% (11/25)
Test Accuracy of Carrying: N/A (no training examples)
Test Accuracy of Walking: 41% ( 5/12)
Test Accuracy of Pushing:  0% ( 0/ 3)
Test Accuracy of Reaching: 62% (28/45)
Test Accuracy of Static_Stoop: N/A (no training examples)
Test Accuracy of Kneeling: 22% ( 2/ 9)
Test Accuracy of Lifting_OneH: N/A (no training examples)
Test Accuracy of Crouching: 40% ( 4/10)

Test Accuracy (Overall): 59% (90/152)
