# Data cleaning and reformating

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset\


import pandas as pd
import numpy as np
import os

from random import shuffle, seed

In [2]:
header = ['TimeSec', 'Sensor', 'Quatx', 'Quaty', 'Quatz', 'Quat0', 'Heading',
       'Pitch', 'Roll', 'LinAccx', 'LinAccy', 'LinAccz', 'Vbat', 'Accx',
       'Accy', 'Accz', 'Gyrox', 'Gyroy', 'Gyroz']

In [3]:
### Take a look at one dataset
# file_path = "./spinetrack data/Task_Drilling_Stoop.csv"
# df = pd.read_csv(file_path, error_bad_lines=False)
# df.columns = header

# df["Activity"] = [1 for i in range(len(df))]
# df

In [4]:
index_lst = []
index_dict = {}
for i in sorted(os.listdir('./Spinetrack Data/data/Alex_data/Processed_redone')):
    if i.endswith(".csv") and i != 'super_features.csv': 
        # should we combinme same activities together?
        act_lst = i.split('.')[0].split('_')
        if len(act_lst) > 1 and act_lst[1] == 'OneH':
            act = act_lst[0] + '_' + act_lst[1]
        else:
            act = act_lst[0]
        #act = i.split('.')[0]
        index_lst.append(act)

# delete multiple items
index_set = set(index_lst)
index_set.remove('Static')
index_set.remove('L')
index_set.remove('L_OneH')
index_set.add('Static_Stoop')
index_set.add('Lifting')
index_set.add('Lifting_OneH')

index_dict = {act:i for i, act in enumerate(index_set)}

In [5]:
print(index_dict)

{'Overhead': 0, 'Pushing': 1, 'Crawling': 2, 'Sitting': 3, 'Static_Stoop': 4, 'Standing': 5, 'Lifting': 6, 'Crouching': 7, 'Kneeling': 8, 'Reaching': 9, 'Pulling': 10, 'Walking': 11, 'Lifting_OneH': 12, 'Carrying': 13, 'Pulling_OneH': 14}


In [6]:
directory = "./Spinetrack Data/data/"
file = []

for d in sorted(os.listdir(directory)):
    if d != '.DS_Store':
        files = directory + d + "/Processed_redone"
        name = []
        try:
            for f in sorted(os.listdir(files)):
                if f.endswith(".csv") and f != 'super_features.csv': 
                    name.append(files + "/" + f)
            file.append(name)
            print("%s number of activity: %s" %(d, len(name)))
        except:
            pass
#file

Alex_data number of activity: 44
Alexander_data number of activity: 43
Charlotte_data number of activity: 43
Christian_data number of activity: 34
Elias_data number of activity: 44
Jesse_data number of activity: 44
Jiyoo_data number of activity: 43


In [7]:
frame = []
for person in file:
    for file_path in person:
        activity_name = ''
        activity_lst = file_path.split("/")[-1].split('.')[0].split('_')
        
        if activity_lst[0] == 'L' and activity_lst[1] == 'OneH':
            activity_name = 'Lifting_OneH'
        elif activity_lst[0] == 'L' and activity_lst[1] != 'OneH':
            activity_name = 'Lifting'
        elif activity_lst[0] == 'Static':
            activity_name = 'Static_Stoop'
        elif activity_lst[0] == 'Pulling' and activity_lst[1] == 'OneH':
            activity_name = 'Pulling_OneH'
        else:
            activity_name = activity_lst[0]
#         print(activity_name)
#         print("processing: ", file_path)
        if activity_name in index_dict.keys():
            df = pd.read_csv(file_path, error_bad_lines=False)
            df.columns = header
            df["activity"] = [index_dict[activity_name] for i in range(len(df))]
            frame.append(df)
            
            # mkdir if not exist. Save to local csv file
#             if not (os.path.exists('./Spinetrack Data/Yibin_Processed/' + category + folder_name)):
#                 os.makedirs('./Spinetrack Data/Yibin_Processed/' + category + folder_name)
#             csv_name = '/' + activity_name + '.csv'
#             folder_name = file_path.split("/")[-3] # person's name 
#             category = 'data/' # data or task
#             file_name = './Spinetrack Data/Yibin_Processed/' + category + folder_name + csv_name
#             df.to_csv(file_name) # save csv processed file to local
            
            #print(file_path)
        

In [8]:
# the correct dataset should have 986091 rows and 20 columns
data_df = pd.concat(frame)
data_df

Unnamed: 0,TimeSec,Sensor,Quatx,Quaty,Quatz,Quat0,Heading,Pitch,Roll,LinAccx,LinAccy,LinAccz,Vbat,Accx,Accy,Accz,Gyrox,Gyroy,Gyroz,activity
0,0.0362,2,0.822,0.01,0.26,-0.51,306.93,25.57,-16.37,-0.03,-0.03,-0.02,3.94,-19.84,-11.14,16.33,-0.47,-0.28,0.81,13
1,0.0377,3,0.625,-0.21,0.26,0.71,113.60,38.21,7.42,0.06,-0.08,-0.08,3.80,-37.84,-4.27,53.71,-0.51,0.03,0.77,13
2,0.0961,0,0.008,0.08,0.02,1.00,192.97,-9.34,2.16,0.11,-0.13,-0.02,3.92,2.75,-1.68,36.01,0.07,-0.10,0.98,13
3,0.0978,6,-0.978,0.03,-0.00,-0.21,38.13,1.13,-2.73,0.08,0.36,0.33,3.81,-18.92,-64.70,118.26,-0.13,0.07,1.20,13
4,0.1018,5,0.448,0.44,0.49,0.61,116.18,-5.60,82.09,0.26,-0.13,-0.21,3.87,5.19,45.62,-2.90,0.15,0.67,0.35,13
5,0.1032,1,0.105,-0.09,0.01,-0.99,206.37,-9.81,-2.63,-0.01,0.04,-0.05,3.91,-3.51,18.31,61.04,0.20,-0.02,0.84,13
6,0.1069,4,0.540,-0.59,0.39,-0.45,300.68,-6.56,-90.39,0.10,0.06,-0.26,3.79,8.85,-50.35,-32.35,0.15,-1.09,0.06,13
7,0.1086,7,0.955,0.11,0.16,0.23,42.78,15.05,16.65,0.11,0.29,-0.22,0.00,-14.34,-24.11,85.60,-0.28,-0.09,0.82,13
8,0.1283,2,0.838,-0.01,0.26,-0.48,310.55,24.85,-16.97,-0.12,0.06,-0.06,3.94,-31.13,-19.23,33.87,-0.37,-0.39,0.77,13
9,0.1301,3,0.586,-0.22,0.23,0.74,119.48,37.14,5.69,-0.01,0.01,-0.16,3.80,-56.00,1.37,29.91,-0.50,0.10,0.68,13


In [12]:
# # the sensor label index range from 0 to 7 
sensor_data = []
for i in range(0, 8):
    df = data_df.where(data_df['Sensor'] == i).dropna()
    sensor_data.append(df)
result_df = pd.concat(sensor_data).reset_index(drop=True)
result_df

Unnamed: 0,TimeSec,Sensor,Quatx,Quaty,Quatz,Quat0,Heading,Pitch,Roll,LinAccx,LinAccy,LinAccz,Vbat,Accx,Accy,Accz,Gyrox,Gyroy,Gyroz,activity
0,0.0961,0.0,0.008,0.08,0.02,1.00,192.97,-9.34,2.16,0.11,-0.13,-0.02,3.92,2.75,-1.68,36.01,0.07,-0.10,0.98,13.0
1,0.1662,0.0,-0.045,0.09,0.02,1.00,199.05,-10.01,1.58,0.04,-0.03,-0.01,3.92,10.53,-7.32,64.24,0.13,0.00,0.98,13.0
2,0.2443,0.0,-0.101,0.09,0.03,0.99,205.54,-10.56,1.96,0.04,0.05,0.02,3.92,14.04,7.93,55.54,0.15,0.09,0.99,13.0
3,0.3251,0.0,-0.162,0.09,0.04,0.98,212.51,-10.29,2.50,0.04,0.03,-0.11,3.92,13.12,9.77,85.14,0.14,0.10,0.88,13.0
4,0.4662,0.0,-0.240,0.08,0.04,0.97,221.67,-10.09,2.66,0.02,0.11,-0.04,3.92,14.65,5.49,90.33,0.21,0.15,0.94,13.0
5,0.5513,0.0,-0.327,0.08,0.06,0.94,232.17,-11.17,2.98,-0.01,0.13,-0.03,3.92,8.24,2.75,99.64,0.29,0.16,0.90,13.0
6,0.6311,0.0,-0.406,0.09,0.07,0.91,241.95,-12.66,3.01,0.07,0.15,-0.07,3.92,10.83,-10.07,82.55,0.27,0.20,0.91,13.0
7,0.7422,0.0,-0.476,0.10,0.09,0.87,250.95,-15.09,3.72,0.09,0.11,0.06,3.92,8.70,-7.63,76.45,0.32,0.21,1.00,13.0
8,0.8486,0.0,-0.531,0.11,0.11,0.83,258.43,-17.20,4.42,0.06,0.04,0.07,3.92,16.33,-5.19,71.26,0.33,0.14,1.03,13.0
9,0.9771,0.0,-0.586,0.10,0.13,0.79,266.18,-17.94,4.90,0.06,-0.02,0.09,3.92,15.26,6.87,39.67,0.30,0.14,1.05,13.0


---
# Data processing and deep learning model

### Hyperparameter
window_size: The number of timesteps in one window (e.g. how many rows in one window).

channel: The number of features in one window. Similar to image channels (RGB).

batch_size: The numebr of windows in one batch.

learning_rate: How fast the model learns.

In [13]:
window_size = 60
channel = 1
batch_size = 32
learning_rate = 0.001

In [14]:
# Process original dataset, create windows (window_size samples(rows), about 1 second)
data = []
window = 1
while window*window_size < len(result_df):
    data_window = result_df[(window - 1)*window_size:window*window_size]
    data.append(data_window.values)
    window += 1
#data

In [15]:
len(data)

16434

In [18]:
# delete window if multiple activities and sensors presents
cleaned_data = []
for i in data:
    previous_activity = -1
    previous_sensor = -1
    for j in i:
        current_activity = j[19]
        current_sensor = j[1]
        if (previous_activity != -1) and (current_activity != previous_activity):
            print("data contains different activities! Window droped")
            break
        elif (previous_sensor != -1) and (current_sensor != previous_sensor):
            print("data contains different sensors! Window droped")
            break
        else:
            previous_activity = current_activity
            previous_sensor = current_sensor
    else:
        cleaned_data.append(i)

data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped
data contains different activities! Window droped


In [19]:
len(cleaned_data)

15564

In [20]:
cleaned_data[0].shape

(60, 20)

In [21]:
# shuffle the data
seed(101)
shuffle(cleaned_data)
#cleaned_data

In [22]:
# extract label from each window
labels = []
for i in cleaned_data:
    label = i[0][19]
    labels.append(label)
labels = np.array(labels)
#labels

In [23]:
# extract features from each window
features = []
for i in cleaned_data:
    new = np.delete(i, 19, 1)
    features.append(new)
features = np.array(features)
#features

In [24]:
features[0].shape

(60, 19)

In [25]:
# combine the features and labels
k = list(zip(features, labels))
activity_data = pd.DataFrame(k)
activity_data.columns = ['features', 'labels']
activity_data

Unnamed: 0,features,labels
0,"[[1.1, 0.0, -0.374, -0.28, -0.12, 0.87, 240.58...",6.0
1,"[[4.354, 1.0, 0.833, -0.19, 0.31, 0.42, 65.27,...",1.0
2,"[[43.784, 4.0, 0.344, -0.01, 0.16, -0.93, 234....",8.0
3,"[[0.18, 2.0, -0.5660000000000001, 0.06, -0.45,...",6.0
4,"[[16.615, 4.0, 0.293, -0.21, 0.93, 0.08, 218.9...",9.0
5,"[[12.56, 2.0, 0.308, 0.13, 0.26, -0.9, 226.31,...",13.0
6,"[[0.5915, 5.0, -0.324, 0.07, -0.94, -0.03, 202...",9.0
7,"[[2.417, 7.0, 0.9740000000000001, 0.07, 0.2, 0...",6.0
8,"[[12.827, 4.0, -0.975, -0.05, -0.19, 0.09, 4.1...",14.0
9,"[[1.92, 4.0, -0.521, 0.46, -0.55, 0.46, 281.88...",6.0


In [26]:
# check the size of activity. The final output of neural net 
# has to have max_index + 1 output
max_index = activity_data['labels'].max()
label_size = int(max_index + 1)

---
## Load data

In [27]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU...")
torch.cuda.current_device()

CUDA is available! Training on GPU.


0

In [28]:
# split the data to test, validation, and train
valid_size = 0.2
test_size = 0.2
activity_data.columns = ["features", "labels"]
activity_data_train = activity_data[:int(len(activity_data)*(1-valid_size-test_size))]
activity_data_valid = activity_data[int(len(activity_data)*(1-valid_size-test_size)):int(len(activity_data)*(1-test_size))]
activity_data_test = activity_data[int(len(activity_data)*(1-test_size)):]
# activity_data_train.to_csv("./activity_data_train.csv", encoding='utf-8-sig')
# activity_data_valid.to_csv("./activity_data_valid.csv", encoding='utf-8-sig')
# activity_data_train.to_csv("./activity_data_test.csv", encoding='utf-8-sig')

In [29]:
# define our dataset in pytorch
class DatasetSpineTrack(Dataset):
    
    def __init__(self, file, transform=None):
        #self.data = pd.read_csv(file_path)
        self.data = file
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        # load image as ndarray type (Height * Width * Channels)
        # be carefull for converting dtype to np.uint8 [Unsigned integer (0 to 255)]
        # in this example, i don't use ToTensor() method of torchvision.transforms
        # so you can convert numpy ndarray shape to tensor in PyTorch (H, W, C) --> (C, H, W)
        
        features = torch.tensor(self.data["features"].iloc[index])
        features = features.view(channel, window_size, 19) 
        labels = torch.tensor(self.data["labels"].iloc[index], dtype=torch.long)
        #print(labels.type())
        
#         if self.transform is not None:
#             image = self.transform(image)
            
        return features, labels

In [30]:
# construct training and testing dataset in csv
# train_dataset = DatasetSpineTrack("./activity_data_train.csv")
# valid_dataset = DatasetSpineTrack("./activity_data_valid.csv")
# test_dataset = DatasetSpineTrack("./activity_data_test.csv")
train_dataset = DatasetSpineTrack(activity_data_train)
valid_dataset = DatasetSpineTrack(activity_data_valid)
test_dataset = DatasetSpineTrack(activity_data_test)
feature, label = train_dataset.__getitem__(0)
#feature
#label

In [31]:
# load data
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

---
## Network Architechture

In [32]:
model = models.resnet18(pretrained=False)
# window_size channels
# model.conv1 = torch.nn.Conv2d(window_size, batch_size, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.conv1 = torch.nn.Conv2d(channel, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.fc = torch.nn.Linear(512, label_size, bias=True)
model.add_module("dropout", torch.nn.Dropout(p=0.5))
model = model.double()

# move tensors to GPU is CUDA is available
if train_on_gpu:
    model.cuda()
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

---
## Training the Network

Remember to look at how the training and validation loss decreases over time; if the validation loss ever increases it indicates possible overfitting.

In [33]:
import time
start_time = time.time()

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# number of epochs to train the model
n_epochs = 30

valid_loss_min = np.Inf # track change in validation loss

for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    
    ###################
    # train the model #
    ###################
    model.train()
    for features, labels in trainloader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            features, labels = features.cuda(), labels.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(features)
        # calculate the batch loss
        loss = criterion(output, labels)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()*features.size(0)
        
    ######################    
    # validate the model #
    ######################
    model.eval()
    for features, labels in validloader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            features, labels = features.cuda(), labels.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(features)
        # calculate the batch loss
        loss = criterion(output, labels)
        # update average validation loss 
        valid_loss += loss.item()*features.size(0)
    
    # calculate average losses
    train_loss = train_loss/len(trainloader.sampler)
    valid_loss = valid_loss/len(validloader.sampler)
        
    # print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model_Spinetrack_data_same_sensor.pt')
        valid_loss_min = valid_loss

# output running time
running_time = time.time() - start_time
sec = running_time % 60
miniute = running_time / 60
print("Training time: {} min {} sec".format(int(miniute), int(sec)))

Epoch: 1 	Training Loss: 1.453295 	Validation Loss: 1.132790
Validation loss decreased (inf --> 1.132790).  Saving model ...
Epoch: 2 	Training Loss: 1.095815 	Validation Loss: 0.952771
Validation loss decreased (1.132790 --> 0.952771).  Saving model ...
Epoch: 3 	Training Loss: 0.974487 	Validation Loss: 0.913781
Validation loss decreased (0.952771 --> 0.913781).  Saving model ...
Epoch: 4 	Training Loss: 0.880631 	Validation Loss: 1.300962
Epoch: 5 	Training Loss: 0.821040 	Validation Loss: 0.858858
Validation loss decreased (0.913781 --> 0.858858).  Saving model ...
Epoch: 6 	Training Loss: 0.777594 	Validation Loss: 0.801989
Validation loss decreased (0.858858 --> 0.801989).  Saving model ...
Epoch: 7 	Training Loss: 0.713982 	Validation Loss: 0.759894
Validation loss decreased (0.801989 --> 0.759894).  Saving model ...
Epoch: 8 	Training Loss: 0.676197 	Validation Loss: 0.733976
Validation loss decreased (0.759894 --> 0.733976).  Saving model ...
Epoch: 9 	Training Loss: 0.634906 

###  Load the Model with the Lowest Validation Loss

In [34]:
model.load_state_dict(torch.load('model_Spinetrack_data_same_sensor.pt'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

---
## Test the Trained Network

Test your trained model on previously unseen data! A "good" result will be a result that gets more than 70% accuracy on these test data.

In [35]:
# Release all the GPU memory cache that can be freed
torch.cuda.empty_cache() 

### Overall accuracy and each class accuracy

In [36]:
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(len(index_dict)))
class_total = list(0. for i in range(len(index_dict)))

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

model.eval()
torch.no_grad()
# iterate over test data
for features, labels in testloader:
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        features, labels = features.cuda(), labels.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(features)
    # calculate the batch loss
    loss = criterion(output, labels)
    # update test loss 
    test_loss += loss.item()*features.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)    
    # compare predictions to true label
    correct_tensor = pred.eq(labels.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    # calculate test accuracy for each object class
    for i in range(batch_size):
        try:
            label = labels.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1
        except:
            pass

# average test loss
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(len(index_dict)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            list(index_dict.keys())[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (list(index_dict.keys())[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.660859

Test Accuracy of Overhead: 83% (154/185)
Test Accuracy of Pushing: 71% (137/191)
Test Accuracy of Crawling: 81% (72/88)
Test Accuracy of Sitting: 45% (38/84)
Test Accuracy of Static_Stoop: 79% (62/78)
Test Accuracy of Standing: 64% (54/84)
Test Accuracy of Lifting: 96% (1004/1044)
Test Accuracy of Crouching: 50% (45/89)
Test Accuracy of Kneeling: 43% (34/78)
Test Accuracy of Reaching: 94% (275/292)
Test Accuracy of Pulling: 47% (37/78)
Test Accuracy of Walking: 64% (52/81)
Test Accuracy of Lifting_OneH: 71% (216/304)
Test Accuracy of Carrying: 90% (224/247)
Test Accuracy of Pulling_OneH: 60% (114/190)

Test Accuracy (Overall): 80% (2518/3113)


### Overall accuracy (different calculation method)

In [118]:
# Turn off gradients for validation, saves memory and computations
torch.no_grad()
accuracy = 0
for features, labels in testloader:
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        features, labels = features.cuda(), labels.cuda()
    loss = model(features)
    test_loss += criterion(loss, labels)

#     ps = torch.exp(loss)
    top_p, top_class = loss.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    accuracy += torch.mean(equals.type(torch.FloatTensor))
print("Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 2.92 GiB already allocated; 15.19 MiB free; 13.71 MiB cached)

In [None]:
# TODO: Figure out pulling_OneH