In [1]:
#!/home/xyang18/miniconda3/envs/pytorch/bin/ python
# -*- coding: utf-8 -*-
# Python version: 3.6

import os
import sys
import copy
import time
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import WeightedRandomSampler, TensorDataset
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, BatchNorm1d, Dropout, Flatten, BCELoss
from torch.optim import Adam, SGD
from torch import nn
# from torchsummary import summary
# from torch.utils.tensorboard import SummaryWriter

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split


In [2]:
gpu_id=3

In [3]:
if gpu_id>=0:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    cuda_id = "cuda:" + str(0)  # cuda:2

device = torch.device(cuda_id if torch.cuda.is_available() else "cpu")
print("Device:", device)
if (torch.cuda.is_available()):
    torch.cuda.set_device(cuda_id)
    print("Current GPU ID:", torch.cuda.current_device())

Device: cuda:0
Current GPU ID: 0


In [4]:
def prepare_data_PAMAP2(root_path='../../PAMAP2_Dataset/Protocol/subject10'):
    X=[]
    user_labels=[]
    act_labels=[]

    window_len = 512
    stride_len = 20
    # columns for IMU data
    imu_locs = [14,15,16,31,32,33,48,49,50
            ] 
    
    act_list = [1, 2, 3, 4, 5, 6, 7, 12, 13, 16, 17, 24]

    scaler = MinMaxScaler()
    # scaler = StandardScaler()

    for uid in np.arange(1,10):
        path = root_path + str(uid) + '.dat'
        df = pd.read_table(path, sep=' ', header=None)
        act_imu_filter = df.iloc[:, imu_locs] 

        for act_id in range(len(act_list)):
            act_filter =  act_imu_filter[df.iloc[:, 1] == act_list[act_id]]
            act_data = act_filter.to_numpy()
                
            act_data = np.transpose(act_data)
            # sliding window segmentation
            start_idx = 0
            while start_idx + window_len < act_data.shape[1]:
                window_data = act_data[:, start_idx:start_idx + window_len]
                downsamp_data = window_data[:, ::3] # downsample from 100hz to 33.3hz
                downsamp_data = np.nan_to_num(downsamp_data) # remove nan

                X.append(downsamp_data)
                user_labels.append(uid)
                act_labels.append(act_id)
                start_idx = start_idx + stride_len

    X_n = np.array(X).astype('float32')

    normalized_X = np.zeros_like(X_n) # allocate numpy array for normalized data
    for ch_id in range(X_n.shape[1]): # loop the 27 sensor channels
        ch_data = X_n[:, ch_id, :] # the data of channel id
        scaler = MinMaxScaler() # maybe different scalers?
        ch_data = scaler.fit_transform(ch_data) # scale the data in this channel to [0,1]
        normalized_X[:, ch_id, :] = ch_data # assign normalized data to normalized_X
    normalized_X = np.transpose(normalized_X, (0, 2, 1)) # overwrote X here, changed dimensions into: num_samples, sequence_length, feature_length
        
    # convert list to numpy array
    # normalized_X= normalized_X.reshape(normalized_X.shape[0], 1, normalized_X.shape[1], normalized_X.shape[2]) 
    act_labels = np.array(act_labels).astype('float32')
    act_labels = act_labels.reshape(act_labels.shape[0],1)
    act_labels = to_categorical(act_labels, num_classes=len(act_list))

    return normalized_X, act_labels

In [5]:
normalized_X, act_labels = prepare_data_PAMAP2()

In [6]:
print(normalized_X.shape)

(94895, 171, 27)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(normalized_X, act_labels, test_size=0.2, random_state=42)

In [8]:
# from sliding_window import sliding_window
# import pickle as cp

In [9]:
# # Number of Sensor Channels used in the OPPORTUNITY dataset.
# NB_SENSOR_CHANNELS = 113

# # Number of classes in which data is classified (or to be classified).
# NUM_CLASSES = 5

# # Length of the sliding window used to segmenting the time-series-data.
# SLIDING_WINDOW_LENGTH = 24

# # Steps of the sliding window used in segmenting the data.
# SLIDING_WINDOW_STEP = 12

# act_labels_txt = ['std', 'wlk', 'sit', 'lie', 'null']

# # Variable for Batch Size.
# # BATCH_SIZE = 100

# # Number filters used in convolutional layers.
# # NUM_FILTERS = 64

# # Size of filters used in convolutional layers.
# # FILTER_SIZE = 5

# # Units in the long short-term recurrent layers.
# # NUM_UNITS_LSTM = 128

In [10]:
# def load_dataset(filename):

#     f = open(filename, 'rb')
#     data = cp.load(f)
#     f.close()

#     X_train, y_train = data[0]
#     X_test, y_test = data[1]

#     print(" ..from file {}".format(filename))
#     print(" ..reading instances: train {0}, test {1}".format(X_train.shape, X_test.shape))

#     X_train = X_train.astype(np.float32)
#     X_test = X_test.astype(np.float32)

#     # The targets are casted to int8 for GPU compatibility.
#     y_train = y_train.astype(np.uint8)
#     y_test = y_test.astype(np.uint8)

#     return X_train, y_train, X_test, y_test

# print("Loading Data...")
# X_train, y_train, X_test, y_test = load_dataset('../../data/oppChallenge_gestures.data')

# assert NB_SENSOR_CHANNELS == X_train.shape[1]
# def opp_sliding_window(data_x, data_y, ws, ss):
#     data_x = sliding_window(data_x,(ws,data_x.shape[1]),(ss,1))
#     data_y = np.asarray([[i[-1]] for i in sliding_window(data_y,ws,ss)])
#     return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)

# # Sensor data is segmented using a sliding window mechanism
# X_test, y_test = opp_sliding_window(X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
# print(" ..after sliding window (testing): inputs {0}, targets {1}".format(X_test.shape, y_test.shape))

# # Data is reshaped since the input of the network is a 4 dimension tensor
# X_test = X_test.reshape((-1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))
# # X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))

# # X_test = np.transpose(X_test, (0, 2, 1))

In [11]:
# X_train, y_train = opp_sliding_window(X_train, y_train, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
# print(" ..after sliding window (training): inputs {0}, targets {1}".format(X_train.shape, y_train.shape))
# X_train = X_train.reshape((-1,SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))
# # X_train = X_train.reshape((-1,1,SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS,1))

# # X_train = np.transpose(X_train, (0, 2, 1))
# X_train.shape


In [12]:
# y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
# y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [13]:
# normalized_X = np.zeros_like(X)
# for ch_id in range(X.shape[1]):
#     ch_data = X[:, ch_id, :]
#     scaler = MinMaxScaler()
#     ch_data = scaler.fit_transform(ch_data)
#     normalized_X[:, ch_id, :] = ch_data
# X = normalized_X
# # X = np.transpose(normalized_X, (0, 2, 1))
# print(X.shape)
# # (94895, 27, 171)

In [14]:
# X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1], X_train.shape[2]) # convert list to numpy array
# X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1], X_test.shape[2]) # convert list to numpy array
# #print(X.shape)
# # #(94895, 1, 27, 171)

# # act_labels = np.array(act_labels).astype('float32')
# # act_labels = act_labels.reshape(act_labels.shape[0],1)
# # act_labels = to_categorical(act_labels, num_classes=len(act_list))
# y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
# y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [15]:
class HARModel(nn.Module):
    
    def __init__(self, n_sensor_channels=113, len_seq=24, n_hidden=128, n_layers=1, n_filters=64, 
                 n_classes=5, filter_size=(1,5), drop_prob=0.5):
        super(HARModel, self).__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_filters = n_filters
        self.n_classes = n_classes
        self.filter_size = filter_size
        self.n_sensor_channels = n_sensor_channels
        self.len_seq = len_seq

             
        self.conv1 = nn.Conv2d(1, n_filters, filter_size)
        self.conv2 = nn.Conv2d(n_filters, n_filters, filter_size)
        self.conv3 = nn.Conv2d(n_filters, n_filters, filter_size)
        self.conv4 = nn.Conv2d(n_filters, n_filters, filter_size)
        
        # self.lstm1  = nn.LSTM(64, n_hidden, n_layers)
        # self.lstm2  = nn.LSTM(n_hidden, n_hidden, n_layers)
        self.multihead_attn = nn.MultiheadAttention(embed_dim=n_sensor_channels*n_filters, num_heads=1) # 7232=113*64
        # self.fc0 = nn.Linear(57856, 128)
        self.fc = nn.Linear(n_sensor_channels*n_filters*(len_seq-4*(filter_size[1]-1)), n_classes) #57856 = 8*113*64

        self.dropout = nn.Dropout(drop_prob)
        # self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        # x = x.view(-1, NB_SENSOR_CHANNELS, SLIDING_WINDOW_LENGTH,1) # for direct channel_gate
        # batch_size = x.shape[0]

        # x = x.view(-1, NB_SENSOR_CHANNELS, SLIDING_WINDOW_LENGTH) # for deepconvlstm conv layers
        # x = torch.permute(x,(1,0,2))
        # print(x.shape)
        # x, attn_output_weights = self.multihead_attn0(x,x,x)

        # print(x.shape)
        # x = torch.permute(x,(2,1,0))
        # print(x.shape)
        # x = x.view(-1, 1, NB_SENSOR_CHANNELS, SLIDING_WINDOW_LENGTH) # draft
        x = torch.permute(x, (0,2,1))
        x = torch.unsqueeze(x, dim=1)
        # print(x.shape)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x)) # [64, 113, 8]
        # x = x.view(-1, NB_SENSOR_CHANNELS, 8, 1)
        # x = x.view(x.shape[0], x.shape[1], x.shape[2], 1)
        # x = x.view(x.shape[0], -1, 8)
        
        # print(x.shape)
        x = torch.permute(x, (3,0,1,2))
        x = x.view(x.shape[0], x.shape[1],-1)
        
        # print(x.shape)
        # x = x.view(8, x.shape[0], -1) # bak
        
    
        x, attn_output_weights = self.multihead_attn(x,x,x)
        x = self.dropout(x)
        x = F.relu(x)
#         x, attn_output_weights = self.multihead_attn1(x,x,x)
#         # x = self.dropout(x)
#         x = F.relu(x)    
        
        x = torch.permute(x, (1,0,2))
        
        # x, hidden = self.lstm1(x, hidden)
        # # x = self.dropout(x)
        # x, hidden = self.lstm2(x, hidden)
        # x = self.dropout(x)
        
        # x = x.contiguous().view(-1, self.n_hidden)

        x = torch.reshape(x, (x.shape[0],-1))
        # x = F.relu(self.fc0(x))
        # x = self.dropout(x)
        x = self.fc(x)
        
        # out = x.view(batch_size, -1, self.n_classes)[:,-1,:]
        return x
    
#     def init_hidden(self, batch_size):
#         ''' Initializes hidden state '''
#         # Create two new tensors with sizes n_layers x batch_size x n_hidden,
#         # initialized to zero, for hidden state and cell state of LSTM
#         weight = next(self.parameters()).data
        
#         if (train_on_gpu):
#             hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
#                   weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
#         else:
#             hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
#                       weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
#         return hidden
    
net = HARModel(n_sensor_channels=X_train.shape[2], len_seq=X_train.shape[1], n_classes=12)

In [16]:
# def init_weights(m):
#     if type(m) == nn.LSTM:
#         for name, param in m.named_parameters():
#             if 'weight_ih' in name:
#                 torch.nn.init.orthogonal_(param.data)
#             elif 'weight_hh' in name:
#                 torch.nn.init.orthogonal_(param.data)
#             elif 'bias' in name:
#                 param.data.fill_(0)
#     elif type(m) == nn.Conv1d or type(m) == nn.Linear:
#         torch.nn.init.orthogonal_(m.weight)
#         m.bias.data.fill_(0)
# net.apply(init_weights)    

In [17]:
# def iterate_minibatches(inputs, targets, batchsize, shuffle=True):
#     assert len(inputs) == len(targets)
#     if shuffle:
#         indices = np.arange(len(inputs))
#         np.random.shuffle(indices)
#     for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
#         if shuffle:
#             excerpt = indices[start_idx:start_idx + batchsize]
#         else:
#             excerpt = slice(start_idx, start_idx + batchsize)
#         yield inputs[excerpt], targets[excerpt]

In [19]:
def train(net, epochs=10, batch_size=64, lr=0.01):
    # opt = torch.optim.Adam(net.parameters(), lr=lr)
    opt = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    # opt = torch.optim.RMSprop(net.parameters(), lr=lr, momentum=0.1)
    # opt = torch.optim.SGD(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.BCEWithLogitsLoss()
    
    train_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    train_loader = torch.utils.data.DataLoader(train_dataset,
        batch_size=batch_size, shuffle=True, drop_last = True)  

    test_dataset = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))
    test_loader = torch.utils.data.DataLoader(test_dataset,
        batch_size=batch_size, shuffle=False, drop_last = True) 
    
    if(train_on_gpu):
        net.cuda()
     
    for e in range(epochs):
        
        # initialize hidden state
        # h = net.init_hidden(batch_size)         
        train_losses = []    
        net.train()
        # for batch in iterate_minibatches(X_train, y_train, batch_size):
        for batch in train_loader:
            x, y = batch

            # inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            inputs, targets = x.to(device), y.to(device)  

            # if(train_on_gpu):
            #         inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            # h = tuple([each.data for each in h])
            
            # h = h[0].reshape((batch_size, -1)) # for GRU
            
            # zero accumulated gradients
            opt.zero_grad()   
            
            # get the output from the model
            output = net(inputs)
            # loss = criterion(output, torch.from_numpy(to_categorical(y, num_classes=NUM_CLASSES)).to(device))
            loss = criterion(output, torch.argmax(targets,dim=1))
            # print(output.shape)
            # print(targets.shape)
            # loss = criterion(output, targets)
            train_losses.append(loss.item())
            loss.backward()
            opt.step()
            
        # val_h = net.init_hidden(batch_size)
        val_losses = []
        accuracy=0
        f1score=0
        
        correct = 0
        total = 0
        total_true = []
        total_pred = []
        
        net.eval()
        with torch.no_grad():
            for batch in test_loader:
                x, y = batch
                inputs, targets = x.to(device), y.to(device)  
 
                # print(images.shape)            
            # for batch in iterate_minibatches(X_test, y_test, batch_size):
            #     x, y = batch     

                # inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

                # val_h = tuple([each.data for each in val_h])

                if(train_on_gpu):
                    inputs, targets = inputs.cuda(), targets.cuda()
                    
                output = net(inputs)

                # val_loss = criterion(output, torch.from_numpy(to_categorical(y, num_classes=NUM_CLASSES)).to(device))
                val_loss = criterion(output, torch.argmax(targets,dim=1))
                # val_loss = criterion(output, targets)
                val_losses.append(val_loss.item())
                
                predicted = torch.argmax(output.data, dim=1)
                total += targets.size(0)
                correct += (predicted == torch.argmax(targets, dim=1)).sum().item()

                total_pred = total_pred + predicted.cpu().numpy().tolist()
                total_true = total_true + (torch.argmax(targets, dim=1).cpu().numpy().tolist())


#                 top_p, top_class = output.topk(1, dim=1)
                
#                 # equals = top_class == torch.argmax(targets, dim=1)
#                 equals = top_class == targets.view(*top_class.shape).long()
#                 accuracy += torch.mean(equals.type(torch.FloatTensor))
#                 # f1score += metrics.f1_score(top_class.cpu(), torch.argmax(targets, dim=1).cpu(), average='micro')
#                 f1score += metrics.f1_score(top_class.cpu(), targets.view(*top_class.shape).long().cpu(), average='micro')
        net.train() # reset to train mode after iterationg through validation data
    
        # print(f'Test Accuracy: {100.0 * correct / total} %')
        # print(" | ".join(act_labels_txt))
        # conf_mat = confusion_matrix(y_true = total_true, y_pred = total_pred)
        # conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
        # print(np.array(conf_mat).round(3) * 100)  
        f1_score = metrics.f1_score(y_true = total_true, y_pred = total_pred, average='weighted')
        # print('F1 score:', f1_score)
        # print('')      

        print("Epoch: {}/{}...".format(e+1, epochs),
        "Train Loss: {:.4f}...".format(np.mean(train_losses)),
        "Val Loss: {:.4f}...".format(np.mean(val_losses)),
        "Val Acc: {:.4f}...".format(correct / total),
        "F1-Score: {:.4f}...".format(f1_score))
        
        PATH = 'pamap2_ConvAttn_ep'+str(e)+'.pt'
        torch.save(net.state_dict(), PATH)
        
## check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

train(net)

Training on GPU!
Epoch: 1/10... Train Loss: 0.7282... Val Loss: 0.2738... Val Acc: 0.9202... F1-Score: 0.9198...
Epoch: 2/10... Train Loss: 0.1473... Val Loss: 0.1114... Val Acc: 0.9664... F1-Score: 0.9664...
Epoch: 3/10... Train Loss: 0.0661... Val Loss: 0.0433... Val Acc: 0.9876... F1-Score: 0.9877...
Epoch: 4/10... Train Loss: 0.0425... Val Loss: 0.0295... Val Acc: 0.9920... F1-Score: 0.9920...
Epoch: 5/10... Train Loss: 0.0306... Val Loss: 0.0216... Val Acc: 0.9940... F1-Score: 0.9940...
Epoch: 6/10... Train Loss: 0.0240... Val Loss: 0.0175... Val Acc: 0.9947... F1-Score: 0.9947...
Epoch: 7/10... Train Loss: 0.0218... Val Loss: 0.0211... Val Acc: 0.9940... F1-Score: 0.9940...
Epoch: 8/10... Train Loss: 0.0154... Val Loss: 0.0115... Val Acc: 0.9964... F1-Score: 0.9964...
Epoch: 9/10... Train Loss: 0.0133... Val Loss: 0.0103... Val Acc: 0.9974... F1-Score: 0.9974...
Epoch: 10/10... Train Loss: 0.0116... Val Loss: 0.0252... Val Acc: 0.9929... F1-Score: 0.9929...


In [None]:
# PATH = 'opportunity_ConvAttn.pt'
# torch.save(net.state_dict(), PATH)