In [1]:
import os
import sys
import math
import random
import torch
import numpy as np
import syft as sy
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
import ujson as json
import pandas as pd
import re
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
def loadDataset(path, screenName):

    users = [ f.path for f in os.scandir(path) if f.is_dir() ]
    info = pd.DataFrame(columns= ['accelometer_size', 'gyroscope_size', 'timestamp'])

    accelerometer = pd.DataFrame(columns=['x', 'y', 'z', 'screen', 'user', 'magnitude','combine_angle', 'timestamp'])
    gyroscope = pd.DataFrame(columns=['x_gyroscope', 'y_gyroscope', 'z_gyroscope', 'screen_gyroscope', 'user_gyroscope', 'magnitude_gyroscope', 'combine_angle_gyroscope', 'timestamp_gyroscope'])

    # Read sensors data from json file and save them in Dataframes
    for i in range(0, len(users)):

        json_files = [pos_json for pos_json in os.listdir(users[i]) if pos_json.endswith('.json')]

        for index, js in enumerate(json_files):
            with open(os.path.join(users[i], js)) as json_file:
                json_text = json.load(json_file)
                accSize = 0
                gyrSize = 0
                js = js.replace('.json','')
                arr = js.split('_')

                for j in json_text['accelerometer']:
                    if screenName in j['screen']:
                        x = j['x']
                        y = j['y']
                        z = j['z']
                        if x == 0 and y == 0:
                            continue
                        screen = j['screen']
                        user = arr[0]
                        m = x**2 + y**2 + z**2
                        m = np.sqrt(m)
                        ca = np.sqrt(y**2 + z**2)
                        timestamp = arr[1]
                        accSize = accSize + 1
                        df = {'x': x, 'y': y, 'z' : z, 'screen' : screen, 'user': user, 'magnitude' : m, 'combine_angle': ca, 'timestamp': timestamp}
                        accelerometer = accelerometer.append(df, ignore_index=True)
                        
                for j in json_text['gyroscope']:
                    if screenName in j['screen']:
                        x = j['x']
                        y = j['y']
                        z = j['z']
                        if x == 0 and y == 0:
                            continue
                        screen = j['screen']
                        user = arr[0]
                        m = x**2 + y**2 + z**2
                        m = np.sqrt(m)
                        ca = np.sqrt(y**2 + z**2)
                        timestamp = arr[1]
                        gyrSize =  gyrSize + 1
                        df = {'x_gyroscope': x, 'y_gyroscope': y, 'z_gyroscope' : z, 'screen_gyroscope' : screen, 'user_gyroscope': user, 'magnitude_gyroscope' : m, 'combine_angle_gyroscope': ca, 'timestamp_gyroscope': timestamp}
                        gyroscope = gyroscope.append(df, ignore_index=True)
                    
                dframe = {'accelometer_size': accSize, 'gyroscope_size': gyrSize, 'timestamp': arr[1]}
                info = info.append(dframe, ignore_index=True)

    return accelerometer, gyroscope, info, users

In [3]:
path = 'C:/Users/SouthSystem/Documents/Pessoal/TCC/Impl/sensors_data'
screen = 'MathisisGame'
accelerometer, gyroscope, info, users = loadDataset(path, screen)

In [4]:
metrics = ['x','y','z']

In [30]:
    user_list = list(sorted(set(accelerometer['user'])))
    datadict = {}
    user_y = []
    finaldata = []
    for user in user_list:
        is_user = accelerometer['user']==user
        accelerometer_user = accelerometer[is_user]    
        filtered = accelerometer_user[['x','y','z','timestamp']]    
        time_list = list(sorted(set(filtered['timestamp'])))
        for idx, metric in enumerate(metrics):
            data = []
            for timestamp in time_list:
                df = filtered[filtered['timestamp']==timestamp]            
                list_df = list(df[metric].head(100))
                if len(list_df) > 49:
                    user_y.append(user)
                    data.append(list_df)
            datadict[idx] = data
        data0 = datadict[0]
        data1 = datadict[1]
        data2 = datadict[2]
        for idx, val in enumerate(data0):
            finaldata.append(val + data1[idx] + data2[idx])

    df = pd.DataFrame(finaldata)  
    data_array = np.asarray(df)
    user_ids = [user_list.index(user) for user in user_y]
    user_array = np.asarray(user_ids)
    
    

In [36]:
print(df)

          0         1         2         3         4         5         6    \
0    0.058594  0.065430  0.069336  0.079102  0.166016  0.107422  0.091797   
1    0.023438  0.054688  0.021484 -0.011719 -0.055664  0.013672  0.001953   
2    0.023438  0.054688  0.021484 -0.011719 -0.055664  0.013672  0.001953   
3   -0.337891 -0.333008 -0.340820 -0.334961 -0.333984 -0.343750 -0.338867   
4   -0.000977 -0.002930 -0.004883 -0.002930 -0.003906 -0.003906 -0.003906   
..        ...       ...       ...       ...       ...       ...       ...   
355 -0.119263 -0.132935 -0.093873 -0.103638  0.005737 -0.074341 -0.082153   
356 -0.179810 -0.201293 -0.072388 -0.037231  0.033081  0.076049  0.148316   
357  0.083862  0.078004  0.078004  0.093629 -0.066528  0.060424  0.029174   
358 -0.137829  0.000289  0.168484  0.195267  0.147812  0.160579  0.152421   
359  0.162786  0.220567  0.190296  0.221627  0.255922  0.230804  0.148014   

          7         8         9    ...       290       291       292  \
0  

In [6]:
X = None
x_data = data_array
for i in range(len(x_data)):
    row = np.asarray(x_data[i, :])
    row = row.reshape(3, 100).T
    if X is None:
        X = np.zeros((len(x_data), 100, 3))
        X[i] = row
print(X.shape)

(360, 100, 3)


In [7]:
#print(filtered['timestamp'].value_counts())

In [8]:
# Initial project configuration
result = []
project_name = 'Human Activity Recognition'
arch = "Convolution + pooling + convolution + pooling + dense + dense + dense + output"
batch_size = 64
epochs = 50
lr = 0.01
momentum = 0.9

In [9]:
torch.manual_seed(29)

<torch._C.Generator at 0x19974c878f0>

In [38]:
train_dataset = accelerometer.sample(frac=0.8, random_state=25)
test_dataset = accelerometer.drop(train_dataset.index)

In [37]:
def load(batch_size=64):
    x_train, y_train, x_test, y_test = load_data()
    x_train, x_test = x_train.reshape(
        (-1, 3, 1, 100)), x_test.reshape((-1, 3, 1, 100))
    transform = None
    train_set = Data_loader(x_train, y_train, transform)
    test_set = Data_loader(x_test, y_test, transform)
    train_loader = DataLoader(
        train_set, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader


def load_data():
    X_train, Y_train = format_data(train_dataset)
    X_test, Y_test = format_data(test_dataset)
    return X_train, Y_train, X_test, Y_test

def format_data(dataitem):
    user_list = list(sorted(set(dataitem['user'])))
    datadict = {}
    user_y = []
    finaldata = []
    for user in user_list:
        is_user = dataitem['user']==user
        accelerometer_user = dataitem[is_user]    
        filtered = accelerometer_user[['x','y','z','timestamp']]    
        time_list = list(sorted(set(filtered['timestamp'])))
        for idx, metric in enumerate(metrics):
            data = []
            for timestamp in time_list:
                df = filtered[filtered['timestamp']==timestamp]            
                list_df = list(df[metric].head(100))
                if len(list_df) > 49:
                    user_y.append(user)
                    data.append(list_df)
            datadict[idx] = data
        data0 = datadict[0]
        data1 = datadict[1]
        data2 = datadict[2]
        for idx, val in enumerate(data0):
            finaldata.append(val + data1[idx] + data2[idx])

    df = pd.DataFrame(finaldata)  
    data_array = np.asarray(df)
    user_ids = [user_list.index(user) for user in user_y]
    user_array = np.asarray(user_ids)
    #print(data_array)
    
    X = None
    x_data = data_array
    for i in range(len(x_data)):
        row = np.asarray(x_data[i, :])
        row = row.reshape(3, 100).T
        if X is None:
            X = np.zeros((len(x_data), 100, 3))
        X[i] = row
    #print(X)
    return X, user_array

class Data_loader(Dataset):
    def __init__(self, samples, labels, t):
        self.samples = samples
        self.labels = labels
        self.T = t

    def __getitem__(self, index):
        sample, target = self.samples[index], self.labels[index]
        if self.T:
            return self.T(sample), target
        else:
            return sample, target

    def __len__(self):
        return len(self.samples)

In [39]:
# Defining network architecture
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=9, kernel_size=(1, 3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=9, out_channels=64, kernel_size=(1, 3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=64 * 23, out_features=400),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=400, out_features=200),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(in_features=200, out_features=2)
        )

    def forward(self, x):
        #print(len(x))
        out = self.conv1(x)
        #print(out.shape)
        out = self.conv2(out)
        #print(out.shape)
        out = out.reshape(-1, 64 * 23)
        #print(len(out))
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        out = F.softmax(out, dim=1)
        return out

In [40]:
# Train and plot functions
def train(model, optimizer, train_loader, test_loader):
    n_batch = len(train_loader.dataset) // batch_size
    criterion = nn.CrossEntropyLoss()

    for e in range(epochs):
        model.train()
        correct, total_loss = 0, 0
        total = 0
        for index, (sample, target) in enumerate(train_loader):
            print(target)
            sample, target = sample.to(
                DEVICE).float(), target.to(DEVICE).long()
            sample = sample.view(-1, 3, 1, 100)
            print(target)
            output = model(sample)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum()

        acc_train = float(correct) * 100.0 / (batch_size * n_batch)
        print(f'Epoch: [{e+1}/{epochs}], loss: {total_loss}, train acc: {acc_train}%')

        # We proceed now to use the test data to evaluate intermediate results without modifying the model (no training)
        model.train(False)
        with torch.no_grad():
            correct, total = 0, 0
            for sample, target in test_loader:
                sample, target = sample.to(
                    DEVICE).float(), target.to(DEVICE).long()
                sample = sample.view(-1, 3, 1, 100)
                output = model(sample)
                _, predicted = torch.max(output.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum()
        acc_test = float(correct) * 100 / total
        print(f'Epoch: [{e+1}/{epochs}], test acc: {float(correct) * 100 / total}%')
        result.append([acc_train, acc_test])
        result_np = np.array(result, dtype=float)
        np.savetxt('result.csv', result_np, fmt='%.2f', delimiter=',')

In [41]:
# Get GPU if available
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
DEVICE = get_default_device()
DEVICE

device(type='cpu')

In [42]:
train_loader, test_loader = load(
        batch_size=batch_size)

In [20]:
model = Network().to(DEVICE)
for index, (sample, target) in enumerate(train_loader):
    sample, target = sample.to(    
        DEVICE).float(), target.to(DEVICE).long() 
    sample = sample.view(-1, 3, 1, 100)
    #print(len(sample))
    output = model(sample)
    #print(target.shape)

In [43]:
# Load to selected device and traing model
model = Network().to(DEVICE)
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)
train(model, optimizer, train_loader, test_loader)
result = np.array(result, dtype=float)

tensor([10,  4,  2,  7,  6,  4,  5,  9,  2,  0,  4,  2,  8,  0,  6,  9,  6,  4,
         6,  4,  2,  0,  6,  6,  0,  3,  6,  6, 11,  9,  2,  1,  5,  2, 11, 11,
         5, 10,  2,  2,  0,  5,  2,  7,  1,  2,  8,  6,  4,  5,  3,  9, 10, 10,
         6,  2,  6,  4,  6,  6,  9,  8,  0,  3], dtype=torch.int32)
tensor([10,  4,  2,  7,  6,  4,  5,  9,  2,  0,  4,  2,  8,  0,  6,  9,  6,  4,
         6,  4,  2,  0,  6,  6,  0,  3,  6,  6, 11,  9,  2,  1,  5,  2, 11, 11,
         5, 10,  2,  2,  0,  5,  2,  7,  1,  2,  8,  6,  4,  5,  3,  9, 10, 10,
         6,  2,  6,  4,  6,  6,  9,  8,  0,  3])


IndexError: Target 10 is out of bounds.