In [1]:
import os
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
from tqdm import tqdm
import copy
import time
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
# Root folder that is scanned (recursively) for sensor recordings.
base_path = "./data"
phone_accel_file_paths = []
phone_gyro_file_paths = []

# Collect every file whose name mentions "accel" or "gyro".
for directories, subdirectories, files in os.walk(base_path):
    # os.walk yields names in arbitrary order; sort so the collected lists
    # (and anything concatenated from them) are stable between runs.
    for filename in sorted(files):
        # Build the path from the directory that is actually being visited
        # rather than assuming a fixed "accel"/"gyro" sub-folder, so the
        # stored path always points at the file that was found.
        file_path = os.path.join(directories, filename)
        if "accel" in filename:
            phone_accel_file_paths.append(file_path)
        elif "gyro" in filename:
            phone_gyro_file_paths.append(file_path)

In [4]:
# Screen names in code order; the code of a screen is its 1-based position,
# stored as a string.
screens = ["Focus", "Mathisis", "Memoria", "Reacton", "Speedy"]
screens_code = [str(position) for position in range(1, len(screens) + 1)]

# code -> screen name lookup built from the two parallel lists above.
screen_dict = dict(zip(screens_code, screens))

In [404]:
def clean_data(dataframe):
    """Return a copy of ``dataframe`` without the ``player_id`` and ``timestamp`` columns.

    The input frame is left untouched. A ``KeyError`` is raised by pandas if
    either column is missing.
    """
    unused_columns = ["player_id", "timestamp"]
    return dataframe.drop(columns=unused_columns).copy()

def scale_data(data):
    """Standardize the six sensor channels and carry the screen label along.

    The input is read positionally: the first six columns are taken as the
    accelerometer/gyroscope channels and the seventh as the screen label,
    whatever the columns are called in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with exactly seven columns (six features followed by the label).

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index, the columns ``acc_X``, ``acc_Y``,
        ``acc_Z``, ``gyr_X``, ``gyr_Y``, ``gyr_Z`` holding the values produced
        by ``StandardScaler().fit_transform`` and a ``Screen`` column holding
        the original labels.

    Raises
    ------
    ValueError
        If ``data`` does not have exactly seven columns.

    Notes
    -----
    ``data`` itself is not modified (no column renaming, no helper columns).
    A new scaler is fitted on every call, so two frames passed in separate
    calls are standardized with their own statistics.
    """
    feature_cols = ['acc_X', 'acc_Y', 'acc_Z', 'gyr_X', 'gyr_Y', 'gyr_Z']
    n_features = len(feature_cols)

    if data.shape[1] != n_features + 1:
        raise ValueError(
            "scale_data expects %d columns (6 features + label), got %d"
            % (n_features + 1, data.shape[1])
        )

    # Positional slices keep the caller's frame untouched.
    features = data.iloc[:, :n_features]
    labels = data.iloc[:, n_features]

    scaled = StandardScaler().fit_transform(features.to_numpy())

    scaled_df = pd.DataFrame(data=scaled, columns=feature_cols)
    # `.values` drops the original index so labels align with the new 0..n-1 index.
    scaled_df['Screen'] = labels.values

    return scaled_df



def activity_dictionary(dataframe):
    """Map every encoded label index to the ``screen`` value it stands for.

    A ``LabelEncoder`` is fitted on the ``screen`` column of ``dataframe``;
    the returned dictionary has the encoder's integer codes as keys and the
    corresponding original class values as values.
    """
    encoder = LabelEncoder()
    encoder.fit(dataframe["screen"])

    classes = encoder.classes_
    return dict(zip(encoder.transform(classes), classes))



def preprocess_data(dataframe):
    """Run the preprocessing pipeline: ``clean_data`` followed by ``scale_data``.

    Returns whatever ``scale_data`` returns for the cleaned frame.
    """
    return scale_data(clean_data(dataframe))


def plot_learningCurve(history, epochs):
    """Draw the accuracy curve and then the loss curve for train and validation.

    ``history.history`` must be a mapping with the keys ``accuracy``,
    ``val_accuracy``, ``loss`` and ``val_loss``; each curve is plotted
    against the epochs ``1..epochs``. Two figures are shown, one after the
    other.
    """
    epoch_range = range(1, epochs + 1)

    # (train key, validation key, title, y-axis label, legend position)
    panels = (
        ('accuracy', "val_accuracy", 'Model accuracy', 'Accuracy', 'lower right'),
        ('loss', 'val_loss', 'Model loss', 'Loss', 'upper right'),
    )

    for train_key, val_key, title, y_label, legend_loc in panels:
        plt.plot(epoch_range, history.history[train_key])
        plt.plot(epoch_range, history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc=legend_loc)
        plt.show()

In [399]:
""" Divides data into 2D frames """

# Windowing parameters used when cutting the recordings into frames.
frequency = 10      # samples per second (Hz)
time_period = 10    # window length in seconds
frame_size = time_period * frequency    # samples per frame
step_size = frame_size                  # stride == frame size, so frames never overlap

def get_frames(df, frame_size, step_size):
    """Cut the signal columns of ``df`` into fixed-length windows with one label each.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``acc_X``, ``acc_Y``, ``acc_Z``, ``gyr_X``,
        ``gyr_Y``, ``gyr_Z`` and ``Screen``.
    frame_size : int
        Number of consecutive rows per window (must be positive).
    step_size : int
        Number of rows between the starts of two windows (must be positive).

    Returns
    -------
    frames : numpy.ndarray
        Array of shape ``(n_frames, frame_size, 6)``.
    labels : numpy.ndarray
        Most frequent ``Screen`` value of each window; on ties the smallest
        value wins.

    Raises
    ------
    ValueError
        If ``frame_size`` or ``step_size`` is not positive.

    Notes
    -----
    Each window is assembled channel-major (6 rows of ``frame_size`` samples)
    and the stacked result is then *reshaped*, not transposed, to
    ``(n_frames, frame_size, 6)``. The underlying memory order therefore stays
    ``(n_frames, 6, frame_size)``; the notebook cell that later reshapes to
    ``(n, 1, 6, frame_size)`` relies on exactly this order, so do not replace
    the reshape with a transpose without updating that cell as well.
    """
    if frame_size <= 0 or step_size <= 0:
        raise ValueError("frame_size and step_size must be positive")

    feature_cols = ['acc_X', 'acc_Y', 'acc_Z', 'gyr_X', 'gyr_Y', 'gyr_Z']
    n_features = len(feature_cols)

    # Pull the columns out once instead of re-slicing the frame per window.
    channels = [df[col].values for col in feature_cols]
    screen = df['Screen'].to_numpy()

    frames = []
    labels = []
    # "+ 1" keeps a window that ends exactly on the last row; without it the
    # final full window is silently dropped whenever it fits exactly.
    for start in range(0, len(df) - frame_size + 1, step_size):
        stop = start + frame_size
        frames.append([channel[start:stop] for channel in channels])

        # Majority label of the window. np.unique returns sorted values, and
        # argmax picks the first maximum, so ties resolve to the smallest
        # value. This also works for string labels, which scipy.stats.mode
        # does not support in current SciPy releases.
        values, counts = np.unique(screen[start:stop], return_counts=True)
        labels.append(values[np.argmax(counts)])

    frames = np.asarray(frames).reshape(-1, frame_size, n_features)
    labels = np.asarray(labels)

    print(frames.shape)
    print(labels.shape)

    return frames, labels

In [7]:
def get_model(n_channels=6, frame_size=100, n_classes=16):
    """Build the CNN classifier used for centralized training.

    Parameters
    ----------
    n_channels : int, default 6
        Height of the input frames (number of sensor channels).
    frame_size : int, default 100
        Width of the input frames (samples per window).
    n_classes : int, default 16
        Size of the output layer.

    Returns
    -------
    torch.nn.Sequential
        Network mapping ``(batch, 1, n_channels, frame_size)`` inputs to
        ``(batch, n_classes)`` raw logits.

    Notes
    -----
    * The model returns logits (there is no final ``Softmax``):
      ``CrossEntropyLoss`` applies ``log_softmax`` itself, so normalising
      first would apply softmax twice. ``argmax`` over the output is
      unaffected by this.
    * The width of the first fully connected layer is derived from a dummy
      forward pass instead of being hard-coded, so it always matches the
      convolutional output for the requested input size.
    * ``MaxPool2d((1, 2), 2)`` uses stride 2 on both axes, so the channel
      axis is subsampled as well (kept as in the original architecture).
    """
    feature_extractor = nn.Sequential(
        nn.Conv2d(1, 32, (1, 3)),
        nn.ReLU(),
        nn.MaxPool2d((1, 2), 2),
        nn.Conv2d(32, 64, (1, 3)),
        nn.ReLU(),
        nn.MaxPool2d((1, 2), 2),
        nn.Flatten(),
    )

    # Probe the flattened feature size with an all-zero frame.
    with torch.no_grad():
        n_flat = feature_extractor(torch.zeros(1, 1, n_channels, frame_size)).shape[1]

    classifier = [
        nn.Linear(n_flat, 64),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(64, n_classes),
    ]

    # Flat Sequential: layer indices 0..10 match the original layout.
    return nn.Sequential(*feature_extractor, *classifier)

In [7]:
# Empty result dictionaries for the phone-accelerometer runs.
phone_accel_accuracy, phone_accel_precision = {}, {}
phone_accel_recall, phone_accel_f1 = {}, {}

phone_accel_matrix, phone_accel_activity_accuracy = {}, {}
phone_accel_classification_reports = {}

# Read every collected accelerometer file and stack the rows into one frame.
data = pd.concat([pd.read_csv(path) for path in phone_accel_file_paths])

In [None]:
# Load the combined recordings; `data` is rebound to this frame below.
data_acc_gyr = pd.read_csv('data/data_all.csv')
# Rows per player (not displayed: this is not the last expression of the cell).
data_acc_gyr.loc[:, 'player_id'].value_counts()
data = data_acc_gyr

In [135]:
def train(epoch, train_x, train_y):
    """Run one full-batch optimisation step and log the loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``train_losses`` objects, which must exist before the first call.

    Parameters
    ----------
    epoch : int
        Zero-based epoch counter; only used for the printed message.
    train_x : torch.Tensor
        Inputs passed to ``model`` in a single batch.
    train_y : torch.Tensor
        Targets passed to ``criterion``.

    Side effects
    ------------
    Updates the model parameters, appends the (detached) loss tensor to
    ``train_losses`` and prints the loss value.
    """
    model.train()

    optimizer.zero_grad()

    output_train = model(train_x)
    loss_train = criterion(output_train, train_y)

    loss_train.backward()
    optimizer.step()

    # Store a detached tensor: keeping the attached loss would keep a
    # reference to this step's autograd graph for every epoch in the history.
    # `.item()` still works on the stored value.
    train_losses.append(loss_train.detach())
    tr_loss = loss_train.item()

    print('Epoch : ',epoch+1, '\t', 'loss :', tr_loss)

In [None]:
# Distinct player identifiers found in the loaded recordings.
subjectIDs = data.loc[:, "player_id"].unique()
subjectIDs

In [10]:
# Replace every `screen` entry that contains a known screen name by that
# screen's code (the assignment modifies `data` in place).
for screen_name, screen_code in zip(screens, screens_code):
    matches_screen = data.screen.str.contains(screen_name)
    data.loc[matches_screen, 'screen'] = screen_code

In [406]:
# Single-column frame holding the screen code of every row.
all_labels = data.loc[:, ['screen']]

In [407]:
# 80/20 row-level split. A fixed `random_state` makes the split (and every
# number derived from it) reproducible across kernel restarts.
# NOTE(review): rows are shuffled individually here, while `get_frames` later
# groups consecutive rows into windows - confirm that windows made of
# non-adjacent samples are intended.
train_data, test_data, label_train, label_test = train_test_split(
    data, all_labels, test_size=0.2, shuffle=True, random_state=42)

In [408]:
# Clean and standardize each split; every call fits its own scaler.
processed_train_data, processed_test_data = (
    preprocess_data(split) for split in (train_data, test_data)
)

In [None]:
# Window both splits with the same frame/step size (train first, then test).
(X_train, y_train), (X_test, y_test) = (
    get_frames(split, frame_size, step_size)
    for split in (processed_train_data, processed_test_data)
)

In [None]:
# Add the single image-channel axis expected by Conv2d:
# (n, frame_size, 6) -> (n, 1, 6, frame_size).
# The target shape is spelled out with `frame_size` instead of being read
# back from the array, so re-running this cell is a no-op rather than an
# error (after the first run `shape[1]` would be 1).
X_train = X_train.reshape(-1, 1, 6, frame_size)
X_test = X_test.reshape(-1, 1, 6, frame_size)

In [429]:
# Training inputs as a float32 tensor.
train_x = torch.from_numpy(X_train.astype(np.float32))

# Training labels: cast to float64 first, then to a LongTensor of class ids.
train_y = torch.from_numpy(y_train.astype(np.float64)).type(torch.LongTensor)

In [249]:
# Test inputs as a float32 tensor.
test_x = torch.from_numpy(X_test.astype(np.float32))

# Test labels: cast to float64 first, then to a LongTensor of class ids.
test_y = torch.from_numpy(y_test.astype(np.float64)).type(torch.LongTensor)

In [None]:
# Centralized baseline: full-batch training for a fixed number of epochs.
n_epochs = 10
train_losses = []   # filled by train(), one entry per epoch

model = get_model()
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.01)

for epoch in range(n_epochs):
    train(epoch=epoch, train_x=train_x, train_y=train_y)

In [None]:
# Switch to evaluation mode so Dropout is disabled, and skip autograd
# bookkeeping: this pass is only used for scoring.
model.eval()
with torch.no_grad():
    output = model(train_x)

# The predicted class is the arg-max over the class dimension; exponentiating
# or normalising the scores first would not change which index is largest.
predictions = output.argmax(dim=1).cpu().numpy()

# accuracy on training set
accuracy_score(train_y, predictions)

In [None]:
# Convert the stored loss tensors to plain floats and plot them per epoch.
fi_los = [loss_tensor.item() for loss_tensor in train_losses]
epoch_axis = range(n_epochs)
plt.plot(epoch_axis, fi_los)
plt.ylabel('Loss')
plt.xlabel('Epoch');

In [421]:
# Partition the training sample indices into 10 client shards.
# NOTE(review): `data_iid` is defined in a later cell; on a fresh kernel that
# cell has to be executed before this one.
user_groups = data_iid(dataset=train_x, num_users=10)

In [32]:
def data_iid(dataset, num_users, seed=None):
    """Split the sample indices of ``dataset`` into disjoint random shards.

    Parameters
    ----------
    dataset : sized
        Anything supporting ``len()``; only its length is used.
    num_users : int
        Number of shards to create (must be positive).
    seed : int, optional
        When given, the partition is drawn from ``np.random.default_rng(seed)``
        and is reproducible. When omitted, NumPy's global random state is
        used, as before.

    Returns
    -------
    dict[int, set[int]]
        Maps each user id ``0..num_users-1`` to a set of
        ``len(dataset) // num_users`` sample indices. Shards never overlap;
        up to ``num_users - 1`` leftover indices are not assigned to anyone.

    Raises
    ------
    ValueError
        If ``num_users`` is not positive.
    """
    if num_users <= 0:
        raise ValueError("num_users must be a positive integer")

    num_items = len(dataset) // num_users

    # One shuffle of all indices, then consecutive slices: the shards are
    # random and disjoint without rebuilding the pool of remaining indices
    # for every user.
    rng = np.random.default_rng(seed) if seed is not None else np.random
    shuffled = rng.permutation(len(dataset))

    return {
        user: set(shuffled[user * num_items:(user + 1) * num_items].tolist())
        for user in range(num_users)
    }

In [61]:
def average_weights(w):
    """Average a list of state dicts entry by entry.

    ``w`` is a non-empty sequence of mappings that share the same keys. The
    result is a deep copy of the first mapping in which every entry has been
    replaced by the mean of that entry over all mappings; the inputs are not
    modified.
    """
    n_models = len(w)
    w_avg = copy.deepcopy(w[0])

    for key in w_avg.keys():
        # Accumulate the remaining models onto the copy of the first one.
        for other in w[1:]:
            w_avg[key] += other[key]
        w_avg[key] = torch.div(w_avg[key], n_models)

    return w_avg

In [442]:
class LocalUpdate(object):
    """Local training and evaluation for one federated client.

    The client's sample indices are split 80/10/10 (in the order given) into
    train, validation and test parts, each wrapped in a ``DataLoader`` over a
    ``DatasetSplit``.
    """

    def __init__(self, dataset, label, idxs):
        """Create the loaders for the samples of ``dataset``/``label`` selected by ``idxs``."""
        self.trainloader, self.validloader, self.testloader = self.train_val_test(
            dataset, label, list(idxs))
        self.device = 'cpu'
        # CrossEntropyLoss applies log_softmax internally. For a model that
        # already returns log-probabilities this is a no-op (log_softmax is
        # idempotent), so the criterion works for logits and log-probs alike.
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def train_val_test(self, dataset, label, idxs):
        """
        Returns train, validation and test dataloaders for a given dataset
        and user indexes.

        The validation and test loaders use roughly a tenth of their split as
        batch size, but never less than 1: ``DataLoader`` rejects a batch
        size of 0, which plain integer division yields for splits with fewer
        than ten samples.
        """
        idxs = np.asarray(idxs)

        # split indexes for train, validation, and test (80, 10, 10)
        train_end = int(0.8 * len(idxs))
        val_end = int(0.9 * len(idxs))
        idxs_train = idxs[:train_end]
        idxs_val = idxs[train_end:val_end]
        idxs_test = idxs[val_end:]

        trainloader = DataLoader(DatasetSplit(dataset, label, idxs_train),
                                 batch_size=64, shuffle=True)
        validloader = DataLoader(DatasetSplit(dataset, label, idxs_val),
                                 batch_size=max(1, len(idxs_val) // 10), shuffle=False)
        testloader = DataLoader(DatasetSplit(dataset, label, idxs_test),
                                batch_size=max(1, len(idxs_test) // 10), shuffle=False)
        return trainloader, validloader, testloader

    def update_weights(self, model, global_round, local_epochs=10, lr=0.001, momentum=0.9):
        """Train ``model`` on this client's training split.

        Parameters
        ----------
        model : torch.nn.Module
            Model to train; it is updated in place.
        global_round : int
            Only used in the progress message.
        local_epochs : int, default 10
            Number of passes over the local training split.
        lr, momentum : float
            Hyper-parameters of the SGD optimizer (defaults 0.001 and 0.9).

        Returns
        -------
        (state_dict, float)
            The model's ``state_dict()`` after training and the mean of the
            per-epoch average batch losses.
        """
        # Set mode to train model
        model.train()
        epoch_loss = []

        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

        for local_epoch in range(local_epochs):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.trainloader):
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                log_probs = model(images)
                loss = self.criterion(log_probs, labels)
                loss.backward()
                optimizer.step()

                if (batch_idx % 300 == 0):
                    print('| Global Round : {} | Local Epoch : {} | [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        global_round, local_epoch, batch_idx * len(images),
                        len(self.trainloader.dataset),
                        100. * batch_idx / len(self.trainloader), loss.item()))
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss)/len(batch_loss))

        return model.state_dict(), sum(epoch_loss) / len(epoch_loss)

    def inference(self, model):
        """ Returns the inference accuracy and loss.

        Evaluates ``model`` on this client's test split. The accuracy is the
        fraction of correctly classified samples; the loss is the sum of the
        per-batch criterion values (not an average).
        """

        model.eval()
        loss, total, correct = 0.0, 0.0, 0.0

        # No gradients are needed for scoring.
        with torch.no_grad():
            for batch_idx, (images, labels) in enumerate(self.testloader):
                images, labels = images.to(self.device), labels.to(self.device)

                # Inference
                outputs = model(images)
                batch_loss = self.criterion(outputs, labels)
                loss += batch_loss.item()

                # Prediction
                _, pred_labels = torch.max(outputs, 1)
                pred_labels = pred_labels.view(-1)
                correct += torch.sum(torch.eq(pred_labels, labels)).item()
                total += len(labels)

        accuracy = correct/total
        return accuracy, loss

In [49]:
class DatasetSplit(Dataset):
    """View on a subset of ``dataset``/``label`` selected by a list of indices.

    Item ``k`` of the split is the pair ``(dataset[idxs[k]], label[idxs[k]])``.
    """

    def __init__(self, dataset, label, idxs):
        self.dataset = dataset
        self.label = label
        # Plain Python ints, whatever integer-like values were passed in.
        self.idxs = list(map(int, idxs))

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        source_index = self.idxs[item]
        label = self.label[source_index]
        image = self.dataset[source_index]
        return image, label

In [443]:
class CNNModel(nn.Module):
    """Two-layer CNN used as the global/local model in the federated loop.

    Parameters
    ----------
    n_channels : int, default 6
        Height of the input frames (number of sensor channels).
    frame_size : int, default 100
        Width of the input frames (samples per window); must be at least 3.
    n_classes : int, default 16
        Size of the output layer.

    Inputs are expected as ``(batch, 1, n_channels, frame_size)``; the output
    is ``(batch, n_classes)`` log-probabilities. With the default arguments
    the layer sizes are identical to the previously hard-coded ones
    (``128 * 588`` inputs to ``fc1``).
    """

    def __init__(self, n_channels=6, frame_size=100, n_classes=16):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(1,2))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(1,2))
        # Defined but not applied in forward().
        self.conv2_drop = nn.Dropout2d(0.5)
        # Each (1, 2) convolution shortens the time axis by one sample and
        # leaves the channel axis unchanged, so conv2 produces
        # 128 x n_channels x (frame_size - 2) values per sample.
        flat_features = 128 * n_channels * (frame_size - 2)
        self.fc1 = nn.Linear(flat_features, 64)
        self.fc2 = nn.Linear(64, n_classes)

    def forward(self, x):
        # The former max_pool2d(x, 1) calls used a 1x1 window with stride 1,
        # which returns its input unchanged, so they are omitted.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [433]:
# Names under which the federated loop accesses the training tensors.
train_dataset_x, train_dataset_y = train_x, train_y

In [None]:
device = 'cpu'

# Global model shared by all clients; `.to()` returns the module itself.
global_model = CNNModel().to(device)
global_model.train()
print(global_model)

# copy weights
global_weights = global_model.state_dict()

# Training bookkeeping
train_loss = []
train_accuracy = []
val_acc_list = []
net_list = []
cv_loss = []
cv_acc = []
print_every = 2     # report statistics every `print_every` global rounds
val_loss_pre = 0
counter = 0

In [None]:
# Federated averaging: in every global round a fraction of the clients trains
# a copy of the global model locally, the resulting weights are averaged into
# the global model, and the global model is then scored on every client.
num_rounds = 10                 # global communication rounds
num_users = len(user_groups)    # clients produced by data_iid
frac = 0.1                      # fraction of clients trained per round

for epoch in tqdm(range(num_rounds)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()
    m = max(int(frac * num_users), 1)
    idxs_users = np.random.choice(range(num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(dataset=train_dataset_x, label = train_dataset_y,
                                  idxs=user_groups[idx])
        # Each client trains its own deep copy so the global model only
        # changes through the averaging step below.
        w, loss = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(loss)

    # aggregate the client weights and load them into the global model
    global_weights = average_weights(local_weights)
    global_model.load_state_dict(global_weights)

    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    # Calculate avg training accuracy over all users at every epoch
    list_acc, list_loss = [], []
    global_model.eval()
    for c in range(num_users):
        # Score client `c`. Indexing with the leftover `idx` of the training
        # loop above would evaluate the same client `num_users` times.
        local_model = LocalUpdate(dataset=train_dataset_x, label = train_dataset_y,
                                  idxs=user_groups[c])
        acc, loss = local_model.inference(model=global_model)
        list_acc.append(acc)
        list_loss.append(loss)
    train_accuracy.append(sum(list_acc)/len(list_acc))

    # print global training loss after every 'i' rounds
    if (epoch+1) % print_every == 0:
        print(f' \nAvg Training Stats after {epoch+1} global rounds:')
        print(f'Training Loss : {np.mean(np.array(train_loss))}')
        print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))

In [271]:
def test_inference(model, test_dataset):
    """ Returns the test accuracy and loss.
    """

    model.eval()
    loss, total, correct = 0.0, 0.0, 0.0

    device = 'cpu'
    criterion = nn.NLLLoss().to(device)
    testloader = DataLoader(test_dataset, batch_size=115,
                            shuffle=False)

    for batch_idx, images in enumerate(testloader):        
        images = images.to(device)
        
        #print(images.shape)
        
        labels = test_y_shape[batch_idx] 
        print(labels.shape)
        labels = labels.to(device)
       
        # Inference
        outputs = model(images)
        batch_loss = criterion(outputs, labels)
        loss += batch_loss.item()

        # Prediction
        _, pred_labels = torch.max(outputs, 1)
        pred_labels = pred_labels.view(-1)
        correct += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)

    accuracy = correct/total
    return accuracy, loss

In [None]:
# Test inference after completion of training
# Score the aggregated model on the held-out frames once training is done.
test_acc, test_loss = test_inference(model=global_model, test_dataset=test_x)

final_train_accuracy = train_accuracy[-1]

print(f' \n Results after {10} global rounds of training:')
print("|---- Avg Train Accuracy: {:.2f}%".format(100*final_train_accuracy))
print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))

In [None]:
# Test labels arranged as rows of 115 entries each; this only works when the
# number of test labels is a multiple of 115.
labels_per_row = 115
test_y_shape = test_y.reshape(-1, labels_per_row)
test_y_shape[0].shape

In [376]:
# PLOTTING (optional)
# PLOTTING (optional)
import matplotlib
import matplotlib.pyplot as plt

# One figure per curve: (title, values, line colour, y-axis label, output file).
curves = (
    ('Training Loss vs Communication rounds', train_loss, 'r',
     'Training loss', "./data/fed_loss.png"),
    ('Average Accuracy vs Communication rounds', train_accuracy, 'k',
     'Average Accuracy', "./data/fed_acc.png"),
)

for curve_title, values, colour, y_label, out_file in curves:
    plt.figure()
    plt.title(curve_title)
    plt.plot(range(len(values)), values, color=colour)
    plt.ylabel(y_label)
    plt.xlabel('Communication Rounds')
    plt.savefig(out_file)