In [1]:
import pandas as pd
import numpy as np
import random
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import tensorflow_federated as tff

import nest_asyncio
nest_asyncio.apply()

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K

In [19]:
def unique(list1):
    """Return the distinct elements of ``list1`` as a list in ascending order."""
    # A set drops duplicates; sorted() then yields a new ordered list.
    return sorted(set(list1))

def create_userids(df):
    """Return the sorted distinct values found in the last column of ``df``.

    The frame is read through ``df.values`` and its last column is
    de-duplicated and ordered by ``unique``.
    """
    last_column = df.values[:, -1]
    return unique(last_column)

In [21]:
def split_dataframe(df, frame_size=128, n_channels=3):
    """Turn a flat window table into batched train/validation ``tf.data`` datasets.

    Every column of ``df`` except the last is treated as a flattened window of
    ``frame_size * n_channels`` values; the last column is the class label.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per window, label in the last column.
    frame_size : int, default 128
        Number of time steps each row is reshaped to.
    n_channels : int, default 3
        Number of values per time step.

    Returns
    -------
    (train_ds, val_ds, nbclasses)
        Batched datasets of ``(window, one_hot_label)`` pairs and the number of
        distinct labels in ``df``.

    Raises
    ------
    ValueError
        If the number of feature columns is not ``frame_size * n_channels``.

    NOTE(review): the split is a random row-level split. If rows are overlapping
    sliding windows (as ``split_data`` produces when step < frame_size), strongly
    overlapping windows can land in both sets, so validation metrics may be
    optimistic -- confirm whether a per-recording split is wanted.
    """
    RANDOM_STATE = 11235
    SHUFFLE_BUFFER_SIZE = 100
    MAX_BATCH_SIZE = 32

    nbclasses = len(create_userids(df))

    array = df.values
    nfeatures = array.shape[1] - 1
    if nfeatures != frame_size * n_channels:
        # Fail loudly instead of letting reshape mix values from different rows.
        raise ValueError(
            f"expected {frame_size * n_channels} feature columns "
            f"({frame_size} x {n_channels}), got {nfeatures}"
        )

    X = array[:, 0:nfeatures].reshape(-1, frame_size, n_channels)
    labels = array[:, -1].reshape(-1, 1)

    # One-hot encode the labels; the encoder returns a sparse matrix by default.
    enc = OneHotEncoder()
    enc.fit(labels)
    y = enc.transform(labels).toarray()

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.25, random_state=RANDOM_STATE
    )

    # A tenth of the training set, capped at MAX_BATCH_SIZE and floored at 1:
    # with fewer than 10 training rows the original computed a batch size of 0,
    # which Dataset.batch() rejects.
    batch_size = max(1, min(X_train.shape[0] // 10, MAX_BATCH_SIZE))

    X_train = np.asarray(X_train).astype(np.float32)
    X_val = np.asarray(X_val).astype(np.float32)

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))

    # Only the training stream is shuffled; validation keeps its order.
    train_ds = train_ds.shuffle(SHUFFLE_BUFFER_SIZE).batch(batch_size)
    val_ds = val_ds.batch(batch_size)

    return train_ds, val_ds, nbclasses

In [14]:
def load_data(base_path="C:/Users/SouthSystem/Federated Learning/DataBioCom/data"):
    """Read the accelerometer CSV files and build train/validation datasets.

    Every file under ``base_path`` (searched recursively) whose name contains
    ``"accel"`` is read with ``pandas.read_csv`` and concatenated. The combined
    table is cut into per-user windows by ``split_data``, each window is
    flattened into one row, the user id is appended as a ``'user'`` column, and
    the result is handed to ``split_dataframe``.

    Parameters
    ----------
    base_path : str
        Root directory to search. Defaults to the path originally hard-coded
        in this notebook.

    Returns
    -------
    (train_ds, val_ds, n)
        Exactly what ``split_dataframe`` returns.

    Raises
    ------
    FileNotFoundError
        If no matching file is found under ``base_path``.
    ValueError
        If the data yields no window at all.
    """
    phone_accel_file_paths = []
    for dirpath, _subdirectories, files in os.walk(base_path):
        for filename in files:
            if "accel" in filename:
                # Join with the directory the file was actually found in;
                # os.walk recurses, so assuming "<base_path>/accel/" is wrong
                # for matches located anywhere else.
                phone_accel_file_paths.append(os.path.join(dirpath, filename))

    # os.walk order is filesystem dependent; sort so repeated runs see the
    # rows in the same order (the downstream split uses a fixed seed).
    phone_accel_file_paths.sort()

    if not phone_accel_file_paths:
        raise FileNotFoundError(
            f"no file with 'accel' in its name found under {base_path!r}"
        )

    data = pd.concat(map(pd.read_csv, phone_accel_file_paths))
    users = data['player_id'].unique()

    train_set, user_list = split_data(data, users)
    if not train_set:
        raise ValueError("no user has enough samples to build a single window")

    # Stack the windows to (n_windows, frame, channels), then flatten each
    # window into one row so it fits in a DataFrame next to its label.
    train_set = np.array([np.array(x) for x in train_set])
    train_set_join = train_set.reshape(train_set.shape[0], -1)
    data_join = pd.DataFrame(train_set_join)
    data_join['user'] = user_list

    train_ds, val_ds, n = split_dataframe(data_join)

    return train_ds, val_ds, n
    
def split_data(data, users, frame_size=128, step=50):
    """Cut each user's rows into fixed-length sliding windows.

    For every id in ``users`` the rows of ``data`` whose ``'player_id'`` equals
    that id are selected, restricted to the first three columns (by position),
    and sliced into windows of ``frame_size`` consecutive rows whose start
    offsets advance by ``step``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'player_id'`` column. The first three columns are
        taken as the signal -- presumably the x/y/z accelerometer axes; verify
        against the CSV layout.
    users : iterable
        Ids to extract, in the order the windows should be emitted.
    frame_size : int, default 128
        Rows per window.
    step : int, default 50
        Offset between the starts of consecutive windows.

    Returns
    -------
    (train, user_list)
        ``train`` is a list of DataFrame windows; ``user_list`` holds the id of
        the user each window came from, in the same order.

    Raises
    ------
    ValueError
        If ``frame_size`` or ``step`` is not positive.
    """
    if frame_size <= 0 or step <= 0:
        raise ValueError("frame_size and step must be positive")

    user_list = []
    train = []

    for user in users:
        data_user = data[data['player_id'] == user].iloc[:, [0, 1, 2]]
        # The last valid start is n_rows - frame_size (that window ends exactly
        # on the final row), so the range bound must be one past it. The
        # previous bound dropped that window, e.g. a user with exactly
        # frame_size rows produced nothing.
        last_start = data_user.shape[0] - frame_size
        for start in range(0, last_start + 1, step):
            train.append(data_user.iloc[start:start + frame_size])
            user_list.append(user)

    return train, user_list

In [15]:
def get_datasets():
    """Return the ``(train, validation, n)`` triple produced by ``load_data``."""
    train_ds, val_ds, num_classes = load_data()
    return train_ds, val_ds, num_classes
    

In [26]:
def centralized_training_loop(train_dataset, validation_dataset, nbclasses, input_shape = (128, 3), num_filters = 128, epochs = 50):
    """Build, train and evaluate a 1-D convolutional classifier.

    The network is three Conv1D -> BatchNormalization -> ReLU blocks (kernel
    sizes 8, 5, 3 with ``num_filters``, ``2 * num_filters``, ``num_filters``
    filters), global average pooling, and a softmax layer with ``nbclasses``
    units. It is compiled with categorical cross-entropy and a default Adam
    optimizer, trained on ``train_dataset`` and validated on
    ``validation_dataset``.

    Parameters
    ----------
    train_dataset, validation_dataset
        Datasets passed straight to ``model.fit`` / ``model.evaluate``.
    nbclasses : int
        Number of output units.
    input_shape : tuple, default (128, 3)
        Shape of one sample.
    num_filters : int, default 128
        Base filter count of the convolution blocks.
    epochs : int, default 50
        Number of training epochs.

    Prints the model summary, the per-epoch history table and the final
    validation metrics. Returns ``None``.
    """
    input_layer = keras.layers.Input(input_shape)

    # (filters, kernel_size) of the three convolution blocks, in order.
    conv_blocks = [(num_filters, 8), (2 * num_filters, 5), (num_filters, 3)]
    features = input_layer
    for filters, kernel_size in conv_blocks:
        features = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(features)
        features = keras.layers.BatchNormalization()(features)
        features = keras.layers.Activation('relu')(features)

    gap_layer = keras.layers.GlobalAveragePooling1D()(features)
    output_layer = keras.layers.Dense(nbclasses, activation='softmax')(gap_layer)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)

    # This value is only the floor for the plateau scheduler; the optimizer
    # itself starts from Adam's default learning rate.
    min_learning_rate = 0.0001
    # NOTE(review): patience equals the default epoch count, so the reduction
    # looks unable to trigger within a default-length run -- confirm intent.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=50, min_lr=min_learning_rate)
    precision = tf.keras.metrics.Precision(name='precision')
    recall = tf.keras.metrics.Recall(name='recall')

    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy', precision, recall])

    model.summary()

    hist = model.fit(train_dataset,
                     epochs=epochs,
                     verbose=True,
                     validation_data=validation_dataset,
                     callbacks=[reduce_lr])  # fit() documents a list of callbacks

    hist_df = pd.DataFrame(hist.history)

    print(hist_df)

    validation_metrics = model.evaluate(validation_dataset, return_dict=True)
    print("Evaluating validation metrics")
    # Iterate the dict evaluate() returned rather than indexing it by
    # model.metrics names, which raises KeyError if the names do not match.
    for metric_name, metric_value in validation_metrics.items():
        print(f"\t{metric_name}: {metric_value:.4f}")
    

    


In [27]:
def centralized_pipeline():
    """Fetch the datasets and run the centralized training loop on them."""
    datasets = get_datasets()
    train_ds, val_ds, num_classes = datasets
    centralized_training_loop(train_ds, val_ds, num_classes)


In [28]:
# Entry point: fetch the datasets and run the centralized training loop,
# which prints the model summary, training history and validation metrics.
centralized_pipeline()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 128, 3)]          0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 128, 128)          3200      
_________________________________________________________________
batch_normalization_6 (Batch (None, 128, 128)          512       
_________________________________________________________________
activation_6 (Activation)    (None, 128, 128)          0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 128, 256)          164096    
_________________________________________________________________
batch_normalization_7 (Batch (None, 128, 256)          1024      
_________________________________________________________________
activation_7 (Activation)    (None, 128, 256)         

Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
        loss  accuracy  precision    recall  val_loss  val_accuracy  \
0   1.619665  0.492147   0.652681  0.291382  1.444735      0.553578   
1   1.464936  0.530508   0.684380  0.352979  1.455088      0.541960   
2   1.381749  0.555805   0.710502  0.388748  1.430807      0.508103   
3   1.297779  0.580587   0.728432  0.429286  1.195640      0.607479   
4   1.227326  0.601031   0.745231  0.459104  1.105272      0.642184   
5   1.180064  0.615341   0.756245  0.485598  1.052810      0.671803   
6   1.132838  0.632760   0.766112  0.508768  1.037624      0.672650   
7   1.096628  0.642300   0.773540  0.524192  1.006999      0.678484   
8   1.064908  0.654666   0.779930  0.541511  0.954270      0.693144   
9   1.044627  0.658140   0

Evaluating validation metrics
	loss: 2.2470
	accuracy: 0.4515
	precision: 0.5631
	recall: 0.3930
