In [33]:
import pandas as pd
import numpy as np
import random
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

from typing import Callable, Dict, List, Tuple

import tensorflow_federated as tff

import nest_asyncio
nest_asyncio.apply()

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical

In [36]:
def unique(list1):
    """Return the distinct elements of ``list1`` as a new, ascending list.

    Args:
        list1: Any iterable of hashable, mutually comparable values.

    Returns:
        list: Each distinct value exactly once, sorted in ascending order.
    """
    return sorted(set(list1))

def create_userids(df):
    """Return the sorted distinct values found in the last column of ``df``.

    Args:
        df: DataFrame whose last column holds the identifiers to collect
            (presumably user ids, going by the function name; verify against caller).

    Returns:
        list: The distinct values of the last column in ascending order.
    """
    # Same access path as before: the frame is converted to one 2-D array
    # first, then the final column is sliced out of that array.
    last_column = df.values[:, -1]
    user_ids = unique(last_column)
    return user_ids

In [37]:
def split_dataframe(df, n_features: int = 384):
    """Split a flattened-frame table into a feature matrix and one-hot labels.

    Args:
        df: DataFrame whose first ``n_features`` columns are the features and
            which has a ``user`` column holding the label of each row.
        n_features (int): Number of leading columns to use as features.
            Defaults to 384, the value that was previously hard-coded.

    Returns:
        Tuple[np.ndarray, np.ndarray]: ``train_X``, the values of the first
        ``n_features`` columns, and ``train_y``, an integer one-hot matrix with
        one column per distinct value of ``df['user']``.
    """
    train_X = df.iloc[:, :n_features].values
    # Keep the encoded labels in a local array. Writing them back into
    # df['user'] would silently replace the caller's original user ids.
    encoded_users = LabelEncoder().fit_transform(df['user'])
    train_y = to_categorical(encoded_users).astype(int)
    return train_X, train_y

In [23]:
def get_datasets(number_of_clients: int = 10):
    """Load the data and distribute it over simulated federated clients.

    Args:
        number_of_clients (int): How many clients to split the samples across.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        tuple: ``(train_client_data, train_data)`` exactly as returned by
        ``create_uniform_dataset``: the per-client dictionary and the
        TensorFlow Federated dataset built from it.
    """
    train_X, train_y = load_data()
    train_client_data, train_data = create_uniform_dataset(
        train_X, train_y, number_of_clients
    )
    return train_client_data, train_data

In [None]:
def create_uniform_dataset(
    X: np.ndarray, y: np.ndarray, number_of_clients: int
) -> Tuple[Dict, tff.simulation.ClientData]:
    """Deal the samples out to the clients one at a time, in round-robin order.

    Sample ``i`` goes to ``client_{(i % number_of_clients) + 1}``, so client
    sizes differ by at most one sample. Whether every client also receives an
    equal share of each class depends on how ``X``/``y`` are ordered.

    Args:
        X (np.ndarray): Input samples, indexed along the first axis.
        y (np.ndarray): Outputs, aligned with ``X`` by position.
        number_of_clients (int): Number of clients.

    Returns:
        [Dict, tff.simulation.ClientData]: A dictionary mapping each client
        name to ``[inputs, outputs]`` lists, and the tensorflow federated
        dataset built from that dictionary.
    """
    clients_data = {}
    for client_number in range(1, number_of_clients + 1):
        clients_data[f"client_{client_number}"] = [[], []]

    for index in range(len(X)):
        # Look the client's bucket up once and fill both of its lists.
        bucket = clients_data[f"client_{(index % number_of_clients) + 1}"]
        bucket[0].append(X[index])
        bucket[1].append(y[index])

    return clients_data, create_tff_dataset(clients_data)

In [34]:
def create_tff_dataset(clients_data: Dict) -> tff.simulation.ClientData:
    """Function converts dictionary to tensorflow federated dataset.

    Args:
        clients_data (Dict): Maps each client name to a sequence whose element
            0 holds that client's datapoints and element 1 its labels.

    Returns:
        tff.simulation.ClientData: Federated data distribution in which every
        client has a ``"label"`` tensor (int32) and a ``"datapoints"`` tensor
        (float32).
    """
    # Imported here because the notebook's import cell never imports
    # `collections`; relying on the global name raises NameError on a fresh kernel.
    import collections

    client_dataset = collections.OrderedDict()

    for client, samples in clients_data.items():
        client_dataset[client] = collections.OrderedDict(
            (
                ("label", np.array(samples[1], dtype=np.int32)),
                ("datapoints", np.array(samples[0], dtype=np.float32)),
            )
        )

    return tff.simulation.FromTensorSlicesClientData(client_dataset)

In [24]:
def load_data(base_path: str = "C:/Users/SouthSystem/Federated Learning/DataBioCom/data"):
    """Read every accelerometer CSV under ``base_path`` and build the training arrays.

    Every file below ``base_path`` whose name contains ``"accel"`` is read with
    ``pd.read_csv`` and the results are concatenated. The combined table is cut
    into frames per ``player_id`` by ``split_data``, each frame is flattened
    into one row, and the rows are split into features and one-hot labels by
    ``split_dataframe``.

    Args:
        base_path (str): Directory that is searched recursively for the CSV
            files. Defaults to the location that was previously hard-coded.
            The files must contain a ``player_id`` column; the first three
            columns are presumably the accelerometer axes (TODO confirm).

    Returns:
        tuple: ``(train_X, train_y)`` as returned by ``split_dataframe``.

    Raises:
        ValueError: If no file with ``"accel"`` in its name exists under
            ``base_path``.
    """
    phone_accel_file_paths = []

    for dirpath, _subdirectories, files in os.walk(base_path):
        for filename in files:
            if "accel" in filename:
                # Use the directory the file was actually found in. os.walk
                # recurses, so assuming "<base_path>/accel/" breaks for any
                # match located elsewhere in the tree.
                phone_accel_file_paths.append(os.path.join(dirpath, filename))

    if not phone_accel_file_paths:
        raise ValueError(
            f"No file with 'accel' in its name was found under {base_path!r}"
        )

    # Directory listing order is platform dependent; sorting keeps the row
    # order (and therefore the per-client split) reproducible.
    phone_accel_file_paths.sort()

    data = pd.concat(map(pd.read_csv, phone_accel_file_paths))
    users = data['player_id'].unique()

    train_set, user_list = split_data(data, users)
    train_set = np.array([np.array(frame) for frame in train_set])
    # 384 = 128 rows x 3 columns, the frame shape split_data produces by default.
    train_set_join = train_set.reshape(train_set.shape[0], 384)
    data_join = pd.DataFrame(train_set_join)
    data_join['user'] = user_list

    train_X, train_y = split_dataframe(data_join)

    return train_X, train_y

def split_data(data, users, frame_size: int = 128, step: int = 50):
    """Cut each user's rows into fixed-length, overlapping frames.

    For every entry of ``users`` the rows of ``data`` whose ``player_id``
    equals that user are selected and reduced to their first three columns.
    Frames of ``frame_size`` consecutive rows are then taken at start
    positions 0, ``step``, 2 * ``step``, ... for as long as a full frame fits.

    Args:
        data: DataFrame with a ``player_id`` column. Its first three columns
            are the ones kept in each frame (presumably the accelerometer
            axes; TODO confirm).
        users: Iterable of ``player_id`` values to build frames for.
        frame_size (int): Rows per frame. Defaults to 128.
        step (int): Distance in rows between consecutive frame starts.
            Defaults to 50.

    Returns:
        Tuple[list, list]: ``train``, a list of DataFrames of shape
        ``(frame_size, 3)``, and ``user_list``, the user each frame belongs
        to, in the same order.

    Raises:
        ValueError: If ``frame_size`` or ``step`` is not positive.
    """
    if frame_size <= 0 or step <= 0:
        raise ValueError("frame_size and step must be positive integers")

    user_list = []
    train = []

    for user in users:
        # Select the three signal columns once; frames are plain row slices.
        data_user = data[data['player_id'] == user].iloc[:, [0, 1, 2]]
        last_start = data_user.shape[0] - frame_size
        # `last_start + 1` keeps the final full frame. Stopping at `last_start`
        # dropped it whenever the row count lined up with the step, e.g. a
        # user with exactly `frame_size` rows produced no frame at all.
        for start in range(0, last_start + 1, step):
            train.append(data_user.iloc[start:start + frame_size])
            user_list.append(user)

    return train, user_list

In [35]:
# Run the whole pipeline (load the data, then distribute it across clients).
# As the cell's last expression, the returned tuple is what the notebook displays.
# NOTE(review): the saved output below is a pair of arrays, which does not look
# like the (dict, ClientData) tuple get_datasets returns - re-run to refresh it.
get_datasets()

(array([[-0.14604597,  0.80252016,  0.58662611, ..., -0.01514189,
          0.77321327,  0.66184711],
        [ 0.        ,  0.77125949,  0.67503524, ..., -0.10843547,
          0.8078931 ,  0.97250009],
        [ 0.02344551,  0.77809781,  0.65500885, ...,  0.02149171,
          0.61984062,  0.78005153],
        ...,
        [-0.03955078, -0.6050415 , -0.78927612, ..., -0.05117798,
         -0.60906982, -0.78384399],
        [-0.04751587, -0.60830688, -0.78411865, ..., -0.04589844,
         -0.6043396 , -0.78945923],
        [-0.05181885, -0.60850525, -0.73361206, ..., -0.04301453,
         -0.60502625, -0.79290771]]),
 array([[1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 1],
        [0, 0, 0, ..., 0, 0, 1],
        [0, 0, 0, ..., 0, 0, 1]]))