In [None]:
!pip install --upgrade tensorflow-federated
!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

In [1]:
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    sys.path.append("./drive/MyDrive/Colab Notebooks/Projektarbeit_2/")
    BASE_DIR = sys.path[-1]
    !pip install --upgrade tensorflow-federated
    !pip install nest_asyncio
    import nest_asyncio
    nest_asyncio.apply()
    # from google.colab import files
    # files.upload()

else: 
    BASE_DIR = "../"
    sys.path.append(BASE_DIR)

import json
import numpy as np 
import pandas as pd
import tensorflow as tf
# import tensorflow_federated as tff
from sklearn.model_selection import train_test_split
import collections
from tensorflow.keras import Model, callbacks
from tensorflow.keras.layers import Dense, Softmax
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import random
import time
from Reader import Reader
from FLModel import FLModel
from Utils import Utils
import statistics

%load_ext tensorboard

In [3]:
def read_config():
    """Load the project configuration from ``config/config.json``.

    The path is built by plain string concatenation with the module-level
    ``BASE_DIR``, so ``BASE_DIR`` has to end with a path separator.

    Returns:
        The parsed JSON document (the notebook indexes it like a dict).

    Raises:
        FileNotFoundError: if the configuration file does not exist.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    config_file = BASE_DIR + "config/config.json"
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(config_file, encoding="utf-8") as json_file:
        return json.load(json_file)

def split_input_target(input, target):
    """Identity mapping, used as the ``Dataset.map`` hook in ``create_dataset``.

    Args:
        input: feature part of one dataset element.
        target: label part of the same element.

    Returns:
        The 2-tuple ``(input, target)``, unchanged.
    """
    example = (input, target)
    return example

def create_dataset(x, y, use_tff=True):
    """Wrap features and labels in a repeated, shuffled and batched dataset.

    Reads the module-level ``EPOCHS``, ``SHUFFLE_BUFFER`` and ``BATCH_SIZE``.

    Args:
        x: features, sliced along the first axis.
        y: labels, sliced along the first axis.
        use_tff: selects the pipeline variant (see below).

    Returns:
        A ``tf.data.Dataset`` of ``(x, y)`` batches.
        * ``use_tff=True``: repeated ``EPOCHS`` times, the last batch may be
          smaller than ``BATCH_SIZE``.
        * ``use_tff=False``: repeated ``BATCH_SIZE`` times, incomplete batches
          are dropped.
    """
    examples = tf.data.Dataset.from_tensor_slices((x, y))

    if not use_tff:
        # NOTE(review): the repeat count here is BATCH_SIZE, not EPOCHS --
        # confirm this is intended.
        repeated = examples.repeat(BATCH_SIZE)
        shuffled = repeated.shuffle(SHUFFLE_BUFFER)
        return shuffled.batch(BATCH_SIZE, drop_remainder=True)

    repeated = examples.repeat(EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER)
    mapped = shuffled.map(split_input_target)
    return mapped.batch(BATCH_SIZE)

def get_split(x, y):
    """Split features and labels 80 % / 20 % with a fixed random seed.

    Args:
        x: feature array.
        y: label array with the same number of rows as ``x``.

    Returns:
        The result of ``train_test_split``: the larger and the smaller part
        of ``x`` followed by the larger and the smaller part of ``y``.
    """
    held_out_fraction = 0.2
    seed = 42
    parts = train_test_split(x, y, test_size=held_out_fraction, random_state=seed)
    return parts

def create_unfederated_dataset(x, features):
    """Build train / test / validation datasets from the pooled client rows.

    Uses the module-level, already fitted ``scaler``.

    Args:
        x: 2-D array; column 3 is used as the label, columns ``1 .. features-1``
            without column 3 are used as model inputs.
        features: exclusive upper bound of the column slice taken from ``x``.

    Returns:
        ``(train_data, test_data, val_data)`` as produced by
        ``create_dataset(..., use_tff=False)``. Test is 20 % of all rows,
        validation is 20 % of the remaining rows.
    """
    # Columns 1 .. features-1; column 0 is left out by the slice.
    payload = x[:, 1:features]
    # Position 2 of the slice is column 3 of ``x`` (the label): remove it.
    # The reshape keeps the (rows, cols - 1) layout explicit.
    inputs = np.delete(payload, 2, 1).reshape(payload.shape[0], payload.shape[1] - 1)
    client_x = scaler.transform(inputs)
    client_y = x[:, 3].reshape(-1, 1)

    # First cut off the test part, then carve validation out of the rest.
    x_fit, X_test, y_fit, y_test = get_split(client_x, client_y)
    X_train, X_val, y_train, y_val = get_split(x_fit, y_fit)

    splits = ((X_train, y_train), (X_test, y_test), (X_val, y_val))
    train_data, test_data, val_data = [
        create_dataset(split_x, split_y, use_tff=False) for split_x, split_y in splits
    ]
    return train_data, test_data, val_data

In [5]:
config = read_config()
BATCH_SIZE = config["BATCH_SIZE"]
PREFETCH_BUFFER = config["PREFETCH_BUFFER"]
SHUFFLE_BUFFER = config["SHUFFLE_BUFFER"]
CLIENTS = config["CLIENTS"]
DATA_DIR = config["DATA_DIR"]
OUT_DIR = config["OUT_DIR"]
LOG_DIR = config["LOG_DIR"]
EPOCHS = config["EPOCHS"] 
NUM_CLASSES = config["NUM_CLASSES"]
file = config["file_top_apps"]
if IN_COLAB:
    tf_log_dir = "/tmp/logs/scalars/tf_training/"
    !rm -R /tmp/logs/scalars/*

else:
    tf_log_dir = LOG_DIR + "tensorboard/"

In [6]:
# Loss, metric and optimizer objects shared at module level.
# run_unfederated() compiles its model with `client_optimizer`,
# `sparseCategoricalAcc` and `sparseTopKCategoricalAccuracy`.
# NOTE(review): `entropy_loss` and `server_optimzer` are not referenced in the
# cells visible here (the federated run builds its own optimizers) -- confirm
# whether they are still needed.
entropy_loss = tf.keras.losses.SparseCategoricalCrossentropy()
sparseCategoricalAcc = tf.keras.metrics.SparseCategoricalAccuracy()
# Counts a prediction as correct when the label is among the 5 highest scores.
sparseTopKCategoricalAccuracy = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5)
client_optimizer =  tf.keras.optimizers.SGD(learning_rate= 0.6, momentum=0.6, nesterov=True)
server_optimzer = tf.keras.optimizers.SGD(learning_rate= 1.0)

In [7]:
# from client_manager
# Builds one (train, test) dataset pair per selected client. Results:
#   train_data / test_data : lists of datasets, index-aligned with client_ids
#   client_ids             : sorted ids of the clients that got a dataset
#   scaler                 : StandardScaler fitted on the feature columns
train_data = []
test_data = []
client_ids = None
use_tff = False
scaler = StandardScaler()
utils = Utils()
reader = Reader(BASE_DIR + DATA_DIR, file)
data = reader.get_data()

if ("IID" in file):
    # Fixed: this used the undefined name `CLIENTSs` and raised NameError.
    data = utils.create_clients(data, CLIENTS, strict = False)
    reader.set_features(reader.get_features() + 1)
    client_ids = list(range(CLIENTS))

features = reader.get_features()
# Feature columns: all columns except 0 (client id) and 3 (label).
cols = [i for i in range(features) if i not in (0, 3)]
scaler.fit(data[:, cols])

if ((file == "App_usage_trace.txt") or (file == "top_90_apps.csv")):
    data  = utils.map_ids(data.copy())
    num_of_users = int((np.amax(data[:, 0]) + 1))
    client_ids = list(range(0, num_of_users))
    # NOTE(review): the shuffle is not seeded, so a different set of clients
    # is drawn on every run -- seed `random` if runs must be reproducible.
    random.shuffle(client_ids)
    client_ids = client_ids[:CLIENTS]

if client_ids is None:
    # Neither branch above matched; fail with a readable message instead of
    # the TypeError that sorted(None) would raise.
    raise ValueError("No client ids could be derived for data file '{}'".format(file))

# Collect the surviving ids in a new list. Removing entries from the list
# that is being iterated would silently skip the following client.
usable_client_ids = []
for client_id in sorted(client_ids):
    indicees = data[:, 0] == client_id
    former_shape = data[indicees, 1:features].shape
    # The slice already excludes column 0 (client id); position 2 of the
    # slice is column 3 (label) and is removed from the inputs here.
    client_x = np.delete( data[indicees, 1:features], 2, 1 ).reshape(former_shape[0], former_shape[1]-1)
    #scale
    client_x = scaler.transform(client_x)
    client_y = data[indicees, 3].reshape(-1, 1)
    if len(client_x) > 1:
        X_train, X_test, y_train, y_test = train_test_split(client_x, client_y, test_size=0.2, random_state=42)
        ds_train = create_dataset(X_train, y_train, use_tff)
        ds_test = create_dataset(X_test, y_test, use_tff)
        print("Client {}: Created  dataset".format(client_id))

        train_data.append(ds_train)
        test_data.append(ds_test)
        usable_client_ids.append(client_id)
    else:
        # Too few rows to split into train and test: leave this client out.
        print("Could not generate datasets for client {} as there is just one entry in X_train".format(client_id))

client_ids = usable_client_ids

Client 43: Created  dataset
Client 285: Created  dataset
Client 389: Created  dataset
Client 448: Created  dataset
Client 495: Created  dataset
Client 499: Created  dataset
Client 544: Created  dataset
Client 602: Created  dataset
Client 719: Created  dataset
Client 808: Created  dataset


In [8]:
# Sanity check of the dataset signature of the first client.
# For TFF the elements need shape (None, dim), for example:
# (TensorSpec(shape=(None, 3), dtype=tf.float64, name=None),
#  TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))
for per_client_datasets in (train_data, test_data):
    print(per_client_datasets[0].element_spec)

(TensorSpec(shape=(64, 3), dtype=tf.float64, name=None), TensorSpec(shape=(64, 1), dtype=tf.float64, name=None))
(TensorSpec(shape=(64, 3), dtype=tf.float64, name=None), TensorSpec(shape=(64, 1), dtype=tf.float64, name=None))


In [None]:
def create_keras_model(input_dim=3):
    """Create the classifier: one hidden ReLU layer followed by a softmax.

    Args:
        input_dim: number of input features per example.

    Returns:
        An uncompiled ``tf.keras`` Sequential model with a 500-unit hidden
        layer and ``NUM_CLASSES`` (module-level) softmax outputs.
    """
    layer_stack = [
        tf.keras.layers.InputLayer(input_shape=(input_dim,)),
        tf.keras.layers.Dense(500, activation=tf.nn.relu),
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'),
    ]
    return tf.keras.models.Sequential(layer_stack)

# Each time the next method is called, the server model is broadcast to each client using a broadcast function. 
# For each client, one epoch of local training is performed via the tf.keras.optimizers.Optimizer.apply_gradients method of the client optimizer. 
# Each client computes the difference between the client model after training and the initial broadcast model. 
# These model deltas are then aggregated at the server using some aggregation function. 
# The aggregate model delta is applied at the server by using the tf.keras.optimizers.Optimizer.apply_gradients method of the server optimizer.
def model_fn():
    """Create a fresh Keras model and wrap it for TFF.

    Returns:
        The result of ``tff.learning.from_keras_model`` for a new 3-feature
        Keras model, using the element spec of the first client's training
        dataset, sparse categorical cross-entropy as loss, and accuracy plus
        top-5 accuracy as metrics.
    """
    # Imported here because the module-level import is commented out; without
    # it the name `tff` is undefined when this function runs.
    import tensorflow_federated as tff

    # We _must_ create a new model here, and _not_ capture it from an external
    # scope. TFF will call this within different graph contexts.
    keras_model = create_keras_model(3)
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=train_data[0].element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(),
                 tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5)])


def run_federated():
    """Run federated averaging for ``EPOCHS`` rounds over all client datasets.

    Every round trains on the module-level ``train_data``, writes the training
    metrics to the module-level ``summary_writer`` and prints them, then
    evaluates the current server model on ``test_data`` and prints the result.

    Returns:
        None.
    """
    # Imported here because the module-level import is commented out.
    import tensorflow_federated as tff

    with tf.device('/gpu:0'):
        iterative_process = tff.learning.build_federated_averaging_process(
            model_fn,
            client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.6, momentum=0.6, nesterov=True),
            server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
        )

    # The evaluation computation does not depend on the round, so it is built
    # once instead of being rebuilt inside the loop.
    evaluation = tff.learning.build_federated_evaluation(model_fn)

    state = iterative_process.initialize()
    with summary_writer.as_default():
        for round_num in range(EPOCHS):
            state, metrics = iterative_process.next(state, train_data)

            # Note: training metrics reported by the iterative training process
            # generally reflect the performance of the model at the beginning of the training round
            for name, value in metrics['train'].items():
                tf.summary.scalar(name, value, step=round_num)
                print(round_num, name, value)

            test_metrics = evaluation(state.model, test_data)
            print(f"{round_num} Test: {test_metrics}")


In [None]:
# run_federated() writes its scalars through the module-level `summary_writer`,
# so point it at the "federated/" log directory before starting the run.
summary_writer = tf.summary.create_file_writer(tf_log_dir + "federated/")
run_federated()
# Show the logged curves of the federated run.
%tensorboard --logdir {tf_log_dir + "federated/"}

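## Unfederated Training

Centralised baseline: the rows of the same clients that were used for the federated run are pooled into one dataset and a single Keras model is trained on it.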

In [10]:
def run_unfederated(ds_train, ds_test, ds_val, input_dim):
    """Train and evaluate the centralised (non-federated) baseline model.

    Each loop iteration runs one Keras epoch of 64 steps, evaluates the model
    on ``ds_test``, writes train / validation / test metrics to the
    module-level ``summary_writer`` and prints them.

    Training stops early once the validation loss has not improved by at
    least 0.01 for 2 consecutive epochs; the weights of the best epoch are
    restored before returning.

    Args:
        ds_train: batched dataset of ``(features, labels)`` used for training.
        ds_test: batched dataset evaluated after every epoch.
        ds_val: batched dataset used as validation data during ``fit``.
        input_dim: not used; kept so existing calls stay valid.

    Returns:
        None.
    """
    # Early stopping is tracked by hand. fit() is called once per epoch, and a
    # Keras EarlyStopping callback resets its counters at the start of every
    # fit() call, so a callback with patience 2 could never trigger here.
    min_delta = 0.01
    patience = 2
    best_val_loss = float("inf")
    best_weights = None
    epochs_without_improvement = 0

    model = FLModel(NUM_CLASSES)
    model.compile(
        optimizer=client_optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=[
            sparseCategoricalAcc,
            sparseTopKCategoricalAccuracy,
        ],
    )

    for epoch in range(EPOCHS):
        with tf.device('/gpu:0'):
            history = model.fit(
                ds_train,
                steps_per_epoch=64,
                validation_data=ds_val,
                verbose=0)

        # Each fit() call runs exactly one epoch, hence index 0 everywhere.
        logs = history.history
        raw_val_loss = logs['val_loss'][0]

        loss = round(logs["loss"][0], 4)
        acc = round(logs["sparse_categorical_accuracy"][0], 4)
        k_acc = round(logs["sparse_top_k_categorical_accuracy"][0], 4)
        val_loss = round(raw_val_loss, 4)
        val_acc = round(logs['val_sparse_categorical_accuracy'][0], 4)
        val_k_acc = round(logs['val_sparse_top_k_categorical_accuracy'][0], 4)

        with tf.device('/gpu:0'):
            # No batch_size argument: ds_test is expected to be an already
            # batched dataset, for which Keras does not accept a batch size.
            test_loss, test_acc, test_k_acc = model.evaluate(ds_test, verbose=0)

        test_loss = round(test_loss, 4)
        test_acc = round(test_acc, 4)
        test_k_acc = round(test_k_acc, 4)

        with summary_writer.as_default():
            tf.summary.scalar("Loss/train", loss, step=epoch)
            tf.summary.scalar("Acc/train", acc, step=epoch)
            tf.summary.scalar("K_acc/train", k_acc, step=epoch)

            tf.summary.scalar("Loss/validation", val_loss, step=epoch)
            tf.summary.scalar("Acc/validation", val_acc, step=epoch)
            tf.summary.scalar("K_acc/validation", val_k_acc, step=epoch)

            tf.summary.scalar("Loss/test", test_loss, step=epoch)
            tf.summary.scalar("Acc/test", test_acc, step=epoch)
            tf.summary.scalar("K_acc/test", test_k_acc, step=epoch)

        print(
            f'Epoch: {epoch},\n'
            f'Train Loss:\t{loss}, '
            f'Train Accuracy:\t{acc}, '
            f'Train Top 5 Accuracy:\t{k_acc}\n'
            f'Validation Loss:\t{val_loss}, '
            f'Validation Accuracy:\t{val_acc}, '
            f'Validation Top 5 Accuracy:\t{val_k_acc}\n'
            f'Test Loss:\t{test_loss}, '
            f'Test Accuracy:\t{test_acc} '
            f'Test Top 5 Accuracy:\t{test_k_acc}'
            f'\n--------------------------------------------------------------------------------------------------------------------------\n'
        )

        # Same improvement rule as EarlyStopping(monitor='val_loss',
        # min_delta=0.01): the loss must drop by more than min_delta.
        if raw_val_loss < best_val_loss - min_delta:
            best_val_loss = raw_val_loss
            # Assumes FLModel is a Keras model exposing get_weights/set_weights
            # (it is already used via compile/fit/evaluate) -- TODO confirm.
            best_weights = model.get_weights()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                if best_weights is not None:
                    model.set_weights(best_weights)
                print(f'Early stopping after epoch {epoch}: no val_loss improvement for {patience} epochs.')
                break

In [11]:
# !rm -R "/tmp/logs/scalars/tf_training/unfederated/"
# run_unfederated() writes its scalars through the module-level `summary_writer`,
# so point it at the "unfederated/" log directory first.
summary_writer = tf.summary.create_file_writer(tf_log_dir + "unfederated/")

# Keep only the rows (column 0 == client id) of the clients that were selected
# for the federated datasets, so both runs see the same data.
mask = np.isin(data[:, 0], client_ids)
x = data[mask].copy() 

# Pool those rows into one train / test / validation split and train the baseline.
unfederated_train, unfederated_test, unfederated_val = create_unfederated_dataset(x, reader.get_features())
run_unfederated(unfederated_train, unfederated_test, unfederated_val,  (reader.get_features()-2))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Epoch 0,
Train Loss: 5.8816, Train Accuracy: 0.1948, Train Top 5 Accuracy: 0.53
Validation Loss: 4.8007, Validation Accuracy: 0.2307, Validation Top 5 Accuracy: 0.5406
Test Loss: 4.873, Test Accuracy: 0.2088 Test Top 5 Accuracy: 0.5281
--------------------------------------------------------------------------------------------------------------------------

Epoch 1,
Train Loss: 4.5695, Train Accuracy: 0.1982, Train Top 5 Accuracy: 0.5269
Validation Loss: 4.2695, Validation Accuracy: 0.2307, Validation Top 5 Accuracy: 0.5406
Test Loss: 4.3255, Test Accuracy: 0.2088 Test Top 5 Accuracy: 0.5281
--------------------------------------------------------------------------------------------------------------------------

Epoch 2,
Train Loss: 4.2017, Train Accuracy: 0.197, Train Top 5 Accuracy: 0.5247
Validation Loss: 4.0276, Validation Accuracy: 0.2307, Validation Top 5 Accuracy: 0.5406
Test Loss: 4.0766, Test Accuracy: 0.2088 Test Top 5 Accuracy: 0.5281
---------------------------------------

In [12]:
# Show the logged curves of the unfederated run.
%tensorboard --logdir {tf_log_dir + "unfederated/"}

Reusing TensorBoard on port 6006 (pid 9940), started 0:06:13 ago. (Use '!kill 9940' to kill it.)