<a href="https://colab.research.google.com/github/alessandrotofani/Tesi_magistrale/blob/master/6_Federated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Overview: https://www.tensorflow.org/federated

Image classification tutorial: https://www.tensorflow.org/federated/tutorials/federated_learning_for_image_classification

# Installation

In [1]:
# Install the exact TensorFlow / TensorFlow Federated versions this notebook
# was written against, and upgrade nest_asyncio to the latest release.
# NOTE(review): the two version pins are presumably a matched pair — confirm
# against the TFF compatibility table before changing either one.
!pip install --quiet tensorflow==2.3.0
!pip install --quiet tensorflow_federated==0.17.0
!pip install --quiet --upgrade nest_asyncio
# !pip install -q tfds-nightly

In [2]:
import nest_asyncio
# Apply the nest_asyncio patch before any TFF computation is run.
# NOTE(review): presumably required because the notebook kernel already runs
# an asyncio event loop — confirm against the TFF tutorial.
nest_asyncio.apply()
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [3]:
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
import pandas as pd 
import os
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and the helper module used
# below are read from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import sys 
# Add the project folder on Drive to the import path before importing `mf`.
# NOTE(review): `mf` presumably lives in that folder — confirm.
sys.path.append('/content/drive/MyDrive/Tesi_magistrale/Tesi_magistrale')
import mf

In [5]:
# Read the prepared CSV from Drive, then discard every column whose name
# contains "unnamed" (case-insensitive match).
data = pd.read_csv('/content/drive/MyDrive/Tesi_magistrale/Dataset/IEEE/Output/data.csv')
unnamed_mask = data.columns.str.contains('unnamed', case=False)
data = data.drop(columns=data.columns[unnamed_mask])

In [6]:
# Run the project's feature-engineering helper on the full frame.
# NOTE(review): this cell rebinds `data`, so re-running it applies the helper
# to already-engineered data — re-run the loading cell first.
data = mf.feature_engineering(data)
# Scaling is not applied to the full frame (the call below is commented out);
# mf.feature_scaling is instead called on each split in a later cell.
# data = mf.feature_scaling(data)
# Expand categorical columns into indicator (dummy) columns.
data = pd.get_dummies(data)

In [7]:
# Shuffle all rows and cut the result into 5 parts of (almost) equal size.
# The shuffle is seeded so that the same rows land in the same split on every
# run; without a seed the splits, and every metric computed from them, change
# each time the notebook is executed.
SEED = 42
shuffled = data.sample(frac=1, random_state=SEED)
result = np.array_split(shuffled, 5)

In [8]:
# Scale every split on its own with the project helper, keeping split order.
res = [mf.feature_scaling(split) for split in result]

In [10]:
# Ask the project helper for the column list of the first split and display
# how many entries it contains.
# NOTE(review): mf.get_col is not visible here — presumably it returns the
# feature column names without the label; confirm.
features = mf.get_col(res[0])
len(features)

1103

**TODO:** inserire le prossime due celle nella funzione `preprocess` e vedere se funziona.

Provare anche a sostituire il `tf.cast` per avere un array al posto di un tensor.

In [11]:
# For every scaled split keep the `isFraud` column as the label (a pandas
# Series) and turn all remaining columns into a NumPy feature matrix.
label = [split['isFraud'] for split in res]
new_res = [split.drop(columns=['isFraud']).to_numpy() for split in res]

In [12]:
# Build a tf.data.Dataset of (feature row, label) pairs.
# NOTE(review): only the first split (index 0) is used; in the cells shown the
# remaining entries of new_res / label are never turned into datasets.
dataset = tf.data.Dataset.from_tensor_slices((new_res[0], label[0]))

In [13]:
# Remove the names of the per-split frames and arrays so that their memory can
# be reclaimed; `label` and `dataset` are kept.
del res, new_res

In [14]:
# Input-pipeline settings.
NUM_CLIENTS = 5       # client count constant (not read by preprocess)
NUM_EPOCHS = 2        # how many times each dataset is repeated
BATCH_SIZE = 100      # examples per batch
SHUFFLE_BUFFER = 10   # buffer size passed to Dataset.shuffle
PREFETCH_BUFFER = 10  # buffer size passed to Dataset.prefetch

def preprocess(dataset):
  """Turn a dataset of `(features, label)` pairs into batched model input.

  The dataset is repeated NUM_EPOCHS times, shuffled with a buffer of
  SHUFFLE_BUFFER elements, grouped into batches of BATCH_SIZE, converted to
  the `OrderedDict` layout produced by `batch_format_fn`, and prefetched with
  a buffer of PREFETCH_BUFFER.

  Args:
    dataset: dataset whose elements are `(features, label)` tuples.

  Returns:
    The transformed dataset; each element is an `OrderedDict` with the keys
    `x` (cast to float32) and `y` (cast to int32).
  """

  def batch_format_fn(batch_features, batch_labels):
    """Pack one batch into an `OrderedDict` with keys `x` and `y`."""
    return collections.OrderedDict([
        ('x', tf.cast(batch_features, tf.float32)),
        ('y', tf.cast(batch_labels, tf.int32)),
    ])

  pipeline = dataset.repeat(NUM_EPOCHS)
  pipeline = pipeline.shuffle(SHUFFLE_BUFFER)
  pipeline = pipeline.batch(BATCH_SIZE)
  pipeline = pipeline.map(batch_format_fn)
  return pipeline.prefetch(PREFETCH_BUFFER)

# Preprocess the example dataset; its element_spec is reused later as the
# model's input spec.
preprocessed_example_dataset = preprocess(dataset)

# Take the first batch and convert every tensor in it to a NumPy array so it
# can be inspected.
first_batch = next(iter(preprocessed_example_dataset))
sample_batch = tf.nest.map_structure(lambda tensor: tensor.numpy(), first_batch)

sample_batch

OrderedDict([('x',
              array([[0.20168072, 0.00437422, 0.56248564, ..., 0.        , 0.        ,
                      0.        ],
                     [0.03759842, 0.00168205, 0.44044608, ..., 0.        , 0.        ,
                      0.        ],
                     [0.20247507, 0.00096126, 0.5742125 , ..., 0.        , 0.        ,
                      0.        ],
                     ...,
                     [0.6747616 , 0.00168299, 0.79604506, ..., 0.        , 0.        ,
                      0.        ],
                     [0.29577255, 0.00490808, 0.42504025, ..., 0.        , 0.        ,
                      0.        ],
                     [0.889358  , 0.00260668, 0.18808922, ..., 0.        , 0.        ,
                      0.        ]], dtype=float32)),
             ('y',
              array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            

In [17]:
def make_federated_data(client_data, client_ids):
  """Build one preprocessed dataset per requested client.

  Previously this function ignored `client_data` and always preprocessed the
  notebook-level `dataset`; it now uses its argument.

  Args:
    client_data: either a single `tf.data.Dataset`, which is then used for
      every client, or a sequence/mapping of datasets indexed by client id.
    client_ids: iterable of client ids; one output dataset is produced per id,
      in the same order.

  Returns:
    A list with `preprocess(...)` applied to each selected client dataset.
  """
  if isinstance(client_data, tf.data.Dataset):
    # A single shared dataset: every client gets its own pipeline over it.
    return [preprocess(client_data) for _ in client_ids]
  return [preprocess(client_data[client_id]) for client_id in client_ids]

# Ids of the simulated clients taking part in training (0, 1 and 2).
# The tutorial this cell was adapted from used
# emnist_train.client_ids[0:NUM_CLIENTS] here.
sample_clients = np.arange(0,3)

# NOTE(review): `dataset` is a single tf.data.Dataset built from one split, so
# every entry of federated_train_data is derived from the same records — the
# clients do not hold distinct data. Confirm whether this is intended.
federated_train_data = make_federated_data(dataset, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

Number of client datasets: 3
First dataset: <PrefetchDataset shapes: OrderedDict([(x, (None, 1103)), (y, (None,))]), types: OrderedDict([(x, tf.float32), (y, tf.int32)])>


In [18]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Input

def create_keras_model(input_dim=1103, hidden_units=(512, 256, 128, 24, 4),
                       dropout_rate=0.2):
  """Build the uncompiled fully connected binary classifier.

  The network is an input layer, then one `Dense` (ReLU) + `Dropout` pair per
  entry of `hidden_units`, and finally a single sigmoid output unit. The
  default arguments reproduce the architecture that used to be hard-coded.

  Args:
    input_dim: number of features per example.
    hidden_units: sizes of the hidden `Dense` layers, in order.
    dropout_rate: rate of the `Dropout` layer placed after each hidden layer.

  Returns:
    A `Sequential` model with no optimizer or loss attached.
  """
  model = Sequential()
  model.add(Input(shape=(input_dim,)))
  for units in hidden_units:
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(dropout_rate))
  model.add(Dense(1, activation='sigmoid'))
  return model

def model_fn():
  """Return a TFF model wrapping a newly created Keras network.

  The input spec is taken from `preprocessed_example_dataset`; the loss is
  binary cross-entropy and the reported metric is binary accuracy.
  """
  # The Keras model has to be constructed here on every call rather than
  # captured from an outer scope: TFF calls this function within different
  # graph contexts.
  network = create_keras_model()
  batch_spec = preprocessed_example_dataset.element_spec
  bce_loss = tf.keras.losses.BinaryCrossentropy()
  accuracy = tf.keras.metrics.BinaryAccuracy()
  return tff.learning.from_keras_model(
      network, input_spec=batch_spec, loss=bce_loss, metrics=[accuracy])
  
# Build the federated averaging process from model_fn.
# Client optimizer: SGD with learning rate 0.02.
# Server optimizer: SGD with learning rate 1.0.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

# Create the initial state of the process.
state = iterative_process.initialize()

# Run a single round on the client datasets and print the metrics it returns.
state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

round  1, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('value_sum_process', ()), ('weight_sum_process', ())])), ('train', OrderedDict([('binary_accuracy', 0.96532565), ('loss', 0.16143432)]))])
