In [1]:
import pandas as pd

In [14]:
import tensorflow as tf
import collections
import numpy as np
from tensorflow import reshape, nest, config
from tensorflow.keras import losses, metrics, optimizers
import tensorflow_federated as tff
from matplotlib import pyplot as plt
from pathlib import Path

split = 4
NUM_ROUNDS = 5
NUM_EPOCHS = 5
BATCH_SIZE = 1
PREFETCH_BUFFER = 10




df = pd.read_csv('../Downloads/creditcard.csv')
x = np.asanyarray(df.drop('Class',1))
y = np.asanyarray(df['Class'])
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.7)



from imblearn.over_sampling import SMOTE
sm = SMOTE()

X_train,y_train = sm.fit_resample(x_train,y_train)









x_train = x_train.astype(np.float32)/x_train.max()
y_train = y_train.astype(np.int32)/y_train.max()
x_test = x_test.astype(np.float32)/x_test.max()
y_test = y_test.astype(np.int32)/y_test.max()
n_input = x_train.shape[1]
total_count = len(x_train)
data_per_set = int(np.floor(total_count/split))

client_train_dataset = collections.OrderedDict()
for i in range(1, split+1):
    client_name = "client_" + str(i)
    start = data_per_set * (i-1)
    end = data_per_set * i

    print(f"Adding data from {start} to {end} for client : {client_name}")
    data = collections.OrderedDict((('label', y_train[start:end]), ('features', x_train[start:end])))
    client_train_dataset[client_name] = data


SHUFFLE_BUFFER = data_per_set


Adding data from 0 to 49841 for client : client_1
Adding data from 49841 to 99682 for client : client_2
Adding data from 99682 to 149523 for client : client_3
Adding data from 149523 to 199364 for client : client_4


In [15]:
train_dataset = tff.simulation.datasets.TestClientData(client_train_dataset)
sample_dataset = train_dataset.create_tf_dataset_for_client(train_dataset.client_ids[0])
sample_element = next(iter(sample_dataset))
sample_dataset

<TensorSliceDataset element_spec=OrderedDict([('label', TensorSpec(shape=(), dtype=tf.float64, name=None)), ('features', TensorSpec(shape=(30,), dtype=tf.float32, name=None))])>

In [16]:
def preprocess(dataset):

  def batch_format_fn(element):
    """Flatten a batch `pixels` and return the features as an `OrderedDict`."""

    return collections.OrderedDict(
        x=reshape(element['features'], [-1,1,30]),
        y=reshape(element['label'], [-1, 1]))

  return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(1)



In [17]:
preprocessed_sample_dataset = preprocess(sample_dataset)


def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in client_ids]

federated_train_data = make_federated_data(train_dataset, train_dataset.client_ids)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

Number of client datasets: 4
First dataset: <PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 1, 30), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))])>


In [21]:
def create_keras_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(65, input_shape=(1,30), kernel_initializer='he_normal', activation='relu'))
    model.add(tf.keras.layers.Dense(1, kernel_initializer='he_normal', activation='sigmoid'))
    return model




def model_fn():
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
          keras_model,
          loss=losses.BinaryCrossentropy(),
          input_spec=preprocessed_sample_dataset.element_spec,
           
          metrics=[tf.keras.metrics.Accuracy]
      )



iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=1.0))
print(str(iterative_process.initialize.type_signature))

state = iterative_process.initialize()

tff_train_acc = []
tff_val_acc = []
tff_train_loss = []
tff_val_loss = []



( -> <global_model_weights=<trainable=<float32[30,65],float32[65],float32[65,1],float32[1]>,non_trainable=<>>,distributor=<>,client_work=<>,aggregator=<value_sum_process=<>,weight_sum_process=<>>,finalizer=<int64,float32[30,65],float32[65],float32[65,1],float32[1],float32[30,65],float32[65],float32[65,1],float32[1]>>@SERVER)


In [22]:
result = iterative_process.next(state, federated_train_data)
state = result.state
metrics = result.metrics
print('round  1, metrics={}'.format(metrics))

round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.0), ('loss', 0.02522428), ('num_examples', 996820), ('num_batches', 996820)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])


In [23]:
NUM_ROUNDS = 5
for round_num in range(2, NUM_ROUNDS):
  result = iterative_process.next(state, federated_train_data)
  state = result.state
  metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, metrics))

round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.0), ('loss', 0.025300208), ('num_examples', 996820), ('num_batches', 996820)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])
round  3, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.96241546), ('loss', 0.025300207), ('num_examples', 996820), ('num_batches', 996820)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])
round  4, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.9983598), ('loss', 0.025300207), ('num_examples', 996820), ('num_batches', 996820)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])


In [24]:
logdir = "/tmp/logs/scalars/training/"
summary_writer = tf.summary.create_file_writer(logdir)
state = iterative_process.initialize()

In [25]:
with summary_writer.as_default():
  for round_num in range(1, NUM_ROUNDS):
    result = iterative_process.next(state, federated_train_data)
    state = result.state
    metrics = result.metrics
    for name, value in metrics['client_work']['train'].items():
      tf.summary.scalar(name, value, step=round_num)

In [26]:
!ls {logdir}
%tensorboard --logdir {logdir} --port=0

events.out.tfevents.1658934507.ANEESHs-MacBook-Air.local.32230.0.v2
events.out.tfevents.1658934654.ANEESHs-MacBook-Air.local.34508.0.v2
events.out.tfevents.1658940218.ANEESHs-MacBook-Air.local.35602.0.v2
events.out.tfevents.1659024913.ANEESHs-MacBook-Air.local.36751.0.v2
events.out.tfevents.1659246725.ANEESHs-MacBook-Air.local.51605.0.v2


UsageError: Line magic function `%tensorboard` not found.
