In [1]:
import shfl
from shfl.data_base.data_base import LabeledDatabase
from sklearn.datasets import make_classification
import numpy as np

# Create database:
# A synthetic multi-class problem. `random_state` is fixed so that a fresh
# "Restart & Run All" regenerates exactly the same samples.
n_features = 10
n_classes = 4
data, labels = make_classification(
    n_samples=500, n_features=n_features, n_informative=2,
    n_redundant=0, n_repeated=0, n_classes=n_classes,
    n_clusters_per_class=1, weights=None, flip_y=0.1, class_sep=0.1,
    random_state=123)
database = LabeledDatabase(data, labels)

# Seed NumPy's global generator before splitting.
# NOTE(review): presumably load_data() shuffles through np.random, which is
# what makes this seed effective for the train/test split -- confirm.
np.random.seed(123)     # Reproducibility
train_data, train_labels, test_data, test_labels = database.load_data()

In [2]:
# Sanity check: report the split sizes and show one training sample.
print(f"Shape of train and test data: {train_data.shape}{test_data.shape}")
print(f"Shape of train and test labels: {train_labels.shape}{test_labels.shape}")
first_sample = train_data[0, :]
print(first_sample)

Shape of train and test data: (400, 10)(100, 10)
Shape of train and test labels: (400,)(100,)
[ 0.53396551 -3.48735976 -0.17765793 -0.28531268  0.34459187 -0.82653961
 -1.21174669 -1.90996085  2.72846777  0.12601776]


In [3]:
import matplotlib.pyplot as plt


def plot_2D_decision_boundary(model, data, labels, title=None):
    """Draw a classifier's decision regions with the data points on top.

    A regular mesh is laid over the range of the first two columns of
    ``data`` (padded by 1 on every side), ``model.predict`` is evaluated at
    every mesh point, and the predictions are rendered as a coloured
    background. The samples are then scattered over it, coloured by
    ``labels``.

    Parameters
    ----------
    model : object
        Must expose ``predict`` and accept an array with two columns
        (one row per mesh point), i.e. a model fitted on two features.
    data : array
        Samples; only columns 0 and 1 are used, as the x and y coordinates.
    labels : array
        One numeric class label per sample, used to colour the points.
    title : str, optional
        Figure title. No title is set when ``None``.
    """
    # Step size of the mesh. The smaller it is, the better the quality
    # (and the more points the model has to predict).
    h = .02
    # Color map
    cmap = plt.cm.Set1

    # Build the mesh over which the decision boundary is drawn: every point
    # of the mesh is assigned the color of its predicted class.
    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Use one color scale for both layers. Left to their defaults, imshow and
    # scatter each normalise to their own min/max, so a class missing from
    # the predictions would shift the region colors relative to the points.
    vmin = min(np.min(Z), np.min(labels))
    vmax = max(np.max(Z), np.max(labels))

    # Draw everything on the axes created here. Calling plt.clf() after
    # plt.subplots() would detach `ax` from the figure, and the axis labels
    # set on it below would never be shown.
    fig, ax = plt.subplots(figsize=(9, 6))
    ax.imshow(Z, interpolation='nearest',
              extent=(xx.min(), xx.max(), yy.min(), yy.max()),
              cmap=cmap, vmin=vmin, vmax=vmax,
              alpha=0.6,
              aspect='auto', origin='lower')
    # Plot data:
    ax.scatter(data[:, 0], data[:, 1], c=labels, cmap=cmap,
               vmin=vmin, vmax=vmax, s=40, marker='o')

    if title is not None:
        ax.set_title(title)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    plt.show()

In [4]:
from shfl.model.logistic_regression_model import LogisticRegressionModel

# Centralized benchmark: fit a single logistic regression on the full
# training set and score it on the held-out test set.
classes = list(range(n_classes))
model_centralized = LogisticRegressionModel(n_features=n_features, classes=classes)
model_centralized.train(train_data, train_labels)

# The decision-boundary plot is only drawn for two-feature problems.
if n_features == 2:
    plot_2D_decision_boundary(
        model_centralized._model,
        train_data,
        train_labels,
        title="Benchmark: Logistic regression using Centralized data",
    )

print(model_centralized.evaluate(test_data, test_labels))

(0.3252946127946128, 0.0883868410622275)


In [5]:
# Seed NumPy's global generator so the partition is the same on every run.
# NOTE(review): presumably the distribution draws from np.random -- confirm.
np.random.seed(132)     # Reproducibility
iid_distribution = shfl.data_distribution.IidDataDistribution(database)
# NOTE: this rebinds `test_data` and `test_labels`, which were first assigned
# by database.load_data() in the first cell. Cells run after this one see the
# test set returned here.
federated_data, test_data, test_labels = iid_distribution.get_federated_data(num_nodes=5, percent=100)

In [6]:
# Class labels are the integers 0 .. n_classes - 1.
classes = list(range(n_classes))


def model_builder():
    """Return a new LogisticRegressionModel set up for the federated run."""
    return LogisticRegressionModel(
        n_features=n_features,
        classes=classes,
        model_inputs={'warm_start': True, 'tol': 0.0001, 'max_iter': 100},
    )


aggregator = shfl.federated_aggregator.FedAvgAggregator()

In [7]:
# Combine the model factory, the federated data and the aggregator, then run
# two rounds, passing the test set to each round.
federated_government = shfl.federated_government.FederatedGovernment(
    model_builder,
    federated_data,
    aggregator,
)
federated_government.run_rounds(n=2, test_data=test_data, test_label=test_labels)

Accuracy round 0
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d470>: (0.23249158249158247, -0.02174482577940795)
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d438>: (0.31582491582491584, 0.09222470727535859)
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d160>: (0.2781986531986532, 0.03549899531145351)
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d780>: (0.2297138047138047, -0.03851715525174182)
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d898>: (0.2699915824915825, 0.02229174814235446)
Global model test performance : (0.29452861952861953, 0.05759162303664933)



Accuracy round 1
Test performance client <shfl.private.federated_operation.FederatedDataNode object at 0x7ff717c3d470>: (0.23249158249158247, -0.02174482577940795)
Test perform