## Federated data integration testing

In [1]:
import sys
import os
# Extend the import search path, presumably so the `SADL` package imported in
# later cells can be found.
# NOTE(review): ".../S-ADL" looks like a placeholder (three dots is not a
# parent-directory reference) — confirm and point it at the real checkout.
sys.path.append(".../S-ADL")


##### Example of static data set for testing

In [None]:
from SADL.static_data.static_datasets_uci import global_load
from sklearn.model_selection import train_test_split

# Load the dataset by its name in the static UCI datasets repository.
X, y = global_load('default_of_credit_card_clients')

# Hold out 20% of the rows for testing; the split is stratified on the labels
# and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

#### Tests with different models of the flex-anomaly platform

In [4]:
from SADL.federated_data.algorithms import flexanomalies

# Isolation-forest detector configuration. As echoed in this cell's output,
# FlexAnomalyDetection reports n_rounds / n_clients as federated params and
# the remaining entries as model params.
kwargs = dict(
    algorithm_="isolationForest",
    contamination=0.1,
    label_parser=None,
    n_estimators=100,
    n_rounds=10,
    n_clients=5,
)

modelIforest = flexanomalies.FlexAnomalyDetection(**kwargs)
print(modelIforest.get_params())

Federated Params:{'n_rounds': 10, 'n_clients': 5} 
 Model Params:{'algorithm_': 'isolationForest', 'contamination': 0.1, 'label_parser': None, 'n_estimators': 100}
Inspecting model's attributes:
contamination: 0.1
n_estimators: 100
max_samples: 1000
max_features: 1.0
bootstrap: False
n_jobs: 1
behaviour: old
random_state: None
verbose: 0
model_path: 
model: IsolationForest(contamination=0.1, max_samples=1000, n_jobs=1)
algorithm_: isolationForest
label_parser: None
{'label_parser': None, 'algorithm_': 'isolationForest', 'contamination': 0.1, 'n_estimators': 100, 'max_samples': 1000, 'max_features': 1.0, 'bootstrap': False, 'n_jobs': 1, 'behaviour': 'old', 'random_state': None, 'verbose': 0, 'model_path': '', 'model': IsolationForest(contamination=0.1, max_samples=1000, n_jobs=1)}


In [5]:
# Train the isolation-forest detector on the training split. The recorded
# output logs each round followed by one "Training model at client." line per client.
modelIforest.fit(X_train,y_train)


Running round: 0

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 1

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 2

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 3

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 4

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 5

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 6

Training model at client.
Training model at client.
Training model at client.
Training 

<SADL.federated_data.algorithms.flexanomalies.FlexAnomalyDetection at 0x79be7b4c54b0>

In [6]:
# Score the training set and print the scores, then call predict() as the
# last expression so its return value is what the cell displays (in the
# recorded run that is the underlying IsolationForest object).
scores_pred = modelIforest.decision_function(X_train)
print(scores_pred)
modelIforest.predict(X_train)

[0.41818181 0.37036392 0.37577926 ... 0.36658941 0.38596575 0.37542932]
Inspecting model's attributes:
contamination: 0.1
n_estimators: 100
max_samples: 1000
max_features: 1.0
bootstrap: False
n_jobs: 1
behaviour: old
random_state: None
verbose: 0
model_path: 
model: IsolationForest(contamination=0.1, max_samples=1000, n_jobs=1)
algorithm_: isolationForest
label_parser: None


<flexanomalies.utils.iforest.IsolationForest at 0x79be7b4c5b10>

In [7]:
# Element-wise check that the scores stored on the wrapped model equal the
# ones returned by decision_function(), then display the stored labels.
print(modelIforest.model.d_scores_ == scores_pred)
modelIforest.model.labels_


[ True  True  True ...  True  True  True]


array([1., 0., 0., ..., 0., 0., 0.])

In [8]:
# Evaluate on the held-out split; the recorded output reports accuracy,
# precision, F1 score, recall and ROC AUC.
modelIforest.evaluate(X_test,y_test)

Acc: 73.483% 

Precision: 0.280 

F1score: 0.174 

Recall: 0.127 

AUC_ROC: 0.517 



In [11]:
from SADL.federated_data.algorithms import flexanomalies

# Autoencoder detector configuration. input_dim is taken from the number of
# columns in the training split; n_clients / n_rounds are echoed as federated
# params in this cell's output, the rest as model params.
kwargs = dict(
    algorithm_="autoencoder",
    contamination=0.1,
    label_parser=None,
    epochs=100,
    input_dim=X_train.shape[1],
    batch_size=8,
    neurons=[16, 8, 16],
    hidden_act=['relu', 'relu', 'relu'],
    n_clients=2,
    n_rounds=10,
)

modelAE = flexanomalies.FlexAnomalyDetection(**kwargs)
print(modelAE.get_params())


Federated Params:{'n_clients': 2, 'n_rounds': 10} 
 Model Params:{'algorithm_': 'autoencoder', 'contamination': 0.1, 'label_parser': None, 'epochs': 100, 'input_dim': 23, 'batch_size': 8, 'neurons': [16, 8, 16], 'hidden_act': ['relu', 'relu', 'relu']}
Inspecting model's attributes:
contamination: 0.1
input_dim: 23
neurons: [16, 8, 16]
model_path: 
w_size: None
n_pred: 1
callbacks: [<keras.src.callbacks.early_stopping.EarlyStopping object at 0x79be72b75db0>]
hidden_act: ['relu', 'relu', 'relu']
output_act: linear
loss: mse
validation_size: 0.2
batch_size: 8
epochs: 100
optimizer: adam
preprocess: True
scaler: StandardScaler()
model: <Sequential name=sequential_3, built=True>
algorithm_: autoencoder
label_parser: None
{'label_parser': None, 'algorithm_': 'autoencoder', 'contamination': 0.1, 'input_dim': 23, 'neurons': [16, 8, 16], 'model_path': '', 'w_size': None, 'n_pred': 1, 'callbacks': [<keras.src.callbacks.early_stopping.EarlyStopping object at 0x79be72b75db0>], 'hidden_act': ['relu

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the autoencoder detector on the training split.
# NOTE(review): this cell has no execution count in the saved notebook, and the
# recorded output of the following cell reports an unfitted StandardScaler —
# make sure this cell is run before scoring.
modelAE.fit(X_train,y_train)


In [13]:
# Score the training set, print the scores, and finish with predict() so its
# return value is displayed.
# NOTE(review): in the recorded run decision_function() reported that the
# StandardScaler was not fitted and scores_pred printed as None.
scores_pred = modelAE.decision_function(X_train)
print("Scores",scores_pred)
modelAE.predict(X_train)

autoencoder, decision_function(): This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
Scores None
Inspecting model's attributes:
contamination: 0.1
input_dim: 23
neurons: [16, 8, 16]
model_path: 
w_size: None
n_pred: 1
callbacks: [<keras.src.callbacks.early_stopping.EarlyStopping object at 0x79be72b75db0>]
hidden_act: ['relu', 'relu', 'relu']
output_act: linear
loss: mse
validation_size: 0.2
batch_size: 8
epochs: 100
optimizer: adam
preprocess: True
scaler: StandardScaler()
model: <Sequential name=sequential_3, built=True>
algorithm_: autoencoder
label_parser: None
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 650us/step


array([[-0.3729661 ,  0.6556096 ,  0.04646468, ..., -0.05525184,
         0.05991161, -0.05538727],
       [-0.34828734,  0.78528076, -1.0705326 , ..., -0.35974097,
        -0.33439827, -0.34611118],
       [-0.98441905, -1.2883397 ,  0.21904027, ..., -0.33952904,
        -0.24912095, -0.2993713 ],
       ...,
       [-0.42043203,  0.73084366, -1.0925276 , ..., -0.21034884,
        -0.1381892 , -0.23942567],
       [ 1.6222942 , -1.3499044 , -1.0408863 , ..., -0.25874805,
        -0.27248907, -0.2844221 ],
       [ 0.595979  , -1.2626668 , -1.1000922 , ..., -0.24076104,
        -0.16774464, -0.26051873]], dtype=float32)

In [14]:
# Compare the scores stored on the wrapped model with the ones returned by
# decision_function(). The comparison is printed explicitly: a notebook cell
# only displays its last expression, so as a bare statement the result was
# silently discarded. This mirrors the equivalent isolation-forest check.
print(modelAE.model.d_scores_ == scores_pred)
# Last expression of the cell: display the labels stored on the wrapped model.
modelAE.model.labels_


array([0., 0., 0., ..., 0., 0., 0.])

In [15]:
# Evaluate the autoencoder detector on the held-out split; the recorded output
# reports accuracy, precision, F1 score, recall and ROC AUC.
modelAE.evaluate(X_test,y_test)

[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 959us/step
Acc: 72.750% 

Precision: 0.243 

F1score: 0.152 

Recall: 0.110 

AUC_ROC: 0.506 



In [16]:
from SADL.federated_data.algorithms import flexanomalies

# Cluster-based detector configuration. n_clients / n_rounds are echoed as
# federated params in this cell's output, the rest as model params.
kwargs = dict(
    algorithm_="clusterAnomaly",
    contamination=0.1,
    label_parser=None,
    n_clusters=4,
    n_clients=5,
    n_rounds=10,
)

modelCluster = flexanomalies.FlexAnomalyDetection(**kwargs)
print(modelCluster.get_params())

Federated Params:{'n_clients': 5, 'n_rounds': 10} 
 Model Params:{'algorithm_': 'clusterAnomaly', 'contamination': 0.1, 'label_parser': None, 'n_clusters': 4}
Inspecting model's attributes:
contamination: 0.1
n_clusters: 4
seed: None
init_centroids: random
max_iter: 100
tol: 0.0001
verbose: True
n_init: 1
algorithm: lloyd
model_path: 
model: KMeans(init='random', max_iter=100, n_clusters=4, n_init=1)
algorithm_: clusterAnomaly
label_parser: None
{'label_parser': None, 'algorithm_': 'clusterAnomaly', 'contamination': 0.1, 'n_clusters': 4, 'seed': None, 'init_centroids': 'random', 'max_iter': 100, 'tol': 0.0001, 'verbose': True, 'n_init': 1, 'algorithm': 'lloyd', 'model_path': '', 'model': KMeans(init='random', max_iter=100, n_clusters=4, n_init=1)}


In [17]:
# Train the cluster-based detector on the training split. The recorded output
# logs each round followed by one "Training model at client." line per client.
modelCluster.fit(X_train,y_train)



Running round: 0

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 1

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 2

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 3

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 4

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 5

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 6

Training model at client.
Training model at client.
Training model at client.
Training 

<SADL.federated_data.algorithms.flexanomalies.FlexAnomalyDetection at 0x79be58358ca0>

In [18]:
# Score the training set and print the scores, then call predict() as the
# last expression so its return value is what the cell displays (in the
# recorded run that is the underlying ClusterAnomaly object).
scores_pred = modelCluster.decision_function(X_train)
print(scores_pred)
modelCluster.predict(X_train)

[122256.1332972  101202.56907276  77515.42795219 ...  92612.4646725
  71078.74211753  47124.14167828]
Inspecting model's attributes:
contamination: 0.1
n_clusters: 4
seed: None
init_centroids: random
max_iter: 100
tol: 0.0001
verbose: True
n_init: 1
algorithm: lloyd
model_path: 
model: KMeans(init='random', max_iter=100, n_clusters=4, n_init=1)
algorithm_: clusterAnomaly
label_parser: None
cluster_centers_: [[ 3.07025630e+05  1.63229643e+00  1.60919080e+00  1.48379355e+00
   3.69099120e+01 -5.95297986e-01 -8.39176955e-01 -8.44245884e-01
  -8.58149115e-01 -8.68420946e-01 -8.82554425e-01  2.06583839e+04
   1.84501093e+04  1.79356330e+04  1.77207712e+04  1.72412485e+04
   1.71183094e+04  6.92287852e+03  7.44348367e+03  7.04973621e+03
   6.74588344e+03  6.80601702e+03  7.67674537e+03]
 [ 1.97325014e+05  1.60571696e+00  1.90183141e+00  1.53634841e+00
   3.57884681e+01  3.06680891e-01  2.89987420e-01  2.55706988e-01
   2.22204631e-01  1.76001307e-01  1.90396458e-01  1.34487221e+05
   1.31262

<flexanomalies.utils.cluster.ClusterAnomaly at 0x79be58358c10>

In [19]:
# Evaluate the cluster-based detector on the held-out split; the recorded
# output reports accuracy, precision, F1 score, recall and ROC AUC.
modelCluster.evaluate(X_test,y_test)

Acc: 70.783% 

Precision: 0.145 

F1score: 0.090 

Recall: 0.066 

AUC_ROC: 0.478 



In [20]:
from SADL.federated_data.algorithms import flexanomalies

# PCA-based detector configuration, with preprocessing switched off.
# n_clients / n_rounds are echoed as federated params in this cell's output,
# the rest as model params.
kwargs = dict(
    algorithm_="pcaAnomaly",
    contamination=0.1,
    label_parser=None,
    preprocess=False,
    n_components=4,
    n_clients=5,
    n_rounds=10,
)

modelPca = flexanomalies.FlexAnomalyDetection(**kwargs)
print(modelPca.get_params())

Federated Params:{'n_clients': 5, 'n_rounds': 10} 
 Model Params:{'algorithm_': 'pcaAnomaly', 'contamination': 0.1, 'label_parser': None, 'preprocess': False, 'n_components': 4}
Inspecting model's attributes:
contamination: 0.1
n_components: 4
n_selected_components: None
copy: True
whiten: False
svd_solver: auto
tol: 0.0
iterated_power: auto
random_state: None
weighted: (True,)
preprocess: False
model_path: 
scaler: StandardScaler()
model: PCA(n_components=4)
algorithm_: pcaAnomaly
label_parser: None
{'label_parser': None, 'algorithm_': 'pcaAnomaly', 'contamination': 0.1, 'n_components': 4, 'n_selected_components': None, 'copy': True, 'whiten': False, 'svd_solver': 'auto', 'tol': 0.0, 'iterated_power': 'auto', 'random_state': None, 'weighted': (True,), 'preprocess': False, 'model_path': '', 'scaler': StandardScaler(), 'model': PCA(n_components=4)}


In [21]:
# Train the PCA-based detector on the training split. The recorded output
# logs each round followed by one "Training model at client." line per client.
modelPca.fit(X_train,y_train)


Running round: 0

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 1

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 2

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 3

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 4

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 5

Training model at client.
Training model at client.
Training model at client.
Training model at client.
Training model at client.

Running round: 6

Training model at client.
Training model at client.
Training model at client.
Training 

<SADL.federated_data.algorithms.flexanomalies.FlexAnomalyDetection at 0x79be5833b040>

In [22]:
# Score the training set, print the scores, and finish with predict() so its
# return value is displayed.
# NOTE(review): in the recorded run decision_function() reported that
# 'PCA_Anomaly' has no attribute 'selected_components_' and scores_pred printed
# as None — investigate before relying on these scores.
scores_pred = modelPca.decision_function(X_train)
print(scores_pred)
modelPca.predict(X_train)

pcaAnomaly, decision_function(): 'PCA_Anomaly' object has no attribute 'selected_components_'
None
Inspecting model's attributes:
contamination: 0.1
n_components: 4
n_selected_components: None
copy: True
whiten: False
svd_solver: auto
tol: 0.0
iterated_power: auto
random_state: None
weighted: (True,)
preprocess: False
model_path: 
scaler: StandardScaler()
model: PCA(n_components=4)
algorithm_: pcaAnomaly
label_parser: None


<flexanomalies.utils.pca_anomaly.PCA_Anomaly at 0x79be5833a470>

In [23]:
# Evaluate the PCA-based detector on the held-out split; the recorded output
# reports accuracy, precision, F1 score, recall and ROC AUC.
modelPca.evaluate(X_test,y_test)

Acc: 71.317% 

Precision: 0.172 

F1score: 0.107 

Recall: 0.078 

AUC_ROC: 0.486 



##### Test: new model and new function primitives

In [22]:
from SADL.federated_data.algorithms import flexanomalies
class MyAnomalyModel:
    """Minimal stand-in model used to demonstrate algorithm registration.

    It only stores the two constructor arguments as attributes of the same
    name; it defines no training or scoring behaviour.
    """

    def __init__(self, param1, param2):
        # Keep both values exactly as given.
        self.param1, self.param2 = param1, param2

# Register the custom class under the name "myAnomalyModel" so that name can
# be passed as algorithm_ below.
flexanomalies.FlexAnomalyDetection.register_algorithm("myAnomalyModel", MyAnomalyModel)

# Now you can instantiate using the new algorithm
detector = flexanomalies.FlexAnomalyDetection(algorithm_="myAnomalyModel",param1=10, param2=20)
# Attach placeholder federated primitives; each lambda only prints a message.
# NOTE(review): these are registered under "customAnomaly" while the algorithm
# is named "myAnomalyModel", and the lambdas take no arguments — confirm both
# match what register_federated_functions expects.
detector.register_federated_functions("customAnomaly", {
    "build_model": lambda: print("Building Custom Model"),
    "copy": lambda: print("Copying Custom Model"),
    "train": lambda: print("Training Custom Model"),
    "collect": lambda: print("Collecting Custom Model Weights"),
    "aggregate": lambda: print("Aggregating Custom Model Weights"),
    "set_weights": lambda: print("Setting Aggregated Weights"),
})

Federated Params:{} 
 Model Params:{'algorithm_': 'myAnomalyModel', 'param1': 10, 'param2': 20}


In [25]:
# Example: federate an external dataset while keeping the flex dataset format.
from flexanomalies.utils.load_data import federate_data
# Build the federated dataset from the training split and attach it to the
# detector. Presumably the first argument (5) is the number of clients — TODO confirm.
detector.flex_data = federate_data(5, X_train, y_train)