In [29]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from ad_method import test_scores

In [30]:
# Load client data from folder
def load_clients_data(folder_path, test_size=0.9, random_state=42):
    """Load one train/test split per client CSV found in ``folder_path``.

    Every file whose name ends in ``.csv`` is read with ``pandas.read_csv``
    and treated as one client. The last column is used as the label and the
    columns between the first and the last are used as features; the first
    column is skipped (presumably an index/id column -- TODO confirm).

    Parameters
    ----------
    folder_path : str
        Directory containing the client CSV files.
    test_size : float or int, default 0.9
        Forwarded to ``train_test_split``. Note that the default keeps only
        10% of each client's rows for training.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    list of dict
        One dict per client with the keys ``'train_data'``,
        ``'train_labels'``, ``'test_data'`` and ``'test_labels'``.
        Empty when the folder holds no ``.csv`` files.
    """
    clients = []
    # os.listdir gives no ordering guarantee; sorting keeps the client order
    # (and therefore everything derived from it) stable between runs.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".csv"):
            continue

        file_path = os.path.join(folder_path, file_name)
        data = pd.read_csv(file_path)

        print(file_name)

        # Features: every column except the first and the last.
        # Labels: the last column.
        X = data.iloc[:, 1:-1].values
        y = data.iloc[:, -1].values

        # Split into train and test (local train-test split for each client)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        clients.append({
            'train_data': X_train,
            'train_labels': y_train,
            'test_data': X_test,
            'test_labels': y_test
        })
    return clients

In [47]:
# Define the Federated Voting Classifier
class FederatedVotingClassifier:
    """Hard-voting ensemble of a random forest, a k-NN model and a bagged forest.

    The three members are trained independently on the same data and
    ``predict`` returns, per sample, the label chosen by most members.

    Attributes
    ----------
    rf : RandomForestClassifier
    knn : KNeighborsClassifier
    bg : BaggingClassifier
    """

    def __init__(self):
        # Initialize base classifiers
        self.rf = RandomForestClassifier(n_estimators=10)
        self.knn = KNeighborsClassifier(n_neighbors=10)
        # `self.rf` is passed as the estimator argument of the bagging model.
        self.bg = BaggingClassifier(estimator=self.rf, n_estimators=10)

    def fit(self, X_train, y_train):
        """Fit each member classifier on ``(X_train, y_train)``."""
        # Train each classifier independently
        self.rf.fit(X_train, y_train)
        self.knn.fit(X_train, y_train)
        self.bg.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the majority-vote label for every row of ``X_test``.

        Member predictions are cast to ``int``, so labels are expected to be
        integer-valued. When all three members disagree, the smallest label
        wins (``argmax`` returns the first maximum).

        Returns
        -------
        numpy.ndarray
            1-D integer array with one label per sample; empty when the
            members return no predictions.
        """
        # Convert predictions to integer numpy arrays, one row per member.
        votes = np.array([
            np.array(model.predict(X_test), dtype=int)
            for model in (self.rf, self.knn, self.bg)
        ])

        # Nothing to vote on: apply_along_axis cannot iterate over zero columns.
        if votes.size == 0:
            return np.array([], dtype=int)

        # np.bincount only accepts non-negative integers, so shift the labels
        # up by the magnitude of the most negative one (e.g. 1 for `-1`
        # labels, 0 when every label is already non-negative).
        offset = max(0, -int(votes.min()))
        shifted = votes + offset

        # Majority voting, column by column; undo the shift on the winner.
        final_prediction = np.apply_along_axis(
            lambda column: np.bincount(column).argmax() - offset,
            axis=0,
            arr=shifted,
        )
        return final_prediction

    def get_params(self):
        """Return the three member estimators keyed by ``"rf"``, ``"knn"``, ``"bg"``."""
        return {
            "rf": self.rf,
            "knn": self.knn,
            "bg": self.bg
        }

    def set_params(self, params):
        """Replace the member estimators with those in ``params``.

        ``params`` must provide the keys ``"rf"``, ``"knn"`` and ``"bg"``.
        """
        self.rf = params["rf"]
        self.knn = params["knn"]
        self.bg = params["bg"]

In [43]:
def aggregate_parameters(client_params):
    # Initialize lists to store parameters
    rf_estimators = []
    knn_neighbors = []
    bg_estimators = []
    
    for params in client_params:
        rf = params["rf"]
        knn = params["knn"]
        bg = params["bg"]
        
        # Random Forest: Collect estimators (trees)
        rf_estimators.extend(rf.estimators_)
        
        # KNN: Collect n_neighbors
        knn_neighbors.append(knn.n_neighbors)
        
        # Bagging: Collect estimators
        bg_estimators.extend(bg.estimators_)
    
    # Aggregate Random Forest
    aggregated_rf = RandomForestClassifier(n_estimators=len(rf_estimators), random_state=42)
    aggregated_rf.estimators_ = rf_estimators
    
    # Aggregate KNN
    aggregated_knn = KNeighborsClassifier(n_neighbors=int(np.mean(knn_neighbors)))
    
    # Aggregate Bagging Classifier
    aggregated_bg = BaggingClassifier(base_estimator=RandomForestClassifier(n_estimators=5, random_state=42),
                                       n_estimators=len(bg_estimators))
    aggregated_bg.estimators_ = bg_estimators
    
    return {"rf": aggregated_rf, "knn": aggregated_knn, "bg": aggregated_bg}


In [44]:
# Federated Learning
def federated_learning(clients, rounds=5):
    """Train local ensembles on every client and merge them into a global model.

    Parameters
    ----------
    clients : list of dict
        Client records with the keys ``'train_data'``, ``'train_labels'``,
        ``'test_data'`` and ``'test_labels'``.
    rounds : int, default 5
        Number of train/aggregate rounds to run.

    Returns
    -------
    FederatedVotingClassifier
        The global model holding the aggregated estimators of the last
        round (or freshly constructed members when ``rounds`` is 0).

    Raises
    ------
    ValueError
        If ``clients`` is empty while ``rounds`` is positive.

    Notes
    -----
    NOTE(review): each round trains brand-new local models and never feeds
    the global model back to the clients, so rounds do not build on each
    other. Confirm whether that is intended.
    """
    # Aggregation needs at least one client's models.
    if rounds > 0 and not clients:
        raise ValueError("federated_learning requires at least one client")

    global_model = FederatedVotingClassifier()
    for r in range(rounds):
        client_params = []
        local_accuracies = []
        for client in clients:
            X_train, y_train = client['train_data'], client['train_labels']
            X_test, y_test = client['test_data'], client['test_labels']

            # Train local model
            local_model = FederatedVotingClassifier()
            local_model.fit(X_train, y_train)

            # Collect local model parameters
            client_params.append(local_model.get_params())

            # Evaluate local model
            y_pred = local_model.predict(X_test)
            local_accuracies.append(accuracy_score(y_test, y_pred))
            test_scores(y_test, y_pred, X_test, "ensemble", "2")

        # Aggregate results
        global_params = aggregate_parameters(client_params)

        # Update global model
        global_model.set_params(global_params)
        print(f"Round {r+1}/{rounds}, mean local accuracy: {np.mean(local_accuracies):.4f}")

    print("Federated Learning Completed.")
    return global_model

In [None]:
%%time
# Build one train/test split per client from the .csv files under ./n
clients = load_clients_data('./n')

In [48]:
%%time
# Run federated learning and keep the returned global model so later cells
# can reuse it instead of retraining.
global_model = federated_learning(clients)

 Anomaly detection method:  ensemble
Accuracy: 1.0
Confusion Matrix: [[ 26199      0]
 [     0 629577]]
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
ROC AUC: 1.0
Average Precision: 1.0
Mean Squared Error: 0.0
Mean Absolute Error: 0.0
Adjusted Rand Index: 1.0
Homogeneity: 1.0
Completeness: 1.0
V-measure: 1.0
Davies-Bouldin Index: 0.1983178432224426
Calinski-Harabasz Index: 981021.3564332413


KeyboardInterrupt: 