In [None]:
import requests

def get_shared_data(url='http://localhost:5000/data', timeout=10):
    """Fetch the JSON payload served by the shared-data endpoint.

    Args:
        url: Endpoint to query. Defaults to the local service this notebook
            was written against.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive server and hang the kernel.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.json()

# Fetch the shared payload; this is None when the service does not answer 200.
model_params = get_shared_data()
# NOTE(review): the printed mapping includes every entry's 'paramKey' value, which a
# later cell passes to retrieve_model_params. Confirm it is acceptable to keep
# these values in the notebook's saved output.
print(model_params)

{'7S4ZSD7LR2H6CFIYJVIFBXOHRKNRSZ4MPAY22OTDS445IJKSUZY37ZRJPU': {'paramHash': 'QmWzzk5L1ysDyhgYz1nmf2qHQZrdKXW39iwPQHSXuJpGhM', 'paramKey': 'c3dUOTRXN3hSRWdwQ1NYSkRSTVJ6dE1ZNUxEMUp6QklLcm05aHJiOVJMbz0='}, 'SD4C2NO6AA6MWPK7BMPGDO7SC3GQGI7D2CDSP6LGN2Q76SZFCGDZL5CM4M': {'paramHash': 'QmXew71iMyPng7SKYfuvRdaJwkxa5y8HYukkh425BoPE1n', 'paramKey': 'RENaMUJWNTRLd1ZUVElmQlVjaWVaQV9QQ2NlZmQ4U1NaVm1CdHo2YmNJQT0='}}


In [12]:
import sys
sys.path.append('..')
from ipfs_configs import retrieve_model_params

def get_model_params(params_array):
    """Retrieve the package referenced by every entry of ``params_array``.

    Each value of ``params_array`` is expected to hold a 'paramHash' and a
    'paramKey'; both are handed to ``retrieve_model_params``. Entries whose
    retrieval yields a falsy result are left out.

    Returns:
        A list with the retrieved packages in mapping order, or an empty
        list when ``params_array`` is None.
    """
    if params_array is None:
        return []

    # Lazy generator: entries are still read and retrieved one at a time, in order.
    retrieved = (
        retrieve_model_params(entry['paramHash'], entry['paramKey'])
        for entry in params_array.values()
    )
    return [package for package in retrieved if package]

# Resolve every shared entry into its retrieved package; the result is an empty
# list when model_params is None.
federation_packages  = get_model_params(model_params)


In [13]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def get_global_model(federation_packages, random_state=None):
    """Build an unfitted global RandomForestClassifier from federation packages.

    Every package must provide the keys 'predictions', 'feature_importances',
    'n_estimators', 'max_features' and 'max_depth'. The 'predictions' and
    'feature_importances' arrays must have matching shapes across packages so
    that they can be stacked.

    Note that the averaged predictions and importances are only returned to
    the caller; they are not used to configure the classifier.

    Args:
        federation_packages: Non-empty sequence of package dicts.
        random_state: Seed forwarded to the classifier. The default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        A tuple ``(global_model, global_predictions, global_importances)``:
        the unfitted classifier, the element-wise mean of all predictions and
        the element-wise mean of all feature importances.

    Raises:
        ValueError: If ``federation_packages`` is empty.
    """
    if len(federation_packages) == 0:
        # Fail with a clear message instead of the opaque np.stack error.
        raise ValueError(
            "federation_packages is empty: cannot build a global model "
            "without any retrieved package"
        )

    # Stack and average predictions
    all_predictions = np.stack([pkg['predictions'] for pkg in federation_packages])
    global_predictions = np.mean(all_predictions, axis=0)

    # Stack and average feature importances
    all_importances = np.stack([pkg['feature_importances'] for pkg in federation_packages])
    global_importances = np.mean(all_importances, axis=0)

    # Averaged hyperparameters; max_features is taken from the first package only.
    n_estimators = int(np.mean([pkg['n_estimators'] for pkg in federation_packages]))
    max_features = federation_packages[0]['max_features']

    # A package may store max_depth=None (unlimited depth); np.mean cannot
    # average None, so only the explicit depths take part in the mean.
    explicit_depths = [
        pkg['max_depth'] for pkg in federation_packages if pkg['max_depth'] is not None
    ]
    max_depth = int(np.mean(explicit_depths)) if explicit_depths else None
    if max_depth is not None and max_depth < 1:
        # A truncated mean below 1 is not a valid depth; use None for unlimited depth.
        max_depth = None

    # Initialize global model
    global_model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        max_depth=max_depth,
        random_state=random_state,
    )

    # Return the model along with global predictions and global importances
    return global_model, global_predictions, global_importances


In [14]:
# Load the dataset from creditcard.csv, resolved relative to the current
# working directory.
import pandas as pd
data = pd.read_csv("creditcard.csv")

In [15]:
# Split the frame into the feature columns (everything except 'Class') and the
# 'Class' target column.
X = data.drop(columns=['Class'])
Y = data["Class"]
print(X.shape)
print(Y.shape)
# Keep only the underlying NumPy arrays for processing
# (plain arrays, so the column labels are no longer attached).
xData = X.values
yData = Y.values

(284807, 30)
(284807,)


In [16]:
# Split the arrays into training and testing sets.
from sklearn.model_selection import train_test_split
# 20% of the rows are held out for testing; random_state is fixed so the same
# split is produced on every run.
xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, test_size = 0.2, random_state = 42)

In [17]:
# Build the (still unfitted) global model from the federation packages. The
# averaged predictions and importances are returned as well, but this cell
# does not use them afterwards.
global_model, global_predictions, global_importances = get_global_model(federation_packages)


# Fit the global model on the local training split
global_model.fit(xTrain, yTrain)

# Predict labels for the held-out test split
global_test_predictions = global_model.predict(xTest)

# Compare the predictions against the true test labels and print each metric
# with four decimal places
print("\nGlobal Model Performance:")
print(f"Accuracy: {accuracy_score(yTest, global_test_predictions):.4f}")
print(f"Precision: {precision_score(yTest, global_test_predictions):.4f}")
print(f"Recall: {recall_score(yTest, global_test_predictions):.4f}")
print(f"F1 Score: {f1_score(yTest, global_test_predictions):.4f}")


Global Model Performance:
Accuracy: 0.9996
Precision: 0.9744
Recall: 0.7755
F1 Score: 0.8636
