In [85]:
import requests

def get_shared_data(url='http://localhost:5000/data', timeout=10):
  """Fetch the shared data published by the local HTTP service.

  Args:
    url: Endpoint to query. Defaults to the address this notebook has
      always used.
    timeout: Seconds to wait for the server. ``requests`` applies no
      timeout on its own, so without one a stalled server would block the
      cell forever.

  Returns:
    The decoded JSON body when the server answers with HTTP 200,
    otherwise None.

  Raises:
    requests.exceptions.RequestException: when the request cannot be
      completed (e.g. connection refused or timeout exceeded).
  """
  response = requests.get(url, timeout=timeout)
  if response.status_code != 200:
    return None
  return response.json()

# Query the data service once when the cell runs; model_params is None when
# the service does not answer with HTTP 200.
model_params = get_shared_data()
# Echo the raw payload so it can be inspected in the cell output.
print(model_params)



In [86]:
import sys
sys.path.append('..')
from ipfs_configs import retrieve_model_params

def get_model_params(params_array):
  """Collect the parameter package of every entry in ``params_array``.

  Each value of the mapping must provide the keys 'paramHash' and
  'paramKey'; both are passed to ``retrieve_model_params`` (imported from
  ``ipfs_configs``). Falsy results are skipped.

  Returns:
    A list with the truthy results, in iteration order. The list is empty
    when ``params_array`` is None.
  """
  packages = []
  if params_array is not None:
    for entry in params_array.values():
      package = retrieve_model_params(entry['paramHash'], entry['paramKey'])
      if package:
        packages.append(package)
  return packages

# Turn the fetched payload into the list of packages used to build the global
# model; this is an empty list when model_params is None.
federation_packages  = get_model_params(model_params)


In [87]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def get_global_model(federation_packages, random_state=None):
    """Build an unfitted global random forest from the client packages.

    Every package is indexed with the keys 'predictions',
    'feature_importances', 'n_estimators', 'max_features' and 'max_depth'.

    Args:
        federation_packages: Non-empty sequence of client packages.
        random_state: Optional seed forwarded to ``RandomForestClassifier``.
            The default (None) leaves the forest unseeded, as before.

    Returns:
        A tuple ``(global_model, global_predictions, global_importances)``:
        the unfitted classifier, the element-wise mean of the clients'
        predictions and the element-wise mean of their feature importances.

    Raises:
        ValueError: if ``federation_packages`` is empty.
    """
    if len(federation_packages) == 0:
        raise ValueError(
            "federation_packages is empty: at least one client package is "
            "required to build the global model"
        )

    # Stack and average predictions (np.stack needs identically shaped arrays).
    all_predictions = np.stack([pkg['predictions'] for pkg in federation_packages])
    global_predictions = np.mean(all_predictions, axis=0)

    # Stack and average feature importances.
    all_importances = np.stack([pkg['feature_importances'] for pkg in federation_packages])
    global_importances = np.mean(all_importances, axis=0)

    # The mean number of trees is truncated to an integer.
    n_estimators = int(np.mean([pkg['n_estimators'] for pkg in federation_packages]))
    # max_features is not averaged: the first package's setting is reused.
    max_features = federation_packages[0]['max_features']

    # A package may carry max_depth=None (scikit-learn's "unlimited depth"),
    # which np.mean cannot average, so only the bounded depths take part.
    bounded_depths = [
        pkg['max_depth'] for pkg in federation_packages
        if pkg['max_depth'] is not None
    ]
    avg_max_depth = np.mean(bounded_depths) if bounded_depths else 0
    # An average below 1 would truncate to an invalid depth of 0 (or is
    # non-positive), so it is mapped to None, i.e. unlimited depth.
    max_depth = int(avg_max_depth) if avg_max_depth >= 1 else None

    # Initialize the global model; it still has to be fitted by the caller.
    global_model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        max_depth=max_depth,
        random_state=random_state,
    )

    return global_model, global_predictions, global_importances


In [88]:
# Load the dataset from creditcard.csv; the relative path is resolved against
# the kernel's current working directory.
import pandas as pd
data = pd.read_csv("creditcard.csv")

In [89]:
# Separate the target column 'Class' from the remaining feature columns.
Y = data["Class"]
X = data.drop(columns=['Class'])
print(X.shape)
print(Y.shape)
# Keep only the underlying NumPy arrays (no column labels) for processing.
xData, yData = X.values, Y.values



In [90]:
# Split the arrays into training and testing sets: 20% of the rows are held
# out for testing, and the fixed random_state keeps the split identical
# across runs.
from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, test_size = 0.2, random_state = 42)

In [91]:
# Create the (still unfitted) global model from the federation packages.
global_model, global_predictions, global_importances = get_global_model(federation_packages)

# Seed the forest so the metrics below are repeatable from run to run; 42 is
# the same seed the train/test split uses.
global_model.set_params(random_state=42)

# Train the global model with the training data
global_model.fit(xTrain, yTrain)

# Make predictions with global model
global_test_predictions = global_model.predict(xTest)

# Calculate and print performance metrics
print("\nGlobal Model Performance:")
print(f"Accuracy: {accuracy_score(yTest, global_test_predictions):.4f}")
print(f"Precision: {precision_score(yTest, global_test_predictions):.4f}")
print(f"Recall: {recall_score(yTest, global_test_predictions):.4f}")
print(f"F1 Score: {f1_score(yTest, global_test_predictions):.4f}")



In [92]:
#this is the centralized model
#import libraries

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import pickle

# Load dataset
data = pd.read_csv("creditcard.csv")

# A bare expression is rendered only when it is the last statement of a cell,
# so previews in the middle of this cell are printed explicitly.
print(data.head(10))

# Describing the data
print(data.shape)
print(data.describe())

# Imbalance in the data
fraud = data[data['Class'] == 1]
valid = data[data['Class'] == 0]
# Ratio of fraud rows to valid rows (not the share of the whole dataset).
outlierFraction = len(fraud) / float(len(valid))
print(outlierFraction)
# The counts reuse the frames filtered above instead of filtering again.
print('Fraud Cases: {}'.format(len(fraud)))
print('Valid Transactions: {}'.format(len(valid)))

# Amount details for fraudulent transactions
print(fraud.Amount.describe())

# Amount details for normal transactions
print(valid.Amount.describe())


# Target is the 'Class' column; every other column is a feature.
Y = data["Class"]
X = data.drop(columns=['Class'])
print(X.shape)
print(Y.shape)

# Plain NumPy arrays for the estimator.
xData, yData = X.values, Y.values

# Split into training and testing sets with the same arguments as the earlier
# split cell (20% test rows, random_state 42).
from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(xData, yData, test_size = 0.2, random_state = 42)

# Building the centralized Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
# Default hyperparameters, but seeded so the baseline metrics are repeatable
# from run to run; 42 is the same seed the train/test split uses.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(xTrain, yTrain)
# Predictions on the held-out test rows
yPred = rfc.predict(xTest)



In [93]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc

# Test-set predictions of the two models being compared.
centralized_predictions = yPred
federated_predictions = global_test_predictions

# Metric label -> scoring function. The insertion order here fixes the order
# of the entries in both result dictionaries.
metric_functions = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1 Score': f1_score,
}

metrics_centralized = {
    label: score(yTest, centralized_predictions)
    for label, score in metric_functions.items()
}

metrics_federated = {
    label: score(yTest, federated_predictions)
    for label, score in metric_functions.items()
}

# Grouped bar chart: one pair of bars per metric, the centralized bar shifted
# half a bar width to the left and the federated bar to the right.
plt.figure(figsize=(12, 6))
x = list(metrics_centralized)
width = 0.35
half_width = width / 2
tick_positions = range(len(x))

plt.bar([tick - half_width for tick in tick_positions], metrics_centralized.values(), width, label='Centralized model')
plt.bar([tick + half_width for tick in tick_positions], metrics_federated.values(), width, label='DML-CHAIN Federated model')

plt.xlabel('Metrics')
plt.ylabel('Score')
plt.title('Model Performance Comparison')
plt.xticks(tick_positions, x)
plt.legend()
plt.show()



In [94]:
# Side-by-side confusion matrices for the two models.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

cm_centralized = confusion_matrix(yTest, centralized_predictions)
cm_federated = confusion_matrix(yTest, federated_predictions)

# (axes, matrix, title) for each panel, left to right.
panels = (
    (ax1, cm_centralized, 'Centralized Model Confusion Matrix'),
    (ax2, cm_federated, 'DML-CHAIN federated model Confusion Matrix'),
)
for panel_ax, matrix, panel_title in panels:
    sns.heatmap(matrix, annot=True, fmt='d', ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()



In [95]:
# ROC curves are computed from column 1 of predict_proba for each model.
fpr_c, tpr_c, _ = roc_curve(yTest, rfc.predict_proba(xTest)[:, 1])
fpr_f, tpr_f, _ = roc_curve(yTest, global_model.predict_proba(xTest)[:, 1])
roc_auc_c = auc(fpr_c, tpr_c)
roc_auc_f = auc(fpr_f, tpr_f)

# One line per model, then the dashed diagonal as a visual reference.
plt.figure(figsize=(10, 8))
curves = (
    (fpr_c, tpr_c, 'darkorange', f'Centralized model ROC (AUC = {roc_auc_c:.2f})'),
    (fpr_f, tpr_f, 'green', f'DML-CHAIN Federated model ROC (AUC = {roc_auc_f:.2f})'),
)
for false_pos, true_pos, line_color, legend_text in curves:
    plt.plot(false_pos, true_pos, color=line_color, lw=2, label=legend_text)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves Comparison')
plt.legend(loc='lower right')
plt.show()



In [96]:
# Precision, recall and F1 as single values (binary averaging, no per-class
# breakdown).
from sklearn.metrics import precision_recall_fscore_support

prf_centralized = precision_recall_fscore_support(yTest, centralized_predictions, average='binary')
prf_federated = precision_recall_fscore_support(yTest, federated_predictions, average='binary')

# Raw values in table row order: Accuracy, AUC-ROC, Precision, Recall, F1 Score.
centralized_values = [metrics_centralized['Accuracy'], roc_auc_c, *prf_centralized[:3]]
federated_values = [metrics_federated['Accuracy'], roc_auc_f, *prf_federated[:3]]

comparison_data = {
  'Metric': ['Accuracy', 'AUC-ROC', 'Precision', 'Recall', 'F1 Score'],
  'Centralized Model': [f"{value:.4f}" for value in centralized_values],
  'DML-Chain Federated Model': [f"{value:.4f}" for value in federated_values],
}

comparison_df = pd.DataFrame(comparison_data)

from IPython.display import display

# NOTE: the model columns hold formatted strings, so highlight_max compares
# the cells of each row as text.
model_columns = ['Centralized Model', 'DML-Chain Federated Model']
header_style = {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold')]}
cell_style = {'selector': 'td', 'props': [('text-align', 'center')]}

styled_comparison = (
  comparison_df.style
  .set_properties(**{'text-align': 'center'})
  .set_table_styles([header_style, cell_style])
  .highlight_max(axis=1, subset=model_columns, color='green')
)
display(styled_comparison)


