In [None]:
import requests

def get_shared_data(url='http://localhost:5000/data', timeout=10):
    """Fetch the shared JSON payload from the data service.

    Args:
        url: Endpoint to query. Defaults to the local service address.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            notebook cell if the service never answers.

    Returns:
        The decoded JSON body when the service answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the service cannot be
            reached or does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.json()



# Fetch the shared payload from the data service; the result is None when
# the service does not answer with HTTP 200.
model_params = get_shared_data()
print(model_params)



In [137]:
import sys
sys.path.append('..')
from ipfs_configs import retrieve_model_params

def get_model_params(params_array):
    """Retrieve the parameter package referenced by each entry of a mapping.

    Every value of ``params_array`` is read for its ``'paramHash'`` and
    ``'paramKey'`` entries, which are passed to ``retrieve_model_params``.
    Falsy retrieval results are left out of the output.

    Args:
        params_array: Mapping whose values are dicts with ``'paramHash'``
            and ``'paramKey'`` keys, or ``None``.

    Returns:
        A list of the truthy retrieval results, in iteration order of the
        mapping. An empty list when ``params_array`` is ``None``.
    """
    if params_array is None:
        return []

    # Lazily retrieve one package per entry; the mapping keys are not needed.
    fetched = (
        retrieve_model_params(entry['paramHash'], entry['paramKey'])
        for entry in params_array.values()
    )
    return [package for package in fetched if package]

# Retrieve the parameter package referenced by each entry of model_params
# (an empty list when model_params is None).
federation_packages = get_model_params(model_params)

In [138]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

def get_global_model(federation_packages):
    """Build a global logistic-regression model by averaging packages.

    The coefficients, intercepts and scaler statistics of all packages are
    combined with an unweighted mean and written directly onto fresh,
    never-fitted scikit-learn estimators.

    Args:
        federation_packages: Iterable of dicts, each providing
            ``'coefficients'``, ``'intercept'`` and ``'scaler_params'``
            (itself holding ``'scale_'`` and ``'mean_'``).

    Returns:
        A ``(global_model, global_scaler)`` tuple: a ``LogisticRegression``
        carrying the averaged ``coef_`` / ``intercept_`` and a
        ``StandardScaler`` carrying the averaged ``scale_`` / ``mean_``.

    Raises:
        ValueError: If ``federation_packages`` is empty.
        KeyError: If a package lacks one of the required entries.
    """
    # Materialise once: the packages are traversed several times below,
    # which would silently exhaust a generator.
    federation_packages = list(federation_packages)
    if not federation_packages:
        raise ValueError(
            'federation_packages is empty: at least one participant package '
            'is required to build the global model'
        )

    # Average coefficients and intercepts
    all_coefficients = np.stack([pkg['coefficients'] for pkg in federation_packages])
    global_coefficients = np.mean(all_coefficients, axis=0)

    all_intercepts = np.array([pkg['intercept'] for pkg in federation_packages])
    global_intercept = np.mean(all_intercepts)

    # Average scaler parameters (every package must provide them)
    all_scales = np.stack([pkg['scaler_params']['scale_'] for pkg in federation_packages])
    global_scale = np.mean(all_scales, axis=0)

    all_means = np.stack([pkg['scaler_params']['mean_'] for pkg in federation_packages])
    global_mean = np.mean(all_means, axis=0)

    # Create global scaler; only the attributes used by transform() are set
    global_scaler = StandardScaler()
    global_scaler.scale_ = global_scale
    global_scaler.mean_ = global_mean

    # Initialize global model (never fitted; parameters are injected below)
    global_model = LogisticRegression(random_state=42)

    # Set the coefficients and intercept directly
    global_model.coef_ = global_coefficients.reshape(1, -1)
    global_model.intercept_ = np.array([global_intercept])

    # Class labels for binary classification, matching the 1/2 DEATH
    # encoding the model is evaluated against.
    # NOTE(review): the evaluation cell assigns 2 to the '9999-99-99'
    # placeholder in DATE_DIED, which presumably means 2 = survived and
    # 1 = died; confirm against the dataset description.
    global_model.classes_ = np.array([1, 2])

    return global_model, global_scaler

In [139]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from imblearn.under_sampling import RandomUnderSampler

# Load and preprocess data
covid_raw = pd.read_csv('Covid Data.csv')

# Clean data: keep only the rows in which every listed column holds 1 or 2.
cols = ['PNEUMONIA', 'DIABETES', 'COPD', 'ASTHMA', 'INMSUPR', 'HIPERTENSION',
  'OTHER_DISEASE', 'CARDIOVASCULAR', 'OBESITY', 'RENAL_CHRONIC', 'TOBACCO']
valid_rows = covid_raw[cols].isin([1, 2]).all(axis=1)
# Copy so the column assignments below act on an independent frame rather
# than on a slice of covid_raw.
covid = covid_raw.loc[valid_rows].copy()

# Create death column: 2 for the '9999-99-99' placeholder date, 1 otherwise.
covid['DEATH'] = np.where(covid['DATE_DIED'] == '9999-99-99', 2, 1)
covid = covid.drop(columns=['INTUBED', 'ICU', 'DATE_DIED'])
covid['PREGNANT'] = covid['PREGNANT'].replace({97:2, 98:2})

# Prepare features and target
y = covid['DEATH']
X = covid.drop('DEATH', axis=1)

# Scale features
# NOTE(review): the scaler is fitted before the train/test split, so test
# rows influence the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Apply undersampling and split data
undersampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Get predictions
y_pred = model.predict(X_test)

# Build the federated global model from the retrieved packages. Without
# this call global_model is undefined on a fresh kernel.
global_model, global_scaler = get_global_model(federation_packages)

# Get and process test data. X_test is already standardised by the local
# scaler, so applying scaler.transform again would scale it twice. Undo the
# local scaling and apply the federation's averaged scaler instead.
# NOTE(review): assumes the participants used the same feature columns in
# the same order as this notebook; confirm.
X_test_raw = scaler.inverse_transform(X_test)
X_test_processed = global_scaler.transform(X_test_raw)

# Get predictions from global model
global_test_predictions = global_model.predict(X_test_processed)

# Calculate performance metrics
acc = accuracy_score(y_test, global_test_predictions)
prec = precision_score(y_test, global_test_predictions)
rec = recall_score(y_test, global_test_predictions)
f1 = f1_score(y_test, global_test_predictions)

print('Global Model Performance:')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {prec:.4f}')
print(f'Recall: {rec:.4f}')
print(f'F1 Score: {f1:.4f}')

# Store predictions for later visualization (same values as y_pred)
lr_pred = y_pred



In [140]:
# Visualization and comparison for COVID-19 models
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_fscore_support

# Get predictions
centralized_predictions = lr_pred
federated_predictions = global_test_predictions

# Calculate metrics with binary averaging (positive class is the default
# pos_label=1, as in the precision/recall/F1 scores printed earlier)
prf_centralized = precision_recall_fscore_support(y_test, centralized_predictions, average='binary')
prf_federated = precision_recall_fscore_support(y_test, federated_predictions, average='binary')

# Calculate ROC curve metrics. Column 1 of predict_proba is the probability
# of class 2, matching pos_label=2. The federated model is scored on
# X_test_processed, the same features its predictions were made from.
fpr_c, tpr_c, _ = roc_curve(y_test, model.predict_proba(X_test)[:, 1], pos_label=2)
fpr_f, tpr_f, _ = roc_curve(y_test, global_model.predict_proba(X_test_processed)[:, 1], pos_label=2)
roc_auc_c = auc(fpr_c, tpr_c)
roc_auc_f = auc(fpr_f, tpr_f)

# Calculate base metrics
metrics_centralized = {
    'Accuracy': accuracy_score(y_test, centralized_predictions)
}

metrics_federated = {
    'Accuracy': accuracy_score(y_test, federated_predictions)
}

# Scores in table order: accuracy, AUC, precision, recall, F1
centralized_scores = [metrics_centralized['Accuracy'], roc_auc_c, *prf_centralized[:3]]
federated_scores = [metrics_federated['Accuracy'], roc_auc_f, *prf_federated[:3]]

# Create comparison dataframe
comparison_data = {
  'Metric': [
    'Accuracy',
    'AUC-ROC',
    'Precision',
    'Recall',
    'F1 Score',
  ],
  'Centralized Model': [f"{score:.4f}" for score in centralized_scores],
  'DML-Chain Federated Model': [f"{score:.4f}" for score in federated_scores],
  'Difference': [
    f"{central - federated:.4f}"
    for central, federated in zip(centralized_scores, federated_scores)
  ],
}

comparison_df = pd.DataFrame(comparison_data)

# Display styled comparison table
from IPython.display import display
display(comparison_df.style
    .set_properties(**{'text-align': 'center'})
    .set_table_styles([
        {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold')]},
        {'selector': 'td', 'props': [('text-align', 'center')]}
    ])
    .highlight_max(axis=1, subset=['Centralized Model', 'DML-Chain Federated Model'], color='green')
)

# Plot confusion matrices
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Tick labels follow the DEATH encoding: 1 = a real death date was
# recorded, 2 = the '9999-99-99' placeholder. The class order is passed
# explicitly so rows and columns always line up with these labels.
# NOTE(review): assumes the placeholder date means the patient survived;
# confirm against the dataset description.
class_order = [1, 2]
labels = ['Died', 'Survived']

# Centralized model confusion matrix
cm_centralized = confusion_matrix(y_test, centralized_predictions, labels=class_order)
sns.heatmap(cm_centralized, annot=True, fmt='d', ax=ax1, xticklabels=labels, yticklabels=labels)
ax1.set_title('Centralized COVID-19 Model Confusion Matrix')
ax1.set_xlabel('Predicted')
ax1.set_ylabel('Actual')

# Federated model confusion matrix
cm_federated = confusion_matrix(y_test, federated_predictions, labels=class_order)
sns.heatmap(cm_federated, annot=True, fmt='d', ax=ax2, xticklabels=labels, yticklabels=labels)
ax2.set_title('DML-CHAIN Federated COVID-19 Model Confusion Matrix')
ax2.set_xlabel('Predicted')
ax2.set_ylabel('Actual')

fig.tight_layout()
plt.show()

# Plot ROC curves
fig_roc, ax_roc = plt.subplots(figsize=(8, 6))
ax_roc.plot(fpr_c, tpr_c, label=f'Centralized (AUC = {roc_auc_c:.2f})')
ax_roc.plot(fpr_f, tpr_f, label=f'Federated (AUC = {roc_auc_f:.2f})')
ax_roc.plot([0, 1], [0, 1], 'k--')  # chance-level reference line
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('COVID-19 Mortality ROC Curves')
ax_roc.legend()
fig_roc.tight_layout()
plt.show()

# Add bar plot comparison
fig_bar, ax_bar = plt.subplots(figsize=(12, 6))
x = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
width = 0.35
positions = range(len(x))

centralized_values = [metrics_centralized['Accuracy'],
                     prf_centralized[0],
                     prf_centralized[1],
                     prf_centralized[2]]

federated_values = [metrics_federated['Accuracy'],
                   prf_federated[0],
                   prf_federated[1],
                   prf_federated[2]]

# Side-by-side bars: shift each series half a bar width off the tick
ax_bar.bar([i - width/2 for i in positions], centralized_values, width, label='Centralized model')
ax_bar.bar([i + width/2 for i in positions], federated_values, width, label='DML-CHAIN Federated model')

ax_bar.set_xlabel('Metrics')
ax_bar.set_ylabel('Score')
ax_bar.set_title('Model Performance Comparison')
ax_bar.set_xticks(list(positions))
ax_bar.set_xticklabels(x)
ax_bar.legend()
fig_bar.tight_layout()
plt.show()







