In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler


In [5]:
#framinghamdataset.csvLoad dataset
file_path = "framinghamdataset.csv"
df = pd.read_csv(file_path)

# Display basic dataset info
print("Dataset Overview:\n", df.head())
print("\nDataset Shape:", df.shape)




Dataset Overview:
    male  age  education  currentSmoker  cigsPerDay  BPMeds  prevalentStroke  \
0     1   39        4.0              0         0.0     0.0                0   
1     0   46        2.0              0         0.0     0.0                0   
2     1   48        1.0              1        20.0     0.0                0   
3     0   61        3.0              1        30.0     0.0                0   
4     0   46        3.0              1        23.0     0.0                0   

   prevalentHyp  diabetes  totChol  sysBP  diaBP    BMI  heartRate  glucose  \
0             0         0    195.0  106.0   70.0  26.97       80.0     77.0   
1             0         0    250.0  121.0   81.0  28.73       95.0     76.0   
2             0         0    245.0  127.5   80.0  25.34       75.0     70.0   
3             1         0    225.0  150.0   95.0  28.58       65.0    103.0   
4             0         0    285.0  130.0   84.0  23.10       85.0     85.0   

   TenYearCHD  
0           0  

In [6]:
print("Class Distribution Before Upsampling:\n", df['TenYearCHD'].value_counts())

Class Distribution Before Upsampling:
 TenYearCHD
0    3596
1     644
Name: count, dtype: int64


In [7]:
# Check class distribution before upsampling
class_counts = df['TenYearCHD'].value_counts()
majority_count = class_counts[0]  # Count of majority class
minority_count = class_counts[1]  # Count of minority class


In [8]:
# Define upsampling target (e.g., 85% of majority class size)
upsample_target = int(majority_count * 0.85)

# Separate majority and minority classes
df_majority = df[df['TenYearCHD'] == 0]
df_minority = df[df['TenYearCHD'] == 1]

# Perform realistic upsampling (not exactly equal)
df_minority_upsampled = resample(df_minority,
                                 replace=True,  # Sample with replacement
                                 n_samples=upsample_target,  # Upsample to target size
                                 random_state=42)  # Reproducibility

# Combine original majority class with upsampled minority class
df_realistic_balanced = pd.concat([df_majority, df_minority_upsampled])

# Shuffle the dataset
df_realistic_balanced = df_realistic_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Check new class distribution
print("\nClass Distribution After Realistic Upsampling:\n", df_realistic_balanced['TenYearCHD'].value_counts())


Class Distribution After Realistic Upsampling:
 TenYearCHD
0    3596
1    3056
Name: count, dtype: int64


In [9]:
# Separate features and target variable
X = df.drop(columns=['TenYearCHD'])  # Features (all except target)
y = df['TenYearCHD']  # Target variable

# Apply MinMax Scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Convert back to DataFrame
df_scaled = pd.DataFrame(X_scaled, columns=X.columns)
df_scaled['TenYearCHD'] = y  # Add target back

In [10]:
# Split dataset into features and labels
X = df.iloc[:, :-1]  # Features (All columns except last)
y = df.iloc[:, -1]   # Labels (Last column)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\nTraining Set Shape:", X_train.shape)
print("Test Set Shape:", X_test.shape)

# Convert data into NumPy arrays (for PyTorch/TensorFlow compatibility)
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)



Training Set Shape: (3392, 15)
Test Set Shape: (848, 15)


In [11]:
from sklearn.model_selection import train_test_split

# Adjust train-test split to 70% train, 30% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)

# Check new shapes
print("\nNew Training Set Shape:", X_train.shape)
print("New Test Set Shape:", X_test.shape)



New Training Set Shape: (2968, 15)
New Test Set Shape: (1272, 15)


In [12]:
# Define the number of federated clients
num_clients = 3

# Split the training data among clients
client_data = {}
X_splits = np.array_split(X_train, num_clients)
y_splits = np.array_split(y_train, num_clients)

# Store each client's data in a dictionary
for i in range(num_clients):
    client_data[f'client_{i+1}'] = (X_splits[i], y_splits[i])

# Confirm data distribution
for client, (X_c, y_c) in client_data.items():
    print(f"{client}: {X_c.shape}, {y_c.shape}")

print("\nTraining data successfully divided among federated clients! 🚀")


client_1: (990, 15), (990,)
client_2: (989, 15), (989,)
client_3: (989, 15), (989,)

Training data successfully divided among federated clients! 🚀


  return bound(*args, **kwds)
  return bound(*args, **kwds)


In [13]:

def add_noise(data, noise_level=0.01):
    """Add Gaussian noise to the dataset."""
    noise = np.random.normal(loc=0, scale=noise_level, size=data.shape)
    return data + noise

# Apply noise to training data
for i in range(num_clients):
    X_splits[i] = add_noise(X_splits[i])  # Apply noise only to features
    client_data[f'client_{i+1}'] = (X_splits[i], y_splits[i])

print("✅ Gaussian Noise applied to data for better generalization!")

✅ Gaussian Noise applied to data for better generalization!


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import flwr as fl  # ✅ Import Flower
from sklearn.utils.class_weight import compute_class_weight
from flwr.common import Context


In [15]:
class FLModel(nn.Module):
    def __init__(self, input_size):
        super(FLModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.layers(x)




print("✅ Global Model Defined Successfully!")


✅ Global Model Defined Successfully!


FED CLient


In [16]:
from sklearn.utils.class_weight import compute_class_weight

class FLClient(fl.client.NumPyClient):
    def __init__(self, model, train_loader):
        self.model = model
        self.train_loader = train_loader
        self.optimizer = optim.SGD(self.model.parameters(), lr=0.0005, momentum=0.9)  # Lower learning rate

        # Compute class weights dynamically based on training data
        all_labels = []
        for _, y_batch in train_loader:
            all_labels.extend(y_batch.numpy())  # Collect all labels
        
        class_weights = compute_class_weight(class_weight="balanced", 
                                             classes=np.unique(all_labels), 
                                             y=all_labels)
        class_weights = torch.tensor(class_weights, dtype=torch.float32)

        # Define weighted loss function
        self.criterion = nn.BCELoss(weight=class_weights[1])  # Apply weight to positive class

    def get_parameters(self, config):
        return [param.cpu().detach().numpy() for param in self.model.parameters()]

    def set_parameters(self, parameters):
        for param, new_param in zip(self.model.parameters(), parameters):
            param.data = torch.tensor(new_param, dtype=torch.float32)

    def train(self, epochs=30):  # More local epochs
        self.model.train()
        for epoch in range(epochs):
            for X_batch, y_batch in self.train_loader:
                self.optimizer.zero_grad()
                y_pred = self.model(X_batch.float()).squeeze()
                loss = self.criterion(y_pred, y_batch.float())
                loss.backward()
                self.optimizer.step()


    def fit(self, parameters, config):
        """Perform local training and return updated model parameters."""
        self.set_parameters(parameters)
        self.train()  # ✅ Now training for 20 epochs
        return self.get_parameters(config), len(self.train_loader.dataset), {}

    def evaluate(self, parameters, config):
        """Evaluate the local model and return accuracy."""
        self.set_parameters(parameters)
        loss, accuracy = self.test_model()
        return float(loss), len(self.train_loader.dataset), {"accuracy": float(accuracy)}

    def test_model(self):
        """Test the local model and return accuracy."""
        self.model.eval()
        correct, total, loss_total = 0, 0, 0.0
        with torch.no_grad():
            for X_batch, y_batch in self.train_loader:
                y_pred = self.model(X_batch.float()).squeeze()
                loss = self.criterion(y_pred, y_batch.float())
                loss_total += loss.item()
                correct += ((y_pred > 0.5) == y_batch).sum().item()
                total += y_batch.size(0)
        accuracy = correct / total
        return loss_total / total, accuracy


In [17]:
def client_fn(context: Context):
    """Create a federated learning client."""
    client_id = int(context.cid)  # Extract client ID from context

    # Load client data
    X_client, y_client = client_data[f'client_{client_id+1}']
    X_client = X_client.astype(np.float32)
    y_client = y_client.astype(np.float32)

    # Convert to PyTorch dataset
    train_dataset = TensorDataset(torch.tensor(X_client), torch.tensor(y_client))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Create model for this client
    model = FLModel(input_size=X_train.shape[1])

    return FLClient(model, train_loader).to_client()  # ✅ Convert to `Client`

print("✅ Clients are Ready!")


✅ Clients are Ready!


In [18]:
# Initialize the global model
global_model = FLModel(input_size=X_train.shape[1])
initial_parameters = [param.cpu().detach().numpy() for param in global_model.parameters()]

# Define the FL strategy with initial model parameters
strategy = fl.server.strategy.FedAvg(
    initial_parameters=initial_parameters  # Ensure model starts with proper weights
)

# Run FL Simulation
history = fl.simulation.start_simulation(
    client_fn=client_fn,  # Use the client function to get client models
    num_clients=2,  # Number of federated clients
    config=fl.server.ServerConfig(num_rounds=50),  # Run for 5 communication rounds
    strategy=strategy
)

print("✅ Training Completed!")


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=50, no round_timeout
2025-03-18 01:04:08,632	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 16.0, 'object_store_memory': 630183936.0, 'node:127.0.0.1': 1.0, 'memory': 1260367872.0, 'GPU': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      No `client_resources` specified. Using minimal resources for c

✅ Training Completed!


In [19]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader  # ✅ Import Fixed

In [20]:
# Evaluate each client after training
client_accuracies = {}
for i in range(2):  # Since we have 2 clients
    client_model = FLModel(input_size=X_train.shape[1])
    
    # Load client-specific data
    X_client, y_client = client_data[f'client_{i+1}']
    
    # ✅ Convert DataFrame to NumPy arrays
    X_client = X_client.to_numpy()  # Convert X to NumPy array
    y_client = y_client.to_numpy()  # Convert y to NumPy array

    # Convert to PyTorch dataset
    test_dataset = TensorDataset(torch.tensor(X_client, dtype=torch.float32), 
                                 torch.tensor(y_client, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # ✅ Load the latest global model (no need to call `load_state_dict`)
    client_model.load_state_dict(global_model.state_dict())

    # Evaluate accuracy
    client_model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = client_model(X_batch).squeeze()
            y_pred = (y_pred > 0.5).float()  # Apply thresholding for classification
            correct += (y_pred == y_batch).sum().item()
            total += y_batch.size(0)

    accuracy = correct / total if total > 0 else 0
    client_accuracies[f'Client {i+1}'] = accuracy

# Print client accuracies
for client, acc in client_accuracies.items():
    print(f"{client} Accuracy: {acc:.4f}")

# Evaluate the global model on all client data
global_correct, global_total = 0, 0
with torch.no_grad():
    for i in range(2):  # Loop through each client dataset
        X_client, y_client = client_data[f'client_{i+1}']
        
        # ✅ Convert DataFrame to NumPy arrays
        X_client = X_client.to_numpy()
        y_client = y_client.to_numpy()
        
        # Convert data into PyTorch dataset
        test_dataset = TensorDataset(torch.tensor(X_client, dtype=torch.float32), 
                                     torch.tensor(y_client, dtype=torch.float32))
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

        # Evaluate on test data
        for X_batch, y_batch in test_loader:
            y_pred = global_model(X_batch).squeeze()
            y_pred = (y_pred > 0.5).float()  # Thresholding
            global_correct += (y_pred == y_batch).sum().item()
            global_total += y_batch.size(0)

# Compute global model accuracy
global_accuracy = global_correct / global_total if global_total > 0 else 0
print(f"\n🌍 Global Model Accuracy: {global_accuracy:.4f}")


Client 1 Accuracy: 0.1485
Client 2 Accuracy: 0.1345

🌍 Global Model Accuracy: 0.3648
