# 1. Import Libraries
First, you need to import the required Python libraries. This is typically done in the first cell of the notebook.

In [3]:
# Cell 1: Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import os
import traceback


# 2. Helper Function for Debugging
Add the debug print function to help trace issues during execution. You may choose to toggle these prints on/off during different stages of testing.

In [5]:
# Cell 2: Debugging Utility
def debug_print(message):
    print(f"DEBUG: {message}")


# 3. Define the Neural Network Model
Next, define the architecture of the neural network HealthcarePredictionModel. This cell can be modified later to tune the model structure if needed.

In [9]:
# Cell 3: Define Neural Network Model
class HealthcarePredictionModel(nn.Module):
    def __init__(self, input_features):
        super(HealthcarePredictionModel, self).__init__()
        self.fc1 = nn.Linear(input_features, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 2)
        self.dropout = nn.Dropout(0.4)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(64)
    
    def forward(self, x):
        x = self.bn1(torch.relu(self.fc1(x)))
        x = self.dropout(x)
        x = self.bn2(torch.relu(self.fc2(x)))
        x = self.dropout(x)
        x = self.bn3(torch.relu(self.fc3(x)))
        x = self.dropout(x)
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        return x


# 4. Data Loading and Preprocessing
This cell defines a function to load the CSV data and preprocess it, including handling missing values, converting categorical 
variables, and scaling features.

In [12]:
# Cell 4: Data Loading and Preprocessing
def load_and_preprocess_data(file_path):
    try:
        debug_print(f"Loading data from {file_path}")
        file_path = file_path.strip('"')
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        data = pd.read_csv(file_path)
        
        debug_print("Handling missing values")
        data = data.dropna()
        
        debug_print("Converting categorical variables")
        categorical_columns = data.select_dtypes(include=['object']).columns
        data = pd.get_dummies(data, columns=categorical_columns)
        
        debug_print("Separating features and labels")
        features = data.iloc[:, :-1].values
        labels = data.iloc[:, -1].values
        
        debug_print("Normalizing features")
        scaler = StandardScaler()
        features = scaler.fit_transform(features)
        
        return torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.long)
    except Exception as e:
        print(f"Error loading data from {file_path}: {str(e)}")
        print("Traceback:")
        traceback.print_exc()
        return None, None


# 5. Federated Training Logic
The federated training logic involves training local models on hospital datasets and aggregating them.

In [15]:
# Cell 5: Federated Training Logic
def train_federated(model, hospital_loaders, val_loader, epochs=50, lr=0.0005):
    criterion = nn.CrossEntropyLoss()
    
    train_losses = []
    val_accuracies = []
    
    for epoch in range(epochs):
        debug_print(f"Starting epoch {epoch + 1}")
        worker_models = []
        epoch_loss = 0
        for i, loader in enumerate(hospital_loaders):
            debug_print(f"Training on hospital dataset {i + 1}")
            local_model = HealthcarePredictionModel(input_features=next(iter(loader))[0].shape[1])
            local_model.load_state_dict(model.state_dict())
            local_optimizer = optim.Adam(local_model.parameters(), lr=lr, weight_decay=1e-5)
            local_scheduler = optim.lr_scheduler.ReduceLROnPlateau(local_optimizer, 'min', patience=5, factor=0.5)
            
            local_model.train()
            batch_losses = []
            for data, target in loader:
                local_optimizer.zero_grad()
                output = local_model(data)
                loss = criterion(output, target)
                loss.backward()
                nn.utils.clip_grad_norm_(local_model.parameters(), max_norm=1.0)
                local_optimizer.step()
                batch_losses.append(loss.item())
            
            avg_loss = np.mean(batch_losses)
            local_scheduler.step(avg_loss)
            epoch_loss += avg_loss
            worker_models.append(local_model.state_dict())
        
        debug_print("Aggregating models")
        avg_model = {}
        for key in model.state_dict().keys():
            avg_model[key] = torch.mean(torch.stack([worker_model[key].float() for worker_model in worker_models]), dim=0)
        model.load_state_dict(avg_model)
        
        train_losses.append(epoch_loss / len(hospital_loaders))
        
        debug_print("Validating model")
        model.eval()
        val_preds = []
        val_true = []
        with torch.no_grad():
            for data, target in val_loader:
                output = model(data)
                pred = output.argmax(dim=1)
                val_preds.extend(pred.cpu().numpy())
                val_true.extend(target.cpu().numpy())
        
        val_accuracy = accuracy_score(val_true, val_preds)
        val_accuracies.append(val_accuracy)
        
        print(f"Epoch {epoch + 1}, Loss: {train_losses[-1]:.4f}, Val Accuracy: {val_accuracy:.4f}")
    
    return train_losses, val_accuracies


# 6. Main Execution
1. The main execution logic where you:
2. Load hospital datasets.
3. Initialize the model.
4. Train the model using federated learning.
5. Plot results and test the model.

In [None]:
# Cell 6: Main Execution
# Get user input for file paths
hospital_files = []
for i in range(3):
    while True:
        file_path = input(f"Enter the file path for hospital {i + 1} dataset (CSV format): ")
        file_path = file_path.strip('"')
        if os.path.exists(file_path):
            hospital_files.append(file_path)
            break
        else:
            print(f"File not found: {file_path}")
            print("Please enter a valid file path.")

# Load and prepare data
hospital_datasets = []
for file in hospital_files:
    data, target = load_and_preprocess_data(file)
    if data is not None and target is not None:
        dataset = TensorDataset(data, target)
        hospital_datasets.append(dataset)

if not hospital_datasets:
    print("No valid data loaded. Exiting program.")
    exit()

debug_print("Creating validation set")
combined_dataset = torch.utils.data.ConcatDataset(hospital_datasets)
train_size = int(0.8 * len(combined_dataset))
val_size = len(combined_dataset) - train_size
train_dataset, val_dataset = random_split(combined_dataset, [train_size, val_size])

debug_print("Creating DataLoaders")
hospital_loaders = [DataLoader(dataset, batch_size=64, shuffle=True) for dataset in hospital_datasets]
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

debug_print("Initializing and training the model")
input_features = next(iter(hospital_loaders[0]))[0].shape[1]
global_model = HealthcarePredictionModel(input_features=input_features)
train_losses, val_accuracies = train_federated(global_model, hospital_loaders, val_loader, epochs=50, lr=0.0005)

debug_print("Plotting training progress")
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(train_losses)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.subplot(1, 2, 2)
plt.plot(val_accuracies)
plt.title('Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.tight_layout()
plt.show()


Enter the file path for hospital 1 dataset (CSV format):  "C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv"
Enter the file path for hospital 2 dataset (CSV format):  "C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv"
Enter the file path for hospital 3 dataset (CSV format):  "C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv"


DEBUG: Loading data from C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv
DEBUG: Handling missing values
DEBUG: Converting categorical variables
DEBUG: Separating features and labels
DEBUG: Normalizing features
DEBUG: Loading data from C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv
DEBUG: Handling missing values
DEBUG: Converting categorical variables
DEBUG: Separating features and labels
DEBUG: Normalizing features
DEBUG: Loading data from C:\Users\Harshan\Documents\dsu hackothon\hospital_1_1data.csv
DEBUG: Handling missing values
DEBUG: Converting categorical variables
DEBUG: Separating features and labels
DEBUG: Normalizing features
DEBUG: Creating validation set
DEBUG: Creating DataLoaders
DEBUG: Initializing and training the model
DEBUG: Starting epoch 1
DEBUG: Training on hospital dataset 1
DEBUG: Training on hospital dataset 2
DEBUG: Training on hospital dataset 3
DEBUG: Aggregating models
DEBUG: Validating model
Epoch 1, Loss: 0.0701, Val Accuracy: 0

# 7. Test Set Evaluation
Finally, handle user input for the test set and evaluate the trained model on the test dataset.

In [None]:
# Cell 7: Test Set Evaluation
while True:
    test_file_path = input("Enter the file path for the test dataset (CSV format): ")
    test_file_path = test_file_path.strip('"')
    if os.path.exists(test_file_path):
        break
    else:
        print(f"File not found: {test_file_path}")
        print("Please enter a valid file path.")

debug_print("Evaluating on test set")
test_data, test_labels = load_and_preprocess_data(test_file_path)
global_model.eval()
with torch.no_grad():
   
