Setup & Imports

In [None]:
# General utilities
import os
import numpy as np
import pandas as pd
import random
from collections import defaultdict

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Set seed for reproducibility
def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy's legacy global generator, and PyTorch's
    CPU and CUDA generators with the same value.

    Args:
        seed: Integer seed applied to all generators (default 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

set_seed(42)

# Device configuration: use the GPU when one is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


Data Preparation

In [None]:
# Load the dataset
df = pd.read_csv("cleaned_ADHI.csv")

# Filter valid rows; copy so the column assignments below write to an
# independent frame rather than a possible view of the raw data.
required_cols = ["Catchment", "Mean_annual_precip", "lc_urban", "Maxi_q", "q95th", "Country"]
df = df.dropna(subset=required_cols).copy()

# Create binary flood year target: 1 when the yearly maximum flow exceeds the
# 95th-percentile flow column, else 0.
df["flood_year"] = (df["Maxi_q"] > df["q95th"]).astype(int)

# Normalize features
# NOTE(review): the scaler is fit on every row before the train/test split, so
# test-set statistics influence the training features. Consider fitting on the
# training rows only if strict hold-out evaluation is required.
features = ["Catchment", "Mean_annual_precip", "lc_urban"]
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

# Group into train/test per country.
# sort=False keeps countries in order of first appearance (same order as
# df["Country"].unique()), and rows inside each group keep their original order.
federated_data = {}
for country, country_df in df.groupby("Country", sort=False):
    if len(country_df) < 30:
        continue  # Skip small datasets

    X = country_df[features].values
    y = country_df["flood_year"].values

    # A stratified split needs at least two samples of every class that is
    # present; otherwise train_test_split raises ValueError. Fall back to a
    # plain random split for such countries instead of aborting the whole cell.
    class_counts = np.bincount(y, minlength=2)
    present_counts = class_counts[class_counts > 0]
    can_stratify = present_counts.min() >= 2
    if not can_stratify:
        print(f"{country}: a class has a single sample; splitting without stratification.")

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        stratify=y if can_stratify else None,
        test_size=0.2,
        random_state=42,
    )

    federated_data[country] = {
        "train": (torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)),
        "test": (torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))
    }

print(f"Prepared data for {len(federated_data)} countries.")


Prepared data for 20 countries.


Model Definition (Binary Classifier)

In [None]:
class FloodYearClassifier(nn.Module):
    """Two-layer perceptron that emits one logit per sample.

    Architecture: Linear(input_dim -> hidden_dim) -> ReLU -> Linear(hidden_dim -> 1).
    The output is a raw logit (no sigmoid is applied here).

    Args:
        input_dim: Number of input features per sample (default 3).
        hidden_dim: Width of the hidden layer (default 16).
    """

    def __init__(self, input_dim=3, hidden_dim=16):
        super().__init__()
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),  # Single output (logit)
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits with dimension 1 squeezed out: [batch_size, 1] -> [batch_size]."""
        logits = self.net(x)
        return logits.squeeze(1)


Federated Training Loop

In [None]:
def train_federated_model(
    federated_data,
    input_dim=3,
    hidden_dim=16,
    num_rounds=5,
    local_epochs=3,
    batch_size=16,
    lr=0.001
):
    """Train a global FloodYearClassifier with federated averaging.

    In every communication round each client (one entry of `federated_data`)
    starts from the current global weights, trains locally with Adam and
    BCE-with-logits loss, and the resulting client weights are averaged with
    equal weight per client to form the next global model.

    Args:
        federated_data: Mapping of client name -> {"train": (X, y), ...} where
            X and y are tensors accepted by `TensorDataset`.
        input_dim: Number of input features passed to the classifier.
        hidden_dim: Hidden layer width passed to the classifier.
        num_rounds: Number of communication rounds.
        local_epochs: Passes over each client's training data per round.
        batch_size: Mini-batch size for local training.
        lr: Adam learning rate for local training.

    Returns:
        The global model after the final round (on the module-level `device`).

    Raises:
        ValueError: If `federated_data` contains no clients.
    """
    # Averaging over zero clients is undefined; fail early with a clear message
    # instead of an opaque torch.stack error at the end of the first round.
    if not federated_data:
        raise ValueError("federated_data is empty; at least one client is required for federated training.")

    # Initialize the global model
    global_model = FloodYearClassifier(input_dim, hidden_dim).to(device)
    global_weights = global_model.state_dict()

    # The loss module is stateless, so a single instance serves every client.
    criterion = nn.BCEWithLogitsLoss()

    for round_num in range(num_rounds):
        print(f"\n Communication Round {round_num + 1}/{num_rounds}")
        local_states = []

        for country, data in federated_data.items():
            X_train, y_train = data["train"]

            # Build local model and load global weights
            local_model = FloodYearClassifier(input_dim, hidden_dim).to(device)
            local_model.load_state_dict(global_weights)
            local_model.train()

            # A fresh optimizer per client and round: Adam state is not shared.
            optimizer = torch.optim.Adam(local_model.parameters(), lr=lr)
            loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

            # Train locally
            for _ in range(local_epochs):
                for xb, yb in loader:
                    xb, yb = xb.to(device), yb.to(device)
                    optimizer.zero_grad()
                    output = local_model(xb)
                    loss = criterion(output, yb)
                    loss.backward()
                    optimizer.step()

            local_states.append(local_model.state_dict())

        # Federated Averaging (FedAvg): element-wise mean of each tensor across
        # clients, every client weighted equally.
        new_state_dict = {
            key: torch.stack([state[key] for state in local_states], dim=0).mean(dim=0)
            for key in global_weights.keys()
        }

        global_model.load_state_dict(new_state_dict)
        global_weights = global_model.state_dict()

        print("Updated global model.")

    return global_model


Evaluation & Saving

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_federated_model(model, federated_data):
    """Evaluate `model` on every client's test split and tabulate the metrics.

    Logits are passed through a sigmoid and thresholded at 0.5 to obtain binary
    predictions. Accuracy, precision, recall and F1 are computed per client,
    printed, and collected.

    Args:
        model: Classifier returning one logit per sample; switched to eval mode.
        federated_data: Mapping of client name -> {"test": (X_test, y_test), ...}.

    Returns:
        pandas.DataFrame with one row per client and the columns
        "Country", "Accuracy", "Precision", "Recall", "F1".
    """
    model.eval()
    all_metrics = []

    print("\n Evaluation per country:")
    for country, data in federated_data.items():
        X_test, y_test = data["test"]
        with torch.no_grad():
            probs = torch.sigmoid(model(X_test.to(device))).cpu().numpy()
        preds_binary = (probs >= 0.5).astype(int)

        # Labels are stored as float tensors; compare as integer class labels.
        y_true = y_test.cpu().numpy().astype(int)

        # zero_division=0 yields the same 0.0 score as the default when a
        # client has no positive predictions/labels, without emitting an
        # UndefinedMetricWarning for each such client.
        acc = accuracy_score(y_true, preds_binary)
        prec = precision_score(y_true, preds_binary, zero_division=0)
        rec = recall_score(y_true, preds_binary, zero_division=0)
        f1 = f1_score(y_true, preds_binary, zero_division=0)

        all_metrics.append({
            "Country": country,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1": f1
        })

        print(f"  {country:20} | Acc: {acc:.2f} | Prec: {prec:.2f} | Rec: {rec:.2f} | F1: {f1:.2f}")

    return pd.DataFrame(all_metrics)


In [None]:
# Train the global model with federated averaging (input_dim = 3 features used
# in flood_year classification). A freshly constructed FloodYearClassifier only
# holds random initial weights, so the model must come out of the training loop
# before it is evaluated or saved as the trained model.
model = train_federated_model(federated_data, input_dim=3, hidden_dim=16)

In [None]:
# Switch the model to evaluation (inference) mode. eval() returns the module,
# so as the last expression of the cell its structure is also displayed.
model.eval()

In [None]:
# Save the trained model weights
# Only the state_dict (parameter tensors) is written, not the module object;
# loading requires constructing a FloodYearClassifier with matching dimensions.
# NOTE(review): confirm `model` has actually been trained before this cell runs.
torch.save(model.state_dict(), "federated_flood_year_model.pt")
print("\n  Model saved as federated_flood_year_model.pt")

In [None]:
import os
import zipfile
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# --- Extract the zip file ---
zip_path = "/content/data_by_country.zip"
extract_dir = "/content/data_by_country/data_by_country"
os.makedirs(extract_dir, exist_ok=True)

with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_dir)

print("Files extracted.")

# --- Define relevant columns ---
features = ["Catchment", "Mean_annual_precip", "lc_urban"]
target = "flood_year"

# --- Store all trained models here ---
local_models = {}

# --- Loop through each country file ---
# os.listdir returns entries in arbitrary order; sort so the training order
# (and therefore the RNG stream each model consumes) is reproducible.
for file_name in sorted(os.listdir(extract_dir)):
    if not file_name.endswith(".csv"):
        continue

    # Strip only the trailing extension to obtain the country name.
    country = os.path.splitext(file_name)[0]
    file_path = os.path.join(extract_dir, file_name)

    try:
        df = pd.read_csv(file_path)
        df = df.dropna(subset=features + [target])
        # NOTE(review): each file is standardized with its own statistics, so
        # the same raw value maps to different inputs for different countries.
        df[features] = StandardScaler().fit_transform(df[features])

        X = torch.tensor(df[features].values, dtype=torch.float32)
        y = torch.tensor(df[target].values, dtype=torch.float32)

        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=16, shuffle=True)

        # Define local model
        model = FloodYearClassifier(input_dim=3, hidden_dim=16).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        loss_fn = torch.nn.BCEWithLogitsLoss()

        # Train locally
        model.train()
        for epoch in range(5):
            for xb, yb in loader:
                xb, yb = xb.to(device), yb.to(device)
                # The classifier already returns shape [batch]. A further
                # squeeze(-1) would turn a single-sample final batch into a
                # 0-d tensor, which BCEWithLogitsLoss rejects against the [1]
                # target and which previously caused the whole country to be
                # skipped by the except branch below.
                pred = model(xb)
                loss = loss_fn(pred, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        print(f"Trained local model for: {country}")
        local_models[country] = model

    except Exception as e:
        # Best-effort: a file that cannot be read or trained on is reported
        # and left out of local_models rather than stopping the loop.
        print(f" Skipping {country}: {e}")


In [None]:
import copy

def fedavg(models_dict):
    """Federated Averaging of model weights.

    Builds a new model whose every state_dict entry is the element-wise mean of
    the corresponding entry across all supplied models, each model weighted
    equally. The input models are not modified.

    Args:
        models_dict: Mapping of client name -> model. All models must share the
            same architecture (identical state_dict keys and tensor shapes).

    Returns:
        A deep copy of the first model loaded with the averaged state.

    Raises:
        ValueError: If `models_dict` is empty.
    """
    models = list(models_dict.values())
    if not models:
        raise ValueError("fedavg requires at least one model to aggregate.")

    global_model = copy.deepcopy(models[0])  # Start from one of the models

    # Materialize each state_dict once instead of rebuilding it per parameter.
    client_states = [m.state_dict() for m in models]

    averaged_state = {}
    with torch.no_grad():
        for key, reference in client_states[0].items():
            # Stack and average parameters across models
            stacked = torch.stack([state[key] for state in client_states], dim=0)
            if stacked.is_floating_point() or stacked.is_complex():
                averaged_state[key] = stacked.mean(dim=0)
            else:
                # Integer/bool buffers (e.g. BatchNorm's num_batches_tracked)
                # cannot be averaged directly: average in float, round, and
                # cast back to the buffer's dtype.
                averaged_state[key] = (
                    stacked.to(torch.float32).mean(dim=0).round().to(reference.dtype)
                )

    global_model.load_state_dict(averaged_state)
    return global_model

# Aggregate the per-country models into one global model via fedavg
# (fails if local_models is empty, i.e. every file was skipped).
global_model = fedavg(local_models)

# Save the global model
# Only the state_dict is written; loading requires constructing a module with
# the same architecture first.
torch.save(global_model.state_dict(), "global_flood_year_model.pt")
print("Global Flood Year model saved as 'global_flood_year_model.pt'")
