🧩 STEP 1: Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the dataset directory and the output
# model path configured below both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


🗂 STEP 2: Set up paths and install dependencies

In [None]:
!pip install tensorflow numpy scikit-learn

import os
import tensorflow as tf
import numpy as np
import random
from sklearn.model_selection import train_test_split




In [None]:
# Change path according to your dataset folder
DATA_DIR = "/content/drive/MyDrive/PCOS MINOR/data/train"

IMAGE_SIZE = (224, 224)   # target size passed to tf.image.resize
BATCH_SIZE = 128          # default batch size of every tf.data pipeline
NUM_CLIENTS = 10          # number of simulated federated clients (data shards)
LOCAL_EPOCHS = 2          # epochs each client trains per communication round
COMMS_ROUNDS = 20         # number of FedAvg communication rounds
LEARNING_RATE = 1e-4      # Adam learning rate
# The file name is derived from the settings above so that it cannot drift out
# of sync with the configuration that actually produced the model.
OUTPUT_WEIGHTS_FILE = (
    "/content/drive/MyDrive/PCOS MINOR/"
    f"Federated_ResNet50_PCOS ({COMMS_ROUNDS} Comm Round {BATCH_SIZE} BS).h5"
)


🧠 STEP 3: Define the helper functions

In [None]:
def build_resnet50_model(num_classes, input_shape=(224,224,3), dropout_rate=0.3):
    """Build a softmax classifier on top of a frozen, ImageNet-pretrained ResNet50.

    The ResNet preprocessing step is part of the model graph: inputs pass
    through ``preprocess_input`` before they reach the backbone.

    Args:
        num_classes: Number of units in the final softmax layer.
        input_shape: Shape of one input image.
        dropout_rate: Dropout rate applied to the pooled backbone features.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping images to class probabilities.
    """
    backbone = tf.keras.applications.ResNet50(
        weights='imagenet',
        include_top=False,
        pooling='avg',
        input_shape=input_shape,
    )
    # Only the layers added below are trainable.
    backbone.trainable = False

    image_in = tf.keras.Input(shape=input_shape)
    # training=False runs the backbone in inference mode.
    features = backbone(
        tf.keras.applications.resnet.preprocess_input(image_in),
        training=False,
    )
    features = tf.keras.layers.Dropout(dropout_rate)(features)
    class_probs = tf.keras.layers.Dense(num_classes, activation='softmax')(features)
    return tf.keras.Model(image_in, class_probs)

def gather_filepaths_and_labels(data_dir):
    """Collect file paths and integer labels from a one-folder-per-class layout.

    Every non-hidden sub-directory of ``data_dir`` is a class; class indices
    follow the alphabetical order of the directory names. Hidden entries
    (names starting with ``.``, e.g. ``.ipynb_checkpoints`` or ``.DS_Store``)
    are skipped so they become neither a bogus class nor an undecodable image.

    Args:
        data_dir: Root directory containing one sub-directory per class.

    Returns:
        Tuple ``(paths, labels, classes)``: a NumPy array of file paths, a
        NumPy array with the class index of each path, and the sorted list of
        class names.
    """
    classes = sorted(
        d for d in os.listdir(data_dir)
        if not d.startswith('.') and os.path.isdir(os.path.join(data_dir, d))
    )
    paths, labels = [], []
    for idx, cls in enumerate(classes):
        cls_dir = os.path.join(data_dir, cls)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and any seeded split computed from it) reproducible.
        for fname in sorted(os.listdir(cls_dir)):
            if fname.startswith('.'):
                continue
            fpath = os.path.join(cls_dir, fname)
            if os.path.isfile(fpath):
                paths.append(fpath)
                labels.append(idx)
    return np.array(paths), np.array(labels), classes

def paths_to_dataset(paths, labels, batch_size=BATCH_SIZE, shuffle=True):
    """Build a batched ``tf.data`` pipeline that loads images from file paths.

    Images are decoded to 3 channels and resized to ``IMAGE_SIZE``; pixel
    values are otherwise left untouched. ResNet preprocessing is deliberately
    NOT applied here because the model returned by ``build_resnet50_model``
    already applies ``preprocess_input`` to its inputs; doing it in both places
    preprocessed every image twice.

    NOTE(review): weights trained while the pipeline still preprocessed images
    were fitted on doubly-preprocessed inputs and should be retrained.

    Args:
        paths: Array of image file paths.
        labels: Array of integer class labels aligned with ``paths``.
        batch_size: Number of samples per batch.
        shuffle: If true, shuffle with a buffer covering the whole dataset.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # A buffer as large as the dataset gives a full uniform shuffle.
        ds = ds.shuffle(buffer_size=len(paths))

    def load_img(path, label):
        raw = tf.io.read_file(path)
        # expand_animations=False keeps the result a single 3-D image tensor.
        img = tf.image.decode_image(raw, channels=3, expand_animations=False)
        img = tf.image.resize(img, IMAGE_SIZE)
        return img, label

    ds = ds.map(load_img, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

def shard_data(paths, labels, num_clients, seed=None):
    """Randomly partition samples into ``num_clients`` near-equal shards.

    The samples are shuffled and then split so that shard sizes differ by at
    most one (instead of handing the whole remainder to the last client).

    Args:
        paths: Sequence of sample paths.
        labels: Sequence of labels aligned with ``paths``.
        num_clients: Number of shards to create; must be at least 1.
        seed: Optional seed for a reproducible partition. When ``None`` the
            global NumPy random state is used, as before.

    Returns:
        List of ``num_clients`` tuples ``(shard_paths, shard_labels)``.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1 or ``paths`` and
            ``labels`` differ in length.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be >= 1, got {num_clients}")
    # Fancy indexing below needs arrays; this also lets callers pass lists.
    paths, labels = np.asarray(paths), np.asarray(labels)
    if len(paths) != len(labels):
        raise ValueError(
            f"paths and labels must have the same length, got {len(paths)} and {len(labels)}"
        )

    if seed is None:
        order = np.random.permutation(len(paths))
    else:
        order = np.random.default_rng(seed).permutation(len(paths))

    # array_split tolerates a length that is not a multiple of num_clients.
    return [(paths[chunk], labels[chunk]) for chunk in np.array_split(order, num_clients)]

def scale_weights(weights, scalar):
    """Return a new list in which every weight array is multiplied by ``scalar``."""
    scaled = []
    for tensor in weights:
        scaled.append(tensor * scalar)
    return scaled

def aggregate_scaled_weights(scaled_weights_list):
    """Sum the per-client weight lists layer by layer.

    Args:
        scaled_weights_list: One list of (already scaled) weight arrays per
            client, all in the same layer order.

    Returns:
        List holding, for each layer position, the element-wise sum over all
        clients.
    """
    # zip(*...) regroups the data from "per client" to "per layer".
    return [
        np.sum(per_layer, axis=0)
        for per_layer in zip(*scaled_weights_list)
    ]


🔄 STEP 4: Federated Training (FedAvg)

In [None]:
def run_fedavg():
    """Simulate federated averaging (FedAvg) over NUM_CLIENTS local data shards.

    Steps:
      1. Collect image paths/labels from DATA_DIR and hold out a stratified
         20% split for evaluation.
      2. Partition the remaining samples into NUM_CLIENTS shards.
      3. For COMMS_ROUNDS rounds: every client trains a fresh copy of the
         global model for LOCAL_EPOCHS epochs; the resulting weights are
         scaled by the client's share of the training samples and summed to
         form the new global weights, which are then evaluated on the
         held-out split.
      4. Save the final global model to OUTPUT_WEIGHTS_FILE.

    Takes no arguments and returns nothing; progress is printed.
    """
    # Load dataset
    paths, labels, classes = gather_filepaths_and_labels(DATA_DIR)
    print(f"Found {len(classes)} classes: {classes}")

    # Split into train/test
    # NOTE(review): the split is seeded (random_state=42) but shard_data is
    # called without a seed, so the client partition can differ between runs.
    p_train, p_test, y_train, y_test = train_test_split(paths, labels, test_size=0.2, stratify=labels, random_state=42)
    shards = shard_data(p_train, y_train, NUM_CLIENTS)
    clients = {f"client_{i+1}": {"paths": shards[i][0], "labels": shards[i][1]} for i in range(NUM_CLIENTS)}

    # Build global model
    # The global model is never fitted directly; it is compiled so that
    # evaluate() can report loss and accuracy.
    global_model = build_resnet50_model(num_classes=len(classes))
    global_model.compile(
        optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    test_ds = paths_to_dataset(p_test, y_test, batch_size=BATCH_SIZE, shuffle=False)
    # Denominator of the per-client scaling factors.
    total_samples = len(p_train)

    for rnd in range(1, COMMS_ROUNDS + 1):
        print(f"\n--- Communication Round {rnd}/{COMMS_ROUNDS} ---")
        scaled_local_weights = []

        for cname, data in clients.items():
            local_ds = paths_to_dataset(data["paths"], data["labels"], batch_size=BATCH_SIZE)
            # Each client starts the round from the current global weights, in
            # a freshly cloned and freshly compiled model (new optimizer).
            local_model = tf.keras.models.clone_model(global_model)
            local_model.build((None, IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
            local_model.set_weights(global_model.get_weights())
            local_model.compile(
                optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
                loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                metrics=['accuracy']
            )
            local_model.fit(local_ds, epochs=LOCAL_EPOCHS, verbose=0)

            # Weight the client's contribution by its share of training samples.
            scaling_factor = len(data["labels"]) / total_samples
            scaled_local_weights.append(scale_weights(local_model.get_weights(), scaling_factor))
            # NOTE(review): presumably here to limit memory growth from the
            # per-client clones; confirm it has no side effects on global_model.
            tf.keras.backend.clear_session()

        # Aggregate updates
        # The scaling factors sum to 1, so the sum is a weighted average.
        new_weights = aggregate_scaled_weights(scaled_local_weights)
        global_model.set_weights(new_weights)

        # Evaluate global model
        loss, acc = global_model.evaluate(test_ds, verbose=0)
        print(f"Round {rnd} - Global Test Loss: {loss:.4f} | Accuracy: {acc:.4%}")

    # Saves the whole model (not only its weights) to the configured path.
    global_model.save(OUTPUT_WEIGHTS_FILE)
    print(f"\n‚úÖ Training complete! Model saved to {OUTPUT_WEIGHTS_FILE}")

run_fedavg()


Found 2 classes: ['infected', 'notinfected']
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step

--- Communication Round 1/20 ---
Round 1 - Global Test Loss: 0.9086 | Accuracy: 50.9091%

--- Communication Round 2/20 ---
Round 2 - Global Test Loss: 0.7954 | Accuracy: 56.1039%

--- Communication Round 3/20 ---
Round 3 - Global Test Loss: 0.7113 | Accuracy: 60.5195%

--- Communication Round 4/20 ---
Round 4 - Global Test Loss: 0.6391 | Accuracy: 63.8961%

--- Communication Round 5/20 ---
Round 5 - Global Test Loss: 0.5736 | Accuracy: 66.2338%

--- Communication Round 6/20 ---
Round 6 - Global Test Loss: 0.5123 | Accuracy: 69.3506%

--- Communication Round 7/20 ---
Round 7 - Global Test Loss: 0.4544 | Accuracy: 72.7273%

--- Communication Round 8/20 ---
Round 8 - Global Test L



Round 20 - Global Test Loss: 0.1071 | Accuracy: 99.2208%

‚úÖ Training complete! Model saved to /content/drive/MyDrive/PCOS MINOR/Federated_ResNet50_PCOS (20 Comm Round 128 BS).h5
