In [None]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import random

# Parameters
latent_dim = 50
num_clients = 5
epochs_per_client = 10
mu = 0.01  # FedProx proximal term coefficient
learning_rate = 0.005
batch_size = 32
image_size = (224, 224)

# Dataset paths
base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# Load and preprocess images
def load_data_limited_per_class(categories, max_per_class=100):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.endswith(".jpg") or file.endswith(".png"):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB")
                    img = img.resize(image_size)
                    img_array = np.array(img) / 255.0
                    X.append(img_array)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"Error loading {file}: {e}")
    return np.array(X), np.array(y)

# Load dataset
X, y = load_data_limited_per_class(categories, max_per_class=100)
y_cat = to_categorical(y, num_classes=5)

# Train-test split
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y_cat, test_size=0.2, stratify=y, random_state=42)

# Create non-IID clients
def create_noniid_clients(X, y, num_clients=5):
    data = list(zip(X, y))
    random.shuffle(data)

    class_buckets = {i: [] for i in range(5)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))

    clients_data = [[] for _ in range(num_clients)]

    for cls in class_buckets:
        cls_data = class_buckets[cls]
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = start + split_size if i < num_clients - 1 else len(cls_data)
            clients_data[i].extend(cls_data[start:end])

    X_clients, y_clients = [], []
    for client_data in clients_data:
        random.shuffle(client_data)
        Xc, yc = zip(*client_data)
        X_clients.append(np.array(Xc))
        y_clients.append(np.array(yc))

    return X_clients, y_clients

# Build base VGG16 model
def build_fedprox_vgg16(input_shape=(224, 224, 3), num_classes=5):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False

    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# Custom FedProx training per client
def train_fedprox_model(local_model, global_trainable_weights, X, y, mu, epochs):
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000).batch(batch_size)

    for epoch in range(epochs):
        for step, (x_batch, y_batch) in enumerate(dataset):
            with tf.GradientTape() as tape:
                preds = local_model(x_batch, training=True)
                loss = loss_fn(y_batch, preds)

                # FedProx proximal term
                prox_term = 0.0
                for w_local, w_global in zip(local_model.trainable_weights, global_trainable_weights):
                    prox_term += tf.reduce_sum(tf.square(w_local - w_global))
                loss += (mu / 2.0) * prox_term

            grads = tape.gradient(loss, local_model.trainable_weights)
            optimizer.apply_gradients(zip(grads, local_model.trainable_weights))

# Federated training with FedProx
def federated_fedprox_training(model, X_clients, y_clients, epochs, mu):
    global_weights = model.get_weights()
    global_trainable_weights = model.trainable_weights
    global_trainable_values = [w.numpy() for w in global_trainable_weights]

    client_weights = []

    for i in range(num_clients):
        print(f"Client {i+1} training...")
        local_model = build_fedprox_vgg16()
        local_model.set_weights(global_weights)
        train_fedprox_model(local_model, global_trainable_values, X_clients[i], y_clients[i], mu, epochs)
        client_weights.append(local_model.get_weights())

    new_weights = []
    for weights in zip(*client_weights):
        new_weights.append(np.mean(weights, axis=0))

    return new_weights

# Load pretrained IID-trained base model
pretrained_model_path = r"C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\final_trained_vgg16_model.h5"
global_model = load_model(pretrained_model_path)
print("Loaded pretrained base model.")

# Create non-IID client datasets
X_clients, y_clients = create_noniid_clients(X_train_full, y_train_full, num_clients)

# Federated training (1 round)
print("\nStarting Federated Round 1")
updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs=epochs_per_client, mu=mu)
global_model.set_weights(updated_weights)

# Evaluate
y_pred_probs = global_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print("\nFinal Model Evaluation Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")

# Save model
global_model.save("non_iid_vgg16_fedprox_updated.h5")
print("Saved fine-tuned global model as 'non_iid_vgg16_fedprox_updated.h5'")




Loaded pretrained base model.

Starting Federated Round 1
Client 1 training...
Client 2 training...
Client 3 training...
Client 4 training...
Client 5 training...
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3s/step





Final Model Evaluation Metrics:
Accuracy : 0.9100
Precision: 0.9118
Recall   : 0.9100
F1 Score : 0.9099
Saved fine-tuned global model as 'non_iid_vgg16_fedprox_updated.h5'


In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import VGG19
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# ----------------- CONFIGURATION -----------------
latent_dim = 50
num_clients = 5
epochs_per_client = 30
mu = 0.01
learning_rate = 0.005
batch_size = 32
image_size = (224, 224)
num_classes = 5
max_images_per_class = 100

base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# ----------------- DATA LOADING -----------------
def load_data(categories, max_per_class):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.png')):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB").resize(image_size)
                    X.append(np.array(img) / 255.0)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"[Warning] Skipping {file}: {e}")
    return np.array(X), to_categorical(np.array(y), num_classes=num_classes)

X, y_cat = load_data(categories, max_images_per_class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, stratify=np.argmax(y_cat, axis=1), random_state=42
)

# ----------------- CLIENT SPLITTING -----------------
def create_noniid_clients(X, y, num_clients):
    data = list(zip(X, y))
    random.shuffle(data)
    class_buckets = {i: [] for i in range(num_classes)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))
    clients = [[] for _ in range(num_clients)]
    for cls_data in class_buckets.values():
        random.shuffle(cls_data)
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = len(cls_data) if i == num_clients - 1 else (i + 1) * split_size
            clients[i].extend(cls_data[start:end])
    X_clients, y_clients = [], []
    for data in clients:
        random.shuffle(data)
        X_i, y_i = zip(*data)
        X_clients.append(np.array(X_i))
        y_clients.append(np.array(y_i))
    return X_clients, y_clients

X_clients, y_clients = create_noniid_clients(X_train, y_train, num_clients)

# ----------------- MODEL DEFINITION -----------------
def build_vgg19_model(input_shape=(224, 224, 3), num_classes=5, trainable=False):
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = trainable
    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

# ----------------- FEDPROX LOCAL TRAINING -----------------
def train_fedprox_model(local_model, global_weights, X, y, mu, epochs):
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000).batch(batch_size)

    for epoch in range(epochs):
        for x_batch, y_batch in dataset:
            with tf.GradientTape() as tape:
                preds = local_model(x_batch, training=True)
                loss = loss_fn(y_batch, preds)
                # Proximal term
                prox_term = 0.0
                for w, w_t in zip(local_model.trainable_weights, global_weights):
                    prox_term += tf.reduce_sum(tf.square(w - w_t))
                loss += (mu / 2.0) * prox_term
            grads = tape.gradient(loss, local_model.trainable_weights)
            optimizer.apply_gradients(zip(grads, local_model.trainable_weights))

# ----------------- FEDPROX AGGREGATION -----------------
def federated_fedprox_training(global_model, X_clients, y_clients, epochs, mu):
    global_weights = [w.numpy() for w in global_model.trainable_weights]
    client_weights = []

    for i in range(num_clients):
        print(f"\n🔧 Training Client {i + 1}")
        local_model = build_vgg19_model()
        local_model.set_weights(global_model.get_weights())
        train_fedprox_model(local_model, global_weights, X_clients[i], y_clients[i], mu, epochs)
        client_weights.append(local_model.get_weights())

    # FedAvg aggregation
    new_weights = [np.mean(w, axis=0) for w in zip(*client_weights)]
    return new_weights

# ----------------- MAIN SCRIPT -----------------
if __name__ == "__main__":
    pretrained_model_path = r"C:\Users\paray\OneDrive\Desktop\bits new research\final_global_vgg19_model.h5"
    global_model = load_model(pretrained_model_path)
    print("✅ Loaded pretrained VGG19 model.")

    # Federated training round
    print("\n🚀 Starting Federated Training Round")
    updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs_per_client, mu)
    global_model.set_weights(updated_weights)

    # Evaluation
    global_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    loss, acc = global_model.evaluate(X_test, y_test, verbose=1)
    print(f"\n🎯 Final Global Model Test Accuracy (Keras): {acc:.4f}")

    # Custom metrics
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(global_model.predict(X_test, verbose=0), axis=1)

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)

    print("\n📊 Classification Metrics:")
    print(f"🔹 Accuracy:  {accuracy:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 F1-score:  {f1:.4f}")

    # Save model
    save_path = "non_iid_vgg19_fedprox_updated.h5"
    global_model.save(save_path)
    print(f"💾 Saved updated model to '{save_path}'")




✅ Loaded pretrained VGG19 model.

🚀 Starting Federated Training Round

🔧 Training Client 1

🔧 Training Client 2

🔧 Training Client 3

🔧 Training Client 4

🔧 Training Client 5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4s/step - accuracy: 0.9397 - loss: 0.2944

🎯 Final Global Model Test Accuracy (Keras): 0.9300





📊 Classification Metrics:
🔹 Accuracy:  0.9300
🔹 Precision: 0.9312
🔹 Recall:    0.9300
🔹 F1-score:  0.9294
💾 Saved updated model to 'non_iid_vgg19_fedprox_updated.h5'


In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# ----------------- CONFIGURATION -----------------
latent_dim = 50
num_clients = 5
epochs_per_client = 30
mu = 0.01
learning_rate = 0.005
batch_size = 32
image_size = (224, 224)
num_classes = 5
max_images_per_class = 100

# Dataset paths
base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# ----------------- DATA LOADING -----------------
def load_data(categories, max_per_class):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.png')):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB").resize(image_size)
                    X.append(np.array(img) / 255.0)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"[Warning] Skipping {file}: {e}")
    return np.array(X), to_categorical(np.array(y), num_classes=num_classes)

X, y_cat = load_data(categories, max_images_per_class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, stratify=np.argmax(y_cat, axis=1), random_state=42
)

# ----------------- CLIENT SPLITTING -----------------
def create_noniid_clients(X, y, num_clients):
    data = list(zip(X, y))
    random.shuffle(data)
    
    class_buckets = {i: [] for i in range(num_classes)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))

    clients = [[] for _ in range(num_clients)]
    for cls_data in class_buckets.values():
        random.shuffle(cls_data)
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = len(cls_data) if i == num_clients - 1 else (i + 1) * split_size
            clients[i].extend(cls_data[start:end])

    X_clients, y_clients = [], []
    for data in clients:
        random.shuffle(data)
        X_i, y_i = zip(*data)
        X_clients.append(np.array(X_i))
        y_clients.append(np.array(y_i))
    return X_clients, y_clients

X_clients, y_clients = create_noniid_clients(X_train, y_train, num_clients)

# ----------------- MODEL DEFINITION -----------------
def build_resnet50_model(input_shape=(224, 224, 3), num_classes=5, trainable=False):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = trainable

    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# ----------------- FEDPROX TRAINING -----------------
def federated_fedprox_training(global_model, X_clients, y_clients, epochs, mu):
    global_weights = global_model.get_weights()
    client_weights = []

    for i in range(num_clients):
        print(f"\n🔧 Training Client {i + 1}")
        local_model = build_resnet50_model(trainable=False)
        local_model.set_weights(global_weights)

        optimizer = tf.keras.optimizers.Adam(learning_rate)
        loss_fn = tf.keras.losses.CategoricalCrossentropy()

        for epoch in range(epochs):
            indices = np.arange(len(X_clients[i]))
            np.random.shuffle(indices)
            for start in range(0, len(indices), batch_size):
                end = start + batch_size
                batch_idx = indices[start:end]
                x_batch, y_batch = X_clients[i][batch_idx], y_clients[i][batch_idx]

                with tf.GradientTape() as tape:
                    predictions = local_model(x_batch, training=True)
                    ce_loss = loss_fn(y_batch, predictions)
                    
                    # FedProx regularization term
                    prox_term = 0.0
                    for w, w_glob in zip(local_model.trainable_weights, global_weights):
                        prox_term += tf.reduce_sum(tf.square(w - w_glob))
                    total_loss = ce_loss + (mu / 2) * prox_term

                grads = tape.gradient(total_loss, local_model.trainable_weights)
                optimizer.apply_gradients(zip(grads, local_model.trainable_weights))

        client_weights.append(local_model.get_weights())

    # FedAvg
    new_weights = [np.mean(w, axis=0) for w in zip(*client_weights)]
    return new_weights

# ----------------- MAIN SCRIPT -----------------
if __name__ == "__main__":
    pretrained_model_path = r"C:\Users\paray\OneDrive\Desktop\bits new research\updated_global_resnet50_model.h5"
    global_model = load_model(pretrained_model_path)
    print("✅ Loaded pretrained ResNet50 model.")

    # Federated training round
    print("\n🚀 Starting Federated Training Round")
    updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs_per_client, mu)
    global_model.set_weights(updated_weights)

    # Evaluation
    global_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    loss, acc = global_model.evaluate(X_test, y_test, verbose=1)
    print(f"\n🎯 Final Global Model Test Accuracy (Keras): {acc:.4f}")

    # Custom metrics
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(global_model.predict(X_test, verbose=0), axis=1)

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)

    print("\n📊 Classification Metrics:")
    print(f"🔹 Accuracy:  {accuracy:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 F1-score:  {f1:.4f}")

    # Save model
    save_path = "non_iid_resnet50_fedprox_updated.h5"
    global_model.save(save_path)
    print(f"💾 Saved updated model to '{save_path}'")




✅ Loaded pretrained ResNet50 model.

🚀 Starting Federated Training Round

🔧 Training Client 1

🔧 Training Client 2

🔧 Training Client 3

🔧 Training Client 4

🔧 Training Client 5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.7041 - loss: 0.8867

🎯 Final Global Model Test Accuracy (Keras): 0.6900









📊 Classification Metrics:
🔹 Accuracy:  0.6900
🔹 Precision: 0.7813
🔹 Recall:    0.6900
🔹 F1-score:  0.6878
💾 Saved updated model to 'non_iid_resnet50_fedprox_updated.h5'


In [13]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# ----------------- CONFIGURATION -----------------
latent_dim = 50
num_clients = 5
epochs_per_client = 30
mu = 0.01  # FedProx proximal term coefficient
learning_rate = 0.005
batch_size = 32
image_size = (299, 299)
num_classes = 5
max_images_per_class = 100

# Dataset paths
base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# ----------------- DATA LOADING -----------------
def load_data(categories, max_per_class):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.png')):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB").resize(image_size)
                    X.append(np.array(img) / 255.0)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"[Warning] Skipping {file}: {e}")
    return np.array(X), to_categorical(np.array(y), num_classes=num_classes)

X, y_cat = load_data(categories, max_images_per_class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, stratify=np.argmax(y_cat, axis=1), random_state=42
)

# ----------------- CLIENT SPLITTING -----------------
def create_noniid_clients(X, y, num_clients):
    data = list(zip(X, y))
    random.shuffle(data)

    class_buckets = {i: [] for i in range(num_classes)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))

    clients = [[] for _ in range(num_clients)]
    for cls_data in class_buckets.values():
        random.shuffle(cls_data)
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = len(cls_data) if i == num_clients - 1 else (i + 1) * split_size
            clients[i].extend(cls_data[start:end])

    X_clients, y_clients = [], []
    for data in clients:
        random.shuffle(data)
        X_i, y_i = zip(*data)
        X_clients.append(np.array(X_i))
        y_clients.append(np.array(y_i))
    return X_clients, y_clients

X_clients, y_clients = create_noniid_clients(X_train, y_train, num_clients)

# ----------------- MODEL DEFINITION -----------------
def build_inceptionv3_model(input_shape=(299, 299, 3), num_classes=5, trainable=False):
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = trainable

    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs, outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# ----------------- LOCAL TRAINING WITH FEDPROX -----------------
def train_local_fedprox_model(local_model, global_model, X, y, epochs, mu):
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    # Get reference trainable weights from global model directly
    global_trainable_weights = [tf.convert_to_tensor(w) for w in global_model.trainable_weights]

    for epoch in range(epochs):
        dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(buffer_size=1024).batch(batch_size)
        for step, (batch_x, batch_y) in enumerate(dataset):
            with tf.GradientTape() as tape:
                predictions = local_model(batch_x, training=True)
                ce_loss = loss_fn(batch_y, predictions)
                # FedProx proximal term (on trainable weights only)
                prox_loss = 0.0
                for lw, gw in zip(local_model.trainable_weights, global_trainable_weights):
                    prox_loss += tf.reduce_sum(tf.square(lw - gw))
                total_loss = ce_loss + (mu / 2) * prox_loss
            grads = tape.gradient(total_loss, local_model.trainable_weights)
            optimizer.apply_gradients(zip(grads, local_model.trainable_weights))
    return local_model.get_weights()



# ----------------- FEDPROX TRAINING -----------------
def federated_fedprox_training(global_model, X_clients, y_clients, epochs, mu):
    global_weights = global_model.get_weights()
    client_weights = []

    for i in range(num_clients):
        print(f"\n🔧 Training Client {i + 1}")
        local_model = build_inceptionv3_model()
        local_model.set_weights(global_weights)

        # 🔧 Corrected line below
        updated_weights = train_local_fedprox_model(local_model, global_model, X_clients[i], y_clients[i], epochs, mu)
        client_weights.append(updated_weights)

    # FedAvg Aggregation
    new_weights = [np.mean(w, axis=0) for w in zip(*client_weights)]
    return new_weights


# ----------------- MAIN SCRIPT -----------------
if __name__ == "__main__":
    pretrained_model_path = r"C:\Users\paray\OneDrive\Desktop\bits new research\final_global_inceptionv3_model.h5"
    global_model = load_model(pretrained_model_path)
    print("✅ Loaded pretrained InceptionV3 model.")

    print("\n🚀 Starting Federated Training Round with FedProx")
    updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs_per_client, mu)
    global_model.set_weights(updated_weights)

    global_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    loss, acc = global_model.evaluate(X_test, y_test, verbose=1)
    print(f"\n🎯 Final Global Model Test Accuracy: {acc:.4f}")

    # Classification metrics
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(global_model.predict(X_test, verbose=0), axis=1)

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)

    print("\n📊 Classification Metrics:")
    print(f"🔹 Accuracy:  {accuracy:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 F1-score:  {f1:.4f}")

    save_path = "non_iid_inceptionv3_fedprox_updated.h5"
    global_model.save(save_path)
    print(f"💾 Saved updated model to '{save_path}'")




✅ Loaded pretrained InceptionV3 model.

🚀 Starting Federated Training Round with FedProx

🔧 Training Client 1

🔧 Training Client 2

🔧 Training Client 3

🔧 Training Client 4

🔧 Training Client 5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.9838 - loss: 0.0457

🎯 Final Global Model Test Accuracy: 0.9700





📊 Classification Metrics:
🔹 Accuracy:  0.9700
🔹 Precision: 0.9710
🔹 Recall:    0.9700
🔹 F1-score:  0.9702
💾 Saved updated model to 'non_iid_inceptionv3_fedprox_updated.h5'


In [4]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# ----------------- CONFIGURATION -----------------
latent_dim = 50
num_clients = 5
epochs_per_client = 30
mu = 0.01
learning_rate = 0.005
batch_size = 32
image_size = (224, 224)
num_classes = 5
max_images_per_class = 100

# Dataset paths
base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# ----------------- DATA LOADING -----------------
def load_data(categories, max_per_class):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.png')):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB").resize(image_size)
                    X.append(np.array(img) / 255.0)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"[Warning] Skipping {file}: {e}")
    return np.array(X), to_categorical(np.array(y), num_classes=num_classes)

X, y_cat = load_data(categories, max_images_per_class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, stratify=np.argmax(y_cat, axis=1), random_state=42
)

# ----------------- CLIENT SPLITTING -----------------
def create_noniid_clients(X, y, num_clients):
    data = list(zip(X, y))
    random.shuffle(data)

    class_buckets = {i: [] for i in range(num_classes)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))

    clients = [[] for _ in range(num_clients)]
    for cls_data in class_buckets.values():
        random.shuffle(cls_data)
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = len(cls_data) if i == num_clients - 1 else (i + 1) * split_size
            clients[i].extend(cls_data[start:end])

    X_clients, y_clients = [], []
    for data in clients:
        random.shuffle(data)
        X_i, y_i = zip(*data)
        X_clients.append(np.array(X_i))
        y_clients.append(np.array(y_i))
    return X_clients, y_clients

X_clients, y_clients = create_noniid_clients(X_train, y_train, num_clients)

# ----------------- MODEL DEFINITION (InceptionResNetV2) -----------------
def FedInceptionResNetV2(input_shape=(224, 224, 3), num_classes=5):
    base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False

    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# ----------------- FEDPROX LOSS FUNCTION -----------------
def fedprox_loss(global_weights, local_model, mu):
    # Extract only trainable global weights
    global_trainable_weights = [
        w for w, lw in zip(global_weights, local_model.weights) if lw.trainable
    ]
    def loss_fn(y_true, y_pred):
        base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
        prox_term = 0.0
        for w, w_t in zip(local_model.trainable_weights, global_trainable_weights):
            prox_term += tf.reduce_sum(tf.square(w - tf.convert_to_tensor(w_t)))
        return base_loss + (mu / 2) * prox_term
    return loss_fn


# ----------------- FEDPROX TRAINING -----------------
def federated_fedprox_training(global_model, X_clients, y_clients, epochs, mu):
    global_weights = global_model.get_weights()
    client_weights = []

    for i in range(num_clients):
        print(f"\n🔧 Training Client {i + 1}")
        local_model = FedInceptionResNetV2()
        local_model.set_weights(global_weights)

        local_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate),
            loss=fedprox_loss(global_weights, local_model, mu),
            metrics=['accuracy']
        )

        local_model.fit(
            X_clients[i], y_clients[i],
            epochs=epochs,
            batch_size=batch_size,
            verbose=0
        )

        client_weights.append(local_model.get_weights())

    # FedAvg aggregation
    new_weights = [np.mean(w, axis=0) for w in zip(*client_weights)]
    return new_weights

# ----------------- MAIN SCRIPT -----------------
if __name__ == "__main__":
    pretrained_model_path = r"C:\Users\paray\OneDrive\Desktop\bits new research\updated_global_inceptionresnetv2_model.h5"
    global_model = FedInceptionResNetV2()
    global_model.load_weights(pretrained_model_path)
    print("✅ Loaded model weights into FedInceptionResNetV2 model.")

    print("\n🚀 Starting Federated Training Round")
    updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs_per_client, mu)
    global_model.set_weights(updated_weights)

    # Evaluation
    global_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    loss, acc = global_model.evaluate(X_test, y_test, verbose=1)
    print(f"\n🎯 Final Global Model Test Accuracy (Keras): {acc:.4f}")

    # Custom metrics
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(global_model.predict(X_test, verbose=0), axis=1)

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)

    print("\n📊 Classification Metrics:")
    print(f"🔹 Accuracy:  {accuracy:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 F1-score:  {f1:.4f}")

    # Save model
    save_path = "non_iid_inceptionresnetv2_fedprox_updated.h5"
    global_model.save(save_path)
    print(f"💾 Saved updated model to '{save_path}'")


✅ Loaded model weights into FedInceptionResNetV2 model.

🚀 Starting Federated Training Round

🔧 Training Client 1

🔧 Training Client 2

🔧 Training Client 3

🔧 Training Client 4

🔧 Training Client 5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.9746 - loss: 0.1144

🎯 Final Global Model Test Accuracy (Keras): 0.9600





📊 Classification Metrics:
🔹 Accuracy:  0.9600
🔹 Precision: 0.9614
🔹 Recall:    0.9600
🔹 F1-score:  0.9600
💾 Saved updated model to 'non_iid_inceptionresnetv2_fedprox_updated.h5'


In [15]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# ----------------- CONFIGURATION -----------------
latent_dim = 50
num_clients = 5
epochs_per_client = 30
mu = 0.01
learning_rate = 0.005
batch_size = 32
image_size = (224, 224)
num_classes = 5
max_images_per_class = 100

# Dataset paths
base_path = "C:\\Users\\paray\\OneDrive\\Desktop\\bits new research\\dataset\\dataset\\organized_screenshots"
categories = {
    0: os.path.join(base_path, "Education", "Coursera"),
    1: os.path.join(base_path, "Education", "Programming"),
    2: os.path.join(base_path, "Education", "YouTube"),
    3: os.path.join(base_path, "Entertainment", "YouTube"),
    4: os.path.join(base_path, "Shopping"),
}

# ----------------- DATA LOADING -----------------
def load_data(categories, max_per_class):
    X, y = [], []
    for label, folder in categories.items():
        count = 0
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.png')):
                try:
                    img_path = os.path.join(folder, file)
                    img = Image.open(img_path).convert("RGB").resize(image_size)
                    X.append(np.array(img) / 255.0)
                    y.append(label)
                    count += 1
                    if count >= max_per_class:
                        break
                except Exception as e:
                    print(f"[Warning] Skipping {file}: {e}")
    return np.array(X), to_categorical(np.array(y), num_classes=num_classes)

X, y_cat = load_data(categories, max_images_per_class)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, stratify=np.argmax(y_cat, axis=1), random_state=42
)

# ----------------- CLIENT SPLITTING -----------------
def create_noniid_clients(X, y, num_clients):
    data = list(zip(X, y))
    random.shuffle(data)

    class_buckets = {i: [] for i in range(num_classes)}
    for xi, yi in data:
        class_idx = np.argmax(yi)
        class_buckets[class_idx].append((xi, yi))

    clients = [[] for _ in range(num_clients)]
    for cls_data in class_buckets.values():
        random.shuffle(cls_data)
        split_size = len(cls_data) // num_clients
        for i in range(num_clients):
            start = i * split_size
            end = len(cls_data) if i == num_clients - 1 else (i + 1) * split_size
            clients[i].extend(cls_data[start:end])

    X_clients, y_clients = [], []
    for data in clients:
        random.shuffle(data)
        X_i, y_i = zip(*data)
        X_clients.append(np.array(X_i))
        y_clients.append(np.array(y_i))
    return X_clients, y_clients

X_clients, y_clients = create_noniid_clients(X_train, y_train, num_clients)

# ----------------- MODEL DEFINITION (MobileNet) -----------------
def FedMobileNet(input_shape=(224, 224, 3), num_classes=5):
    base_model = MobileNet(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False

    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# ----------------- FEDPROX LOSS FUNCTION -----------------
def fedprox_loss(global_weights, local_model, mu):
    global_trainable_weights = [
        w for w, lw in zip(global_weights, local_model.weights) if lw.trainable
    ]
    def loss_fn(y_true, y_pred):
        base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
        prox_term = 0.0
        for w, w_t in zip(local_model.trainable_weights, global_trainable_weights):
            prox_term += tf.reduce_sum(tf.square(w - tf.convert_to_tensor(w_t)))
        return base_loss + (mu / 2) * prox_term
    return loss_fn

# ----------------- FEDPROX TRAINING -----------------
def federated_fedprox_training(global_model, X_clients, y_clients, epochs, mu):
    global_weights = global_model.get_weights()
    client_weights = []

    for i in range(num_clients):
        print(f"\n🔧 Training Client {i + 1}")
        local_model = FedMobileNet()
        local_model.set_weights(global_weights)

        local_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate),
            loss=fedprox_loss(global_weights, local_model, mu),
            metrics=['accuracy']
        )

        local_model.fit(
            X_clients[i], y_clients[i],
            epochs=epochs,
            batch_size=batch_size,
            verbose=0
        )

        client_weights.append(local_model.get_weights())

    # FedAvg aggregation
    new_weights = [np.mean(w, axis=0) for w in zip(*client_weights)]
    return new_weights

# ----------------- MAIN SCRIPT -----------------
if __name__ == "__main__":
    pretrained_model_path = r"C:\Users\paray\OneDrive\Desktop\bits new research\updated_mobilenet_model.h5"
    global_model = FedMobileNet()
    global_model.load_weights(pretrained_model_path)
    print("✅ Loaded model weights into FedMobileNet model.")

    print("\n🚀 Starting Federated Training Round (MobileNet + FedProx)")
    updated_weights = federated_fedprox_training(global_model, X_clients, y_clients, epochs_per_client, mu)
    global_model.set_weights(updated_weights)

    # Evaluation
    global_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    loss, acc = global_model.evaluate(X_test, y_test, verbose=1)
    print(f"\n🎯 Final Global Model Test Accuracy (MobileNet): {acc:.4f}")

    # Custom metrics
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(global_model.predict(X_test, verbose=0), axis=1)

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)

    print("\n📊 Classification Metrics:")
    print(f"🔹 Accuracy:  {accuracy:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 F1 Score:  {f1:.4f}")


✅ Loaded model weights into FedMobileNet model.

🚀 Starting Federated Training Round (MobileNet + FedProx)

🔧 Training Client 1

🔧 Training Client 2

🔧 Training Client 3

🔧 Training Client 4

🔧 Training Client 5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 607ms/step - accuracy: 0.9868 - loss: 0.0264

🎯 Final Global Model Test Accuracy (MobileNet): 0.9800

📊 Classification Metrics:
🔹 Accuracy:  0.9800
🔹 Precision: 0.9810
🔹 Recall:    0.9800
🔹 F1 Score:  0.9797
