In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

# Define a simple CNN model for FedPer
def create_fedper_model(shared=True):
    model = models.Sequential()
    # Shared layers (collaboratively trained)
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    # The following layers ensure that the model outputs a classification result
    model.add(layers.Flatten())  # Flatten the feature map before fully connected layers
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))  # Output layer for 10 classes (MNIST digits)

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to get only the shared weights
def get_shared_weights(model):
    return model.layers[0].get_weights(), model.layers[2].get_weights()  # Return weights of shared layers only

# Function to set only the shared weights
def set_shared_weights(model, shared_weights):
    model.layers[0].set_weights(shared_weights[0])  # Set the weights for the first Conv2D layer
    model.layers[2].set_weights(shared_weights[1])  # Set the weights for the second Conv2D layer

# Split dataset among clients (n_clients = 10)
n_clients = 10
client_data_size = len(x_train) // n_clients
client_data = [(x_train[i * client_data_size:(i + 1) * client_data_size],
                y_train[i * client_data_size:(i + 1) * client_data_size]) for i in range(n_clients)]

# FedPer training
def fedper_training(global_model, rounds=5, epochs=1):
    global_shared_weights = get_shared_weights(global_model)  # Get the shared weights

    for r in range(rounds):
        print(f"Round {r + 1}")
        for (x_local, y_local) in client_data:
            # Train local personalized models
            local_model = create_fedper_model(shared=True)  # Create personalized model (with shared layers)
            set_shared_weights(local_model, global_shared_weights)  # Set the shared weights from the global model

            local_model.fit(x_local, y_local, epochs=epochs, verbose=0)

            # After local training, get the shared weights (ignore personalized layers)
            local_shared_weights = get_shared_weights(local_model)

            # Average each weight (including kernel and bias separately) element-wise
            global_shared_weights = [
                [(gs_w) for gs_w, ls_w in zip(global_w, local_w)]
                for global_w, local_w in zip(global_shared_weights, local_shared_weights)
            ]

    # Set the averaged shared weights back to the global model
    set_shared_weights(global_model, global_shared_weights)

# Initialize shared global model
global_model = create_fedper_model(shared=True)

# Train the model using FedPer
fedper_training(global_model)

# Test the global model on test data
test_loss, test_acc = global_model.evaluate(x_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Round 1
Round 2
Round 3
Round 4
Round 5
313/313 - 3s - 10ms/step - accuracy: 0.0543 - loss: 2.3232
Test Accuracy: 0.0543


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras import datasets

# Load and preprocess MNIST dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

# Split the dataset among clients
n_clients = 3  # Number of clients
client_data_size = len(x_train) // n_clients
client_data = [(x_train[i * client_data_size:(i + 1) * client_data_size],
                y_train[i * client_data_size:(i + 1) * client_data_size]) for i in range(n_clients)]

# Define NAS search space (e.g., different CNN architectures)
def create_nas_model(architecture):
    model = models.Sequential()

    # Add layers according to the architecture
    for layer in architecture:
        if layer["type"] == "conv":
            model.add(layers.Conv2D(layer["filters"], (3, 3), activation='relu', input_shape=(28, 28, 1)))
        elif layer["type"] == "pool":
            model.add(layers.MaxPooling2D((2, 2)))
        elif layer["type"] == "dense":
            model.add(layers.Flatten())
            model.add(layers.Dense(layer["units"], activation='relu'))
        elif layer["type"] == "output":
            model.add(layers.Dense(10, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Define the NAS search space
search_space = [
    [{"type": "conv", "filters": 32}, {"type": "pool"}, {"type": "dense", "units": 64}, {"type": "output"}],
    [{"type": "conv", "filters": 64}, {"type": "pool"}, {"type": "dense", "units": 128}, {"type": "output"}],
    [{"type": "conv", "filters": 32}, {"type": "conv", "filters": 64}, {"type": "pool"}, {"type": "dense", "units": 64}, {"type": "output"}],
    [{"type": "conv", "filters": 64}, {"type": "conv", "filters": 128}, {"type": "pool"}, {"type": "dense", "units": 128}, {"type": "output"}]
]

# NAS optimization loop for each client
def local_nas_search(client_data, search_space):
    best_architecture = None
    best_score = 0

    # Loop through the search space to find the best architecture
    for arch in search_space:
        model = create_nas_model(arch)
        model.fit(client_data[0], client_data[1], epochs=2, verbose=0)
        _, score = model.evaluate(client_data[0], client_data[1], verbose=0)

        # Update the best architecture if the current one performs better
        if score > best_score:
            best_architecture = arch
            best_score = score

    return best_architecture, best_score

# FedNAS main function
def fednas_training(n_clients, rounds=3, search_space=search_space):
    for r in range(rounds):
        print(f"\n=== Round {r + 1} ===")

        # Each client performs local NAS search
        client_architectures = []
        for client in range(n_clients):
            best_arch, score = local_nas_search(client_data[client], search_space)
            client_architectures.append((best_arch, score))
            print(f"Client {client + 1} best architecture: {best_arch} with score {score:.4f}")

        # Select the best architecture among the clients for this round
        best_architecture = max(client_architectures, key=lambda x: x[1])[0]
        print(f"Selected best architecture for round {r + 1}: {best_architecture}")

        # Train the global model on the selected best architecture
        global_model = create_nas_model(best_architecture)
        global_model.fit(x_train, y_train, epochs=2, verbose=2)  # Train on full dataset

        # Evaluate the global model on the test set
        test_loss, test_acc = global_model.evaluate(x_test, y_test, verbose=2)
        print(f"Global Model Test Accuracy after Round {r + 1}: {test_acc:.4f}")

# Run the FedNAS training process
fednas_training(n_clients, rounds=3)



=== Round 1 ===
Client 1 best architecture: [{'type': 'conv', 'filters': 64}, {'type': 'conv', 'filters': 128}, {'type': 'pool'}, {'type': 'dense', 'units': 128}, {'type': 'output'}] with score 0.9933
Client 2 best architecture: [{'type': 'conv', 'filters': 64}, {'type': 'conv', 'filters': 128}, {'type': 'pool'}, {'type': 'dense', 'units': 128}, {'type': 'output'}] with score 0.9929


KeyboardInterrupt: 

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

# Load and preprocess MNIST dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

# Split dataset among clients
n_clients = 3
client_data_size = len(x_train) // n_clients
client_data = [(x_train[i * client_data_size:(i + 1) * client_data_size],
                y_train[i * client_data_size:(i + 1) * client_data_size]) for i in range(n_clients)]

# Define CNN model (used for both teacher and student models)
def create_model():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')  # Output layer for 10 classes (MNIST digits)
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Knowledge Distillation Function
def knowledge_distillation(teacher_model, student_model, x_train, alpha=0.1):
    # Teacher predicts soft labels (logits or probabilities)
    teacher_pred = teacher_model.predict(x_train)

    # Compile the student model (global model) with sparse_categorical_crossentropy
    student_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the student model on client data using teacher's soft labels
    student_model.fit(x_train, teacher_pred, epochs=5, verbose=0)

# FedGKT Training Function
def fedgkt_training(global_model, client_data, n_clients, rounds=5):
    for r in range(rounds):
        print(f"\n=== Round {r + 1} ===")

        for client in range(n_clients):
            # Create a local model (teacher) for the client
            local_model = create_model()

            # Train the teacher model on client's local data
            local_model.fit(client_data[client][0], client_data[client][1], epochs=5, verbose=0)

            # Perform knowledge distillation from teacher (local) to student (global)
            knowledge_distillation(local_model, global_model, client_data[client][0])

        # After each round, evaluate the global model on the test set
        test_loss, test_acc = global_model.evaluate(x_test, y_test, verbose=2)
        print(f"Global Model Test Accuracy after Round {r + 1}: {test_acc:.4f}")

# Initialize the global model (student model)
global_model = create_model()

# Perform the FedGKT training process
fedgkt_training(global_model, client_data, n_clients, rounds=3)

# Final evaluation of the global model
test_loss, test_acc = global_model.evaluate(x_test, y_test, verbose=2)
print(f"Final Global Model Test Accuracy: {test_acc:.4f}")



=== Round 1 ===
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(32, 10), output.shape=(32, 10)