<a href="https://colab.research.google.com/github/XeyadO/FL-GIU/blob/main/Autism_GRU_FL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Federated Learning on Autism Dataset with GRU
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

# Load dataset
df = pd.read_csv("Phenotypic_V1_0b_preprocessed1.csv")

# Keep only numeric columns and target
target_column = "DX_GROUP"
numeric_cols = df.select_dtypes(include=["int64", "float64"]).columns.tolist()
if target_column not in numeric_cols:
    numeric_cols.append(target_column)
df = df[numeric_cols].fillna(df[numeric_cols].mean())

# Feature-label split
X = df.drop(columns=[target_column]).values
y = df[target_column].values - 1  # Convert 1/2 to 0/1

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One-hot encode labels
num_classes = len(np.unique(y))
y_cat = to_categorical(y, num_classes=num_classes)

# Simulate 2 federated clients
X1, X2, y1, y2 = train_test_split(X, y_cat, test_size=0.5, random_state=42)

# Build improved model
def build_model(input_shape, num_classes):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Federated training with more local epochs and rounds
def federated_train(X_parts, y_parts, rounds=5):
    global_model = build_model((X.shape[1],), num_classes)

    for r in range(rounds):
        print(f"\n🌍 Federated Round {r+1}/{rounds}")
        local_weights = []

        for i, (X_local, y_local) in enumerate(zip(X_parts, y_parts)):
            print(f"  🏠 Training on Site {i+1}")
            model = build_model((X.shape[1],), num_classes)
            model.set_weights(global_model.get_weights())
            model.fit(X_local, y_local, epochs=30, verbose=0)  # More local training
            local_weights.append(model.get_weights())

        # FedAvg aggregation
        new_weights = [np.mean(w, axis=0) for w in zip(*local_weights)]
        global_model.set_weights(new_weights)

    return global_model

# Train the global federated model
X_parts = [X1, X2]
y_parts = [y1, y2]
global_model = federated_train(X_parts, y_parts, rounds=5)

# Evaluate global model
print("\n🌍 Evaluating Global Model on Full Dataset")
loss, acc = global_model.evaluate(X, y_cat, verbose=1)
print(f"🌍 Global Model Accuracy: {acc:.4f}")

# Evaluate each site
for i, (X_local, y_local) in enumerate(zip(X_parts, y_parts)):
    _, acc_local = global_model.evaluate(X_local, y_local, verbose=0)
    print(f"🏠 Site {i+1} Accuracy: {acc_local:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



🌍 Federated Round 1/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 2/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 3/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 4/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 5/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Evaluating Global Model on Full Dataset
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9885 - loss: 0.0272
🌍 Global Model Accuracy: 0.9802
🏠 Site 1 Accuracy: 0.9820
🏠 Site 2 Accuracy: 0.9784


In [7]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Set your dataset paths
base_dir = '/content/drive/MyDrive/Bachelor_ds'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Verify folder structure
print("Train directory contents:", os.listdir(train_dir))
print("Test directory contents:", os.listdir(test_dir))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Train directory contents: ['Non_Autistic.17.jpg', 'Non_Autistic.183.jpg', 'Non_Autistic.1094.jpg', 'Autistic.901.jpg', 'Non_Autistic.1180.jpg', 'Non_Autistic.219.jpg', 'Non_Autistic.1145.jpg', 'Non_Autistic.178.jpg', 'Non_Autistic.1250.jpg', 'Autistic.914.jpg', 'Non_Autistic.153.jpg', 'Non_Autistic.142.jpg', 'Non_Autistic.172.jpg', 'Non_Autistic.1038.jpg', 'Non_Autistic.1231.jpg', 'Non_Autistic.1011.jpg', 'Autistic.976.jpg', 'Non_Autistic.1057.jpg', 'Non_Autistic.294.jpg', 'Autistic.945.jpg', 'Non_Autistic.115.jpg', 'Non_Autistic.1196.jpg', 'Non_Autistic.105.jpg', 'Non_Autistic.1185.jpg', 'Non_Autistic.145.jpg', 'Non_Autistic.220.jpg', 'Autistic.949.jpg', 'Autistic.947.jpg', 'Non_Autistic.293.jpg', 'Non_Autistic.26.jpg', 'Autistic.924.jpg', 'Autistic.990.jpg', 'Non_Autistic.252.jpg', 'Non_Autistic.103.jpg', 'Autistic.903.jpg', 'Non_Autistic.1244.jpg', 'Non_Au

In [8]:
def load_image_dataset(directory, img_size=(224, 224)):
    """Load images from a flat directory with proper error handling"""
    images = []
    labels = []

    img_files = sorted(os.listdir(directory))
    img_files = [f for f in img_files if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    print(f"Found {len(img_files)} image files in {directory}")

    for idx, img_file in enumerate(img_files):
        try:
            img_path = os.path.join(directory, img_file)
            img = Image.open(img_path).convert('RGB').resize(img_size)
            img_array = img_to_array(img)
            images.append(img_array)

            # Here you could set a dummy label (e.g., 0) if needed
            labels.append(0)  # All same class if no real labels

        except Exception as e:
            print(f"Error loading {img_path}: {str(e)}")
            continue

    print(f"Loaded {len(images)} images.")
    return np.array(images), np.array(labels)

# Load datasets
print("\nLoading training data...")
X_train, y_train = load_image_dataset(train_dir)
print("\nLoading test data...")
X_test, y_test = load_image_dataset(test_dir)

# Verify class balance
print("\nClass distribution in training set:", np.unique(y_train, return_counts=True))
print("Class distribution in test set:", np.unique(y_test, return_counts=True))



Loading training data...
Found 2540 image files in /content/drive/MyDrive/Bachelor_ds/train
Loaded 2540 images.

Loading test data...
Found 300 image files in /content/drive/MyDrive/Bachelor_ds/test
Loaded 300 images.

Class distribution in training set: (array([0]), array([2540]))
Class distribution in test set: (array([0]), array([300]))


In [9]:
# Initialize feature extractor
base_model = MobileNetV2(weights='imagenet',
                        include_top=False,
                        pooling='avg',
                        input_shape=(224, 224, 3))

def extract_features(images, batch_size=32):
    """Extract features from images using MobileNetV2"""
    images_preprocessed = preprocess_input(images.astype('float32'))
    features = base_model.predict(images_preprocessed, batch_size=batch_size, verbose=1)
    print(f"Extracted features shape: {features.shape}")
    return features

print("\nExtracting training features...")
train_features = extract_features(X_train)
print("\nExtracting test features...")
test_features = extract_features(X_test)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

Extracting training features...
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 2s/step
Extracted features shape: (2540, 1280)

Extracting test features...
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step
Extracted features shape: (300, 1280)


In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# ------------------------------
# Prepare labels properly
# ------------------------------

# Since you put dummy labels (0), you need to fix real labels if you have them.
# Otherwise, we'll assume everything is class 0 temporarily.
# Let's one-hot encode anyway, so the code stays compatible.

num_classes = len(np.unique(y_train))
y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

# ------------------------------
# Simulate Federated Clients
# ------------------------------

# Split the train features into two parts
X1, X2, y1, y2 = train_test_split(train_features, y_train_cat, test_size=0.5, random_state=42)

X_parts = [X1, X2]
y_parts = [y1, y2]

# ------------------------------
# Build the model
# ------------------------------

def build_model(input_shape, num_classes):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# ------------------------------
# Federated training
# ------------------------------

def federated_train(X_parts, y_parts, rounds=5):
    global_model = build_model((train_features.shape[1],), num_classes)

    for r in range(rounds):
        print(f"\n🌍 Federated Round {r+1}/{rounds}")
        local_weights = []

        for i, (X_local, y_local) in enumerate(zip(X_parts, y_parts)):
            print(f"  🏠 Training on Site {i+1}")
            model = build_model((train_features.shape[1],), num_classes)
            model.set_weights(global_model.get_weights())
            model.fit(X_local, y_local, epochs=5, batch_size=32, verbose=0)
            local_weights.append(model.get_weights())

        # FedAvg aggregation
        new_weights = [np.mean(w, axis=0) for w in zip(*local_weights)]
        global_model.set_weights(new_weights)

    return global_model

# ------------------------------
# Train the global model
# ------------------------------

global_model = federated_train(X_parts, y_parts, rounds=5)

# ------------------------------
# Evaluate
# ------------------------------

print("\n🌍 Evaluating Global Model on Full Test Set")
loss, acc = global_model.evaluate(test_features, y_test_cat, verbose=1)
print(f"🌍 Global Model Accuracy on Test Set: {acc:.4f}")

# Evaluate on each site separately
for i, (X_local, y_local) in enumerate(zip(X_parts, y_parts)):
    _, acc_local = global_model.evaluate(X_local, y_local, verbose=0)
    print(f"🏠 Site {i+1} Accuracy: {acc_local:.4f}")



🌍 Federated Round 1/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 2/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 3/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 4/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Federated Round 5/5
  🏠 Training on Site 1
  🏠 Training on Site 2

🌍 Evaluating Global Model on Full Test Set
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0000e+00  
🌍 Global Model Accuracy on Test Set: 1.0000
🏠 Site 1 Accuracy: 1.0000
🏠 Site 2 Accuracy: 1.0000
