**MNIST Manual Composition**
---

In [None]:
# ========================
# MOUNT GOOGLE DRIVE
# ========================
from google.colab import drive
import os
drive.mount('/content/drive')

# Google Drive folder holding the pre-trained per-client weight files.
# The evaluation loop further down looks for one file per client named client_<id>_local.npz.
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_20_MNIST"  # <-- CHANGE if needed

# ========================
# IMPORTS
# ========================
import pandas as pd
import numpy as np
import itertools
import tensorflow as tf
from tensorflow.keras import layers, models

# ========================
# ASK USER: HOW MANY CLIENTS TO SAMPLE
# ========================
# Interactive prompt: size n of the client pool from which combinations are drawn.
# int() raises ValueError if the answer is not an integer.
num_clients_to_sample = int(input("üî¢ Enter how many clients to sample (n): "))

CLIENTS_PER_COMBINATION = 10     # number of clients in every evaluated combination
LOCAL_EPOCHS = 2               # local training epochs per client in each federated round
GLOBAL_ROUNDS = 1              # number of federated rounds run per combination

# ========================
# LOAD CLIENT METADATA
# ========================
# Client profile table; later code reads Client_ID, DataVolume(Samples) and the QoS columns.
# Presumably one row per client (the loop takes the first matching row) - TODO confirm.
df = pd.read_csv("/content/MNIST_Client_Profiles_For_Composability_20_20.csv")

# Columns whose name starts with "label" (case-insensitive), e.g. Label0..Label9.
# get_client_dataset treats them as per-class sample counts when at least 10 are present.
label_cols = [c for c in df.columns if c.lower().startswith("label")]
# ========================
# MODEL DEFINITION
# ========================
def build_model():
    """Build and compile the small CNN used for every MNIST client and for the global model.

    Architecture: Input(28, 28, 1) -> Conv2D(16, 3x3, relu) -> MaxPooling2D(2x2)
    -> Flatten -> Dense(32, relu) -> Dense(10, softmax).

    The input shape is declared with an explicit ``layers.Input`` object rather than
    ``input_shape=`` on the first layer; recent Keras versions emit a warning for the
    latter in Sequential models. The Input object carries no weights, so the list
    returned by ``get_weights()`` keeps the same length and order.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(16, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(32, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# ========================
# LOAD MNIST (train + test)
# ========================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixels to [0, 1] as float32 and append a trailing channel axis.
x_train = np.expand_dims(x_train.astype("float32") / 255.0, axis=-1)
x_test = np.expand_dims(x_test.astype("float32") / 255.0, axis=-1)

# Integer class ids, as expected by the sparse categorical loss.
y_train, y_test = y_train.astype("int32"), y_test.astype("int32")

# Global models are scored on the first 1000 test images only.
eval_data, eval_labels = x_test[:1000], y_test[:1000]

# ========================
# BUILD LABEL POOLS (for non-IID reconstruction if label columns exist)
# ========================
# For each class 0..9: the positions in the training set that carry that label.
train_indices_by_label = {digit: np.flatnonzero(y_train == digit) for digit in range(10)}

# Rolling read position per class, so successive draws move through each pool
# instead of always returning the same leading samples.
label_cursors = dict.fromkeys(range(10), 0)

def _take_indices_for_label(label, count):
    """Take `count` training-set indices for `label` from its pool in a rolling way.

    Reads start at the label's current cursor in ``label_cursors`` and wrap around
    the end of the pool as many times as needed, so exactly `count` indices are
    returned even when `count` exceeds the pool size several times over. The
    cursor is then advanced, which makes the result depend on earlier calls.

    Args:
        label: Class id used as key into ``train_indices_by_label``.
        count: Number of indices requested; values <= 0 yield an empty array.

    Returns:
        1-D integer array of length ``max(count, 0)``.

    Raises:
        ValueError: If indices are requested from an empty pool.
    """
    pool = train_indices_by_label[label]
    if count <= 0:
        return np.array([], dtype=np.int64)

    pool_size = len(pool)
    if pool_size == 0:
        raise ValueError(f"No training samples available for label {label}")

    start = label_cursors[label]
    # Modular positions handle any number of wrap-arounds deterministically.
    positions = (start + np.arange(count)) % pool_size

    label_cursors[label] = (start + count) % pool_size
    return pool[positions]

def get_client_dataset(cid, client_row, batch_size=64):
    """
    Create a local training dataset for one client from the MNIST training split.

    - If at least 10 Label* columns exist: build a non-IID sample whose per-class
      counts follow those columns (rescaled to the requested sample count).
      Indices come from `_take_indices_for_label`, which advances shared per-label
      cursors, so the exact samples depend on the order of earlier calls.
    - Otherwise, or if all label counts are zero: draw a random sample seeded by
      the client id. Sampling is with replacement only when more samples are
      requested than the training set contains.

    Args:
        cid: Client id; must be convertible with int() (used to seed sampling/shuffling).
        client_row: pandas Series with the client's profile row.
        batch_size: Batch size of the returned dataset.

    Returns:
        (dataset, n) where `dataset` is a shuffled, batched tf.data.Dataset of
        (image, label) pairs and `n` is the number of samples in it.
    """
    # Requested sample count; fall back to 500 when the profile has no usable value.
    if "DataVolume(Samples)" in client_row.index and not pd.isna(client_row["DataVolume(Samples)"]):
        n_samples = int(client_row["DataVolume(Samples)"])
    else:
        n_samples = 500  # safe fallback

    n_samples = max(1, n_samples)

    def _seeded_random_indices():
        # Same seed for the same client -> same draw. Replacement is enabled only
        # when the request exceeds the training-set size; without it rng.choice raises.
        rng = np.random.RandomState(int(cid) + 123)
        return rng.choice(len(x_train), size=n_samples, replace=n_samples > len(x_train))

    # Case A: label distribution exists
    if len(label_cols) >= 10:
        # Map label columns to classes 0..9 by their trailing digit (Label0..Label9).
        counts = []
        for k in range(10):
            candidates = [c for c in label_cols if c.lower().endswith(str(k))]
            if len(candidates) == 0:
                counts.append(0)
            else:
                v = client_row[candidates[0]]
                counts.append(int(v) if not pd.isna(v) else 0)

        total = sum(counts)
        if total <= 0:
            # No usable label information for this client.
            idx = _seeded_random_indices()
        else:
            # Rescale the per-class counts so that they sum to n_samples.
            if total != n_samples:
                scaled = np.array(counts, dtype=np.float64) / float(total)
                counts = np.floor(scaled * n_samples).astype(int).tolist()
                # Flooring can leave a shortfall; hand it out one sample at a time,
                # starting with the classes that have the largest share.
                gap = n_samples - sum(counts)
                if gap > 0:
                    order = np.argsort(-scaled)
                    for i in range(gap):
                        counts[int(order[i % 10])] += 1

            idx_parts = []
            for k in range(10):
                idx_parts.append(_take_indices_for_label(k, counts[k]))
            idx = np.concatenate(idx_parts, axis=0)

    # Case B: no label columns -> seeded random sample
    else:
        idx = _seeded_random_indices()

    x_c = x_train[idx]
    y_c = y_train[idx]

    ds = tf.data.Dataset.from_tensor_slices((x_c, y_c)).shuffle(
        buffer_size=min(2000, len(y_c)), seed=int(cid) + 999, reshuffle_each_iteration=True
    ).batch(batch_size)

    return ds, len(y_c)

def fedavg(weights_list, sample_counts=None):
    """
    FedAvg aggregation over per-client weight lists.

    Args:
        weights_list: Sequence with one entry per client; each entry is a list of
            arrays (one per model variable) with matching shapes across clients.
        sample_counts: Optional sequence with one non-negative number per client.
            When given, each client's weights are scaled by its share of the total
            count; when None, a plain mean is taken.

    Returns:
        List of averaged arrays, one per model variable (empty if `weights_list`
        is empty). The weighted path computes with float64 shares.

    Raises:
        ValueError: If `sample_counts` does not hold exactly one value per client,
            contains a negative value, or does not sum to a positive finite number.
    """
    weights_list = list(weights_list)

    if sample_counts is None:
        avg_weights = []
        for layer_weights in zip(*weights_list):
            stacked = np.stack(layer_weights, axis=0)
            avg_weights.append(np.mean(stacked, axis=0))
        return avg_weights

    shares = np.asarray(sample_counts, dtype=np.float64)
    # A wrong-length vector could otherwise broadcast silently (e.g. length 1)
    # and produce a sum instead of an average.
    if shares.ndim != 1 or len(shares) != len(weights_list):
        raise ValueError(
            f"sample_counts must contain one value per client "
            f"(got shape {shares.shape} for {len(weights_list)} clients)"
        )
    if len(weights_list) == 0:
        return []

    total = shares.sum()
    # Guard against NaN/inf results from a zero or invalid total.
    if np.any(shares < 0) or not np.isfinite(total) or total <= 0:
        raise ValueError("sample_counts must be non-negative and sum to a positive finite value")
    shares = shares / total

    avg_weights = []
    for layer_weights in zip(*weights_list):
        # layer_weights: tuple of arrays, one per client
        stacked = np.stack(layer_weights, axis=0)  # (num_clients, ...)
        # reshape shares to (num_clients, 1, 1, ...) so they broadcast over the layer
        w = shares.reshape((-1,) + (1,) * (stacked.ndim - 1))
        avg_weights.append(np.sum(stacked * w, axis=0))
    return avg_weights

# ========================
# SAMPLE CLIENTS & GENERATE COMBINATIONS
# ========================
# df.sample raises if asked for more rows than exist, so cap the request first.
if num_clients_to_sample > len(df):
    print(f"Requested n={num_clients_to_sample}, but only {len(df)} client profiles exist. Using n={len(df)}.")
    num_clients_to_sample = len(df)

# With fewer sampled clients than the combination size there is nothing to evaluate;
# fail here instead of silently writing an empty result file.
if num_clients_to_sample < CLIENTS_PER_COMBINATION:
    raise ValueError(
        f"Need at least {CLIENTS_PER_COMBINATION} clients to form one combination, "
        f"got n={num_clients_to_sample}."
    )

# Fixed random_state keeps the sampled client pool identical between runs.
selected_df = df.sample(n=num_clients_to_sample, random_state=42)
selected_ids = selected_df["Client_ID"].tolist()

# Every unordered subset of CLIENTS_PER_COMBINATION clients from the sampled pool.
combinations = list(itertools.combinations(selected_ids, CLIENTS_PER_COMBINATION))
print(f"üîÅ Total combinations to evaluate: {len(combinations)}")

# ========================
# COMBINATION EVALUATION
# - Step 1: load local weights
# - Step 2: aggregate -> initial global weights
# - Step 3: 1 federated round with 2 local epochs
# - Step 4: aggregate updated weights -> final global weights
# - Step 5: evaluate final global accuracy
# ========================
records = []

# Saved weights per client id; each .npz is read from Drive once and reused
# across all combinations that contain the client.
weights_cache = {}


def _npz_key_order(key):
    """Sort key: names ending in _<number> (arr_0, arr_1, ...) order numerically, others alphabetically."""
    suffix = key.rsplit("_", 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), key)
    return (1, 0, key)


def _load_client_weights(cid):
    """Return the saved weight list for client `cid`, or None if its .npz file is missing."""
    if cid not in weights_cache:
        full_path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
        if not os.path.exists(full_path):
            print(f"‚ùå Missing weights for client {cid} at {full_path}")
            return None
        with np.load(full_path) as data:
            # Numeric ordering keeps arr_10 after arr_9 (plain string sort would not).
            keys = sorted(data.files, key=_npz_key_order)
            weights_cache[cid] = [data[k] for k in keys]
    return weights_cache[cid]


for combo_id, client_ids in enumerate(combinations):
    clients_data = df[df["Client_ID"].isin(client_ids)].copy()

    # ---- (A) Load local weights for each client (existing .npz)
    init_weights_list = []
    for cid in client_ids:
        client_weights = _load_client_weights(cid)
        if client_weights is None:
            break
        init_weights_list.append(client_weights)

    # Skip combinations for which any client's weights are unavailable.
    if len(init_weights_list) < CLIENTS_PER_COMBINATION:
        continue

    # Many models are built in this loop; drop Keras' accumulated global state
    # from earlier combinations so memory use does not keep growing.
    tf.keras.backend.clear_session()

    # ---- (B) Initial global weights = FedAvg(local weights), unweighted
    global_weights = fedavg(init_weights_list, sample_counts=None)

    # ---- (C) Federated rounds: local training from the global weights, then aggregation
    for _round in range(GLOBAL_ROUNDS):
        updated_weights_list = []
        sample_counts = []

        for cid in client_ids:
            # Build client local dataset (reconstructed from the MNIST training split)
            row = df[df["Client_ID"] == cid].iloc[0]
            ds, n_samp = get_client_dataset(cid, row, batch_size=64)

            # Local training starting from the current global weights
            local_model = build_model()
            local_model.set_weights(global_weights)
            local_model.fit(ds, epochs=LOCAL_EPOCHS, verbose=0)

            updated_weights_list.append(local_model.get_weights())
            sample_counts.append(n_samp)

        # Aggregate updated weights -> new global weights (weighted by local sample count)
        global_weights = fedavg(updated_weights_list, sample_counts=sample_counts)

    # ---- (D) Evaluate final global model on the evaluation subset
    global_model = build_model()
    global_model.set_weights(global_weights)
    _, acc = global_model.evaluate(eval_data, eval_labels, verbose=0)
    global_accuracy = float(acc * 100.0)

    # ---- (E) QoS metrics aggregated over the combination's clients
    # (NaN when the profile CSV lacks the column, matching the FMNIST cell)
    total_volume = clients_data["DataVolume(Samples)"].sum() if "DataVolume(Samples)" in clients_data.columns else np.nan
    total_latency = clients_data["Latency(ms)"].sum() if "Latency(ms)" in clients_data.columns else np.nan
    mean_quality = clients_data["Mean_Quality_Factor(%)"].mean() if "Mean_Quality_Factor(%)" in clients_data.columns else np.nan
    mean_reliability = clients_data["Reliability_Score"].mean() if "Reliability_Score" in clients_data.columns else np.nan

    weight_paths = [os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz") for cid in client_ids]

    records.append({
        "Combination_ID": combo_id,
        **{f"Client_{i+1}": cid for i, cid in enumerate(client_ids)},
        "Weights_Paths": weight_paths,
        "Global_DataVolume": total_volume,
        "Global_Latency": total_latency,
        "Global_Mean_Quality_Factor": mean_quality,
        "Global_Reliability_Score": mean_reliability,
        "Global_Accuracy": global_accuracy
    })

    if combo_id % 100 == 0:
        print(f"‚úÖ Processed {combo_id}/{len(combinations)} combinations")

# ========================
# SAVE FINAL DATAFRAME
# ========================
df_combos = pd.DataFrame(records)
df_combos.to_csv("client_combinations_with_qos_custom.csv", index=False)
print("‚úÖ All combinations processed and saved to CSV.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üî¢ Enter how many clients to sample (n): 13
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
üîÅ Total combinations to evaluate: 286


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


‚úÖ Processed 0/286 combinations
‚úÖ Processed 100/286 combinations
‚úÖ Processed 200/286 combinations
‚úÖ All combinations processed and saved to CSV.


In [None]:
# Write the file first, then end the cell with describe(): Jupyter only renders
# the value of a cell's last expression, so the summary would otherwise be discarded.
df_combos.to_csv("Updated_combination_10_MINIST_20_20_V1.csv")
df_combos['Global_Accuracy'].describe()

Unnamed: 0,Global_Accuracy
count,286.0
mean,74.067133
std,4.15604
min,62.5
25%,71.200001
50%,74.249998
75%,76.899999
max,83.899999


**FMNIST Manual Composition**
---

In [None]:
# ========================
# MOUNT GOOGLE DRIVE
# ========================
from google.colab import drive
import os
drive.mount('/content/drive')
# ========================
# IMPORTS
# ========================
import pandas as pd
import numpy as np
import itertools
import tensorflow as tf
from tensorflow.keras import layers, models
# ========================
# PATHS (EDIT ONLY THESE IF NEEDED)
# ========================
# ‚úÖ FMNIST weights folder (must contain: client_<id>_local.npz)
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_30_FMNIST"

# ‚úÖ FMNIST client profile CSV (must contain Client_ID + optional Label0..Label9 etc.)
# Example name (change to your actual file path):
PROFILES_CSV = "/content/FMNIST_Client_Profiles_For_Composability_30_30.csv"

# Output CSV
OUT_CSV = "client_combinations_with_qos_custom_FMNIST.csv"

# ========================
# ASK USER: HOW MANY CLIENTS TO SAMPLE
# ========================
num_clients_to_sample = int(input("üî¢ Enter how many clients to sample (n): "))

# ========================
# CONFIG (KEEP SAME AS YOUR MNIST COMBO CODE)
# ========================
CLIENTS_PER_COMBINATION = 10  # fixed as you said
LOCAL_EPOCHS = 2               # ‚úÖ "two epochs"
GLOBAL_ROUNDS = 1              # ‚úÖ "one round"
BATCH_SIZE = 64                # consistent

# ========================
# LOAD CLIENT METADATA
# ========================
df = pd.read_csv(PROFILES_CSV)

# Optional: label columns if present (Label0..Label9)
label_cols = [c for c in df.columns if c.lower().startswith("label")]

# ========================
# MODEL DEFINITION (SAME ARCHITECTURE AS MNIST)
# ========================
def build_model():
    """Build and compile the small CNN used for every FMNIST client and for the global model.

    Architecture: Input(28, 28, 1) -> Conv2D(16, 3x3, relu) -> MaxPooling2D(2x2)
    -> Flatten -> Dense(32, relu) -> Dense(10, softmax).

    The input shape is declared with an explicit ``layers.Input`` object rather than
    ``input_shape=`` on the first layer; recent Keras versions emit a warning for the
    latter in Sequential models. The Input object carries no weights, so the list
    returned by ``get_weights()`` keeps the same length and order.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(16, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(32, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# ========================
# LOAD FMNIST (train + test)
# ========================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Scale pixels to [0, 1] as float32 and append a trailing channel axis.
x_train = np.expand_dims(x_train.astype("float32") / 255.0, axis=-1)
x_test = np.expand_dims(x_test.astype("float32") / 255.0, axis=-1)

# Integer class ids, as expected by the sparse categorical loss.
y_train, y_test = y_train.astype("int32"), y_test.astype("int32")

# Global models are scored on the first 1000 test images only.
eval_data, eval_labels = x_test[:1000], y_test[:1000]

# ========================
# BUILD LABEL POOLS (for non-IID reconstruction if label columns exist)
# ========================
# For each class 0..9: the positions in the training set that carry that label,
# plus a rolling read position per class.
train_indices_by_label = {cls: np.flatnonzero(y_train == cls) for cls in range(10)}
label_cursors = dict.fromkeys(range(10), 0)

def _take_indices_for_label(label, count):
    """Take `count` training-set indices for `label` from its pool in a rolling way.

    Reads start at the label's current cursor in ``label_cursors`` and wrap around
    the end of the pool as many times as needed, so exactly `count` indices are
    returned even when `count` exceeds the pool size several times over. The
    cursor is then advanced, which makes the result depend on earlier calls.

    Args:
        label: Class id used as key into ``train_indices_by_label``.
        count: Number of indices requested; values <= 0 yield an empty array.

    Returns:
        1-D integer array of length ``max(count, 0)``.

    Raises:
        ValueError: If indices are requested from an empty pool.
    """
    pool = train_indices_by_label[label]
    if count <= 0:
        return np.array([], dtype=np.int64)

    pool_size = len(pool)
    if pool_size == 0:
        raise ValueError(f"No training samples available for label {label}")

    start = label_cursors[label]
    # Modular positions handle any number of wrap-arounds deterministically.
    positions = (start + np.arange(count)) % pool_size

    label_cursors[label] = (start + count) % pool_size
    return pool[positions]

def get_client_dataset(cid, client_row, batch_size=64):
    """
    Assemble one client's local training data from the Fashion-MNIST training split.

    With at least 10 Label* columns in the profile, the per-class counts from those
    columns (rescaled to the requested volume) drive a non-IID draw through
    `_take_indices_for_label`. Without them, or when every count is zero, the draw
    is a random sample seeded by the client id.

    Returns a tuple of (shuffled and batched tf.data.Dataset, number of samples).
    """
    volume_key = "DataVolume(Samples)"
    has_volume = volume_key in client_row.index and not pd.isna(client_row[volume_key])
    # 500 is the fallback volume; never allow fewer than one sample.
    n_samples = max(1, int(client_row[volume_key]) if has_volume else 500)

    def random_indices():
        # Seeded by client id; replacement only when the request exceeds the training set.
        rng = np.random.RandomState(int(cid) + 123)
        return rng.choice(len(x_train), size=n_samples, replace=(n_samples > len(x_train)))

    if len(label_cols) < 10:
        # No label distribution available in the profile.
        idx = random_indices()
    else:
        # Per-class counts, matched to columns by their trailing digit.
        counts = []
        for digit in map(str, range(10)):
            matching = [c for c in label_cols if c.lower().endswith(digit)]
            value = client_row[matching[0]] if matching else None
            counts.append(0 if value is None or pd.isna(value) else int(value))

        total = sum(counts)
        if total <= 0:
            idx = random_indices()
        else:
            if total != n_samples:
                # Rescale to n_samples, then distribute the flooring shortfall
                # one sample at a time, largest class shares first.
                shares = np.array(counts, dtype=np.float64) / float(total)
                counts = [int(c) for c in np.floor(shares * n_samples)]
                shortfall = n_samples - sum(counts)
                ranked = np.argsort(-shares)
                for step in range(shortfall):
                    counts[int(ranked[step % 10])] += 1

            idx = np.concatenate(
                [_take_indices_for_label(k, counts[k]) for k in range(10)],
                axis=0,
            )

    features, targets = x_train[idx], y_train[idx]

    ds = tf.data.Dataset.from_tensor_slices((features, targets))
    ds = ds.shuffle(
        buffer_size=min(2000, len(targets)),
        seed=int(cid) + 999,
        reshuffle_each_iteration=True,
    )
    ds = ds.batch(batch_size)

    return ds, len(targets)

def fedavg(weights_list, sample_counts=None):
    """
    Average per-client weight lists variable by variable.

    Without `sample_counts` each variable is the plain mean over clients; with it,
    every client's contribution is scaled by its share of the total sample count.
    Returns a list with one averaged array per model variable.
    """
    layer_groups = zip(*weights_list)

    if sample_counts is None:
        return [np.stack(group, axis=0).mean(axis=0) for group in layer_groups]

    shares = np.asarray(sample_counts, dtype=np.float64)
    shares = shares / shares.sum()

    merged = []
    for group in layer_groups:
        stacked = np.stack(group, axis=0)
        # index with trailing None axes so shares broadcast as (num_clients, 1, 1, ...)
        trailing = (None,) * (stacked.ndim - 1)
        merged.append(np.sum(stacked * shares[(slice(None),) + trailing], axis=0))
    return merged

# ========================
# SAMPLE CLIENTS & GENERATE COMBINATIONS
# ========================
# df.sample raises if asked for more rows than exist, so cap the request first.
if num_clients_to_sample > len(df):
    print(f"Requested n={num_clients_to_sample}, but only {len(df)} client profiles exist. Using n={len(df)}.")
    num_clients_to_sample = len(df)

# With fewer sampled clients than the combination size there is nothing to evaluate;
# fail here instead of silently writing an empty result file.
if num_clients_to_sample < CLIENTS_PER_COMBINATION:
    raise ValueError(
        f"Need at least {CLIENTS_PER_COMBINATION} clients to form one combination, "
        f"got n={num_clients_to_sample}."
    )

# Fixed random_state keeps the sampled client pool identical between runs.
selected_df = df.sample(n=num_clients_to_sample, random_state=42)
selected_ids = selected_df["Client_ID"].tolist()

# Every unordered subset of CLIENTS_PER_COMBINATION clients from the sampled pool.
combinations = list(itertools.combinations(selected_ids, CLIENTS_PER_COMBINATION))
print(f"üîÅ Total combinations to evaluate: {len(combinations)}")

# ========================
# COMBINATION EVALUATION
# ========================
records = []

# Saved weights per client id; each .npz is read from Drive once and reused
# across all combinations that contain the client.
weights_cache = {}


def _npz_key_order(key):
    """Sort key: names ending in _<number> (arr_0, arr_1, ...) order numerically, others alphabetically."""
    suffix = key.rsplit("_", 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), key)
    return (1, 0, key)


def _load_client_weights(cid):
    """Return the saved weight list for client `cid`, or None if its .npz file is missing."""
    if cid not in weights_cache:
        full_path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
        if not os.path.exists(full_path):
            print(f"‚ùå Missing weights for client {cid} at {full_path}")
            return None
        with np.load(full_path) as data:
            # Numeric ordering keeps arr_10 after arr_9 (plain string sort would not).
            keys = sorted(data.files, key=_npz_key_order)
            weights_cache[cid] = [data[k] for k in keys]
    return weights_cache[cid]


for combo_id, client_ids in enumerate(combinations):
    clients_data = df[df["Client_ID"].isin(client_ids)].copy()

    # ---- (A) Load local weights for each client
    init_weights_list = []
    for cid in client_ids:
        client_weights = _load_client_weights(cid)
        if client_weights is None:
            break
        init_weights_list.append(client_weights)

    # Skip combinations for which any client's weights are unavailable.
    if len(init_weights_list) < CLIENTS_PER_COMBINATION:
        continue

    # Many models are built in this loop; drop Keras' accumulated global state
    # from earlier combinations so memory use does not keep growing.
    tf.keras.backend.clear_session()

    # ---- (B) Initial global weights = FedAvg(local weights), unweighted
    global_weights = fedavg(init_weights_list, sample_counts=None)

    # ---- (C) Federated rounds: local training from the global weights, then aggregation
    for _round in range(GLOBAL_ROUNDS):
        updated_weights_list = []
        sample_counts = []

        for cid in client_ids:
            row = df[df["Client_ID"] == cid].iloc[0]
            ds, n_samp = get_client_dataset(cid, row, batch_size=BATCH_SIZE)

            # Local training starting from the current global weights
            local_model = build_model()
            local_model.set_weights(global_weights)
            local_model.fit(ds, epochs=LOCAL_EPOCHS, verbose=0)

            updated_weights_list.append(local_model.get_weights())
            sample_counts.append(n_samp)

        # New global weights, weighted by each client's local sample count
        global_weights = fedavg(updated_weights_list, sample_counts=sample_counts)

    # ---- (D) Evaluate final global model on eval subset
    global_model = build_model()
    global_model.set_weights(global_weights)
    _, acc = global_model.evaluate(eval_data, eval_labels, verbose=0)
    global_accuracy = float(acc * 100.0)

    # ---- (E) QoS metrics aggregated over the combination's clients (NaN if a column is absent)
    total_volume = clients_data["DataVolume(Samples)"].sum() if "DataVolume(Samples)" in clients_data.columns else np.nan
    total_latency = clients_data["Latency(ms)"].sum() if "Latency(ms)" in clients_data.columns else np.nan
    mean_quality = clients_data["Mean_Quality_Factor(%)"].mean() if "Mean_Quality_Factor(%)" in clients_data.columns else np.nan
    mean_reliability = clients_data["Reliability_Score"].mean() if "Reliability_Score" in clients_data.columns else np.nan

    weight_paths = [os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz") for cid in client_ids]

    records.append({
        "Combination_ID": combo_id,
        **{f"Client_{i+1}": cid for i, cid in enumerate(client_ids)},
        "Weights_Paths": weight_paths,
        "Global_DataVolume": total_volume,
        "Global_Latency": total_latency,
        "Global_Mean_Quality_Factor": mean_quality,
        "Global_Reliability_Score": mean_reliability,
        "Global_Accuracy": global_accuracy,
    })

    if combo_id % 100 == 0:
        print(f"‚úÖ Processed {combo_id}/{len(combinations)} combinations")

# ========================
# SAVE FINAL DATAFRAME
# ========================
df_combos = pd.DataFrame(records)
df_combos.to_csv(OUT_CSV, index=False)
print(f"‚úÖ All combinations processed and saved to CSV: {OUT_CSV}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üî¢ Enter how many clients to sample (n): 12
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m‚

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


‚úÖ Processed 0/66 combinations
‚úÖ All combinations processed and saved to CSV: client_combinations_with_qos_custom_FMNIST.csv


**CIFAR10 Manual Composition**
---

In [None]:
# ========================
# MOUNT GOOGLE DRIVE
# ========================
from google.colab import drive
import os, re, json, itertools, time
drive.mount('/content/drive')
# ========================
# IMPORTS
# ========================
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# ========================
# PATHS (EDIT ONLY THESE IF NEEDED)
# ========================
# Google Drive folder with the pre-trained CIFAR client weights.
# Files are expected to be named client_<id>_local.npz; the ids found there
# are used below to filter the profile table.
WEIGHTS_DIR  = "/content/drive/MyDrive/MLaaS_Weights_20_CIFAR"

# CIFAR-10 client profile CSV. Must contain Client_ID; DataVolume(Samples) and
# Label* columns are read by get_client_dataset when present.
PROFILES_CSV = "/content/CIFAR10_Client_Profiles_For_Composability_30_30.csv"

# Output CSV
OUT_CSV      = "client_combinations_with_qos_custom_CIFAR.csv"

# ========================
# USER INPUT
# ========================
# Interactive prompt: size n of the client pool from which combinations are drawn.
# int() raises ValueError if the answer is not an integer.
num_clients_to_sample = int(input("üî¢ Enter how many clients to sample (n): "))

# ========================
# CONFIG
# ========================
CLIENTS_PER_COMBINATION = 3  # clients per combination (3 here; the MNIST/FMNIST cells use 10)
LOCAL_EPOCHS = 2               # local training epochs per client in each federated round
GLOBAL_ROUNDS = 1              # number of federated rounds run per combination
BATCH_SIZE = 64

# ========================
# LOAD CLIENT METADATA
# ========================
df = pd.read_csv(PROFILES_CSV)
# Integer ids so they compare equal to the ids parsed from the weight file names.
df["Client_ID"] = df["Client_ID"].astype(int)

# Columns whose name starts with "label" (case-insensitive), e.g. Label0..Label9.
label_cols = [c for c in df.columns if c.lower().startswith("label")]

# ========================
# READ AVAILABLE WEIGHT IDS (FIXES "MISSING WEIGHTS")
# ========================
# Collect the client ids for which a client_<id>_local.npz file exists.
npz_files = list(filter(lambda name: name.endswith(".npz"), os.listdir(WEIGHTS_DIR)))
id_matches = (re.match(r"client_(\d+)_local\.npz$", name) for name in npz_files)
available_ids = {int(hit.group(1)) for hit in id_matches if hit}

print("‚úÖ NPZ weights found:", len(available_ids))
if not available_ids:
    raise FileNotFoundError(f"No client_<id>_local.npz files found in: {WEIGHTS_DIR}")

print("‚úÖ Weight ID range:", min(available_ids), "-", max(available_ids))

# Keep only the profile rows whose client has a weight file.
df = df.loc[df["Client_ID"].isin(available_ids)].copy()
print("‚úÖ Clients available in CSV after filtering:", len(df))

# At least one full combination must be possible.
if len(df) < CLIENTS_PER_COMBINATION:
    raise ValueError(
        f"Not enough clients with weights to form one combination of size {CLIENTS_PER_COMBINATION}. "
        f"Available clients: {len(df)}"
    )

# Cap the requested pool size at the number of usable clients.
if num_clients_to_sample > len(df):
    print(f"‚ö†Ô∏è You asked n={num_clients_to_sample}, but only {len(df)} clients have weights. Using n={len(df)}.")
    num_clients_to_sample = len(df)

# ========================
# CIFAR MODEL (MUST MATCH YOUR SAVED WEIGHTS ARCHITECTURE)
# ========================
def build_model():
    """Build and compile the CIFAR-10 CNN.

    Two convolutional stages (32 then 64 filters; each stage is two 3x3 same-padded
    ReLU convolutions followed by 2x2 max pooling), then Flatten, Dense(128, relu)
    and a 10-way softmax. Compiled with Adam, sparse categorical cross-entropy and
    an accuracy metric.
    """
    stack = [layers.Input(shape=(32, 32, 3))]

    for filters in (32, 64):
        stack.append(layers.Conv2D(filters, (3,3), padding="same", activation="relu"))
        stack.append(layers.Conv2D(filters, (3,3), padding="same", activation="relu"))
        stack.append(layers.MaxPooling2D((2,2)))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation="relu"))
    stack.append(layers.Dense(10, activation="softmax"))

    model = models.Sequential(stack)
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# ========================
# LOAD CIFAR-10 (train + test)
# ========================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Flatten the label arrays to 1-D int32 vectors.
y_train = y_train.astype("int32").reshape(-1)
y_test = y_test.astype("int32").reshape(-1)

# Scale pixels to [0, 1] as float32.
x_train, x_test = (split.astype("float32") / 255.0 for split in (x_train, x_test))

# Global models are scored on the first 1000 test images only.
eval_data, eval_labels = x_test[:1000], y_test[:1000]

# ========================
# LABEL POOLS (for non-IID reconstruction if Label* exists)
# ========================
# For each class 0..9: the positions in the training set that carry that label,
# plus a rolling read position per class.
train_indices_by_label = {cls: np.flatnonzero(y_train == cls) for cls in range(10)}
label_cursors = dict.fromkeys(range(10), 0)

def _take_indices_for_label(label, count):
    """Take `count` training-set indices for `label` from its pool in a rolling way.

    Reads start at the label's current cursor in ``label_cursors`` and wrap around
    the end of the pool as many times as needed, so exactly `count` indices are
    returned even when `count` exceeds the pool size several times over. The
    cursor is then advanced, which makes the result depend on earlier calls.

    Args:
        label: Class id used as key into ``train_indices_by_label``.
        count: Number of indices requested; values <= 0 yield an empty array.

    Returns:
        1-D integer array of length ``max(count, 0)``.

    Raises:
        ValueError: If indices are requested from an empty pool.
    """
    pool = train_indices_by_label[label]
    if count <= 0:
        return np.array([], dtype=np.int64)

    pool_size = len(pool)
    if pool_size == 0:
        raise ValueError(f"No training samples available for label {label}")

    start = label_cursors[label]
    # Modular positions handle any number of wrap-arounds deterministically.
    positions = (start + np.arange(count)) % pool_size

    label_cursors[label] = (start + count) % pool_size
    return pool[positions]

def get_client_dataset(cid, client_row, batch_size=64):
    """
    Assemble one client's local training data from the CIFAR-10 training split.

    The sample count comes from DataVolume(Samples) in the profile row (500 if it is
    absent or NaN, and at least 1). With at least 10 Label* columns, per-class counts
    from those columns are rescaled to that volume and drawn through
    `_take_indices_for_label`; otherwise, or when all counts are zero, a random
    sample seeded by the client id is used.

    Returns a tuple of (shuffled and batched tf.data.Dataset, number of samples).
    """
    volume_key = "DataVolume(Samples)"
    if volume_key in client_row.index and not pd.isna(client_row[volume_key]):
        requested = int(client_row[volume_key])
    else:
        requested = 500
    n_samples = max(1, requested)

    def seeded_draw():
        # Seeded by client id; replacement only when the request exceeds the training set.
        generator = np.random.RandomState(int(cid) + 123)
        return generator.choice(len(x_train), size=n_samples, replace=(n_samples > len(x_train)))

    def class_counts():
        # One count per class, taken from the first label column ending in that digit.
        found = []
        for k in range(10):
            hits = [c for c in label_cols if c.lower().endswith(str(k))]
            if not hits:
                found.append(0)
                continue
            raw = client_row[hits[0]]
            found.append(0 if pd.isna(raw) else int(raw))
        return found

    if len(label_cols) >= 10:
        counts = class_counts()
        total = sum(counts)

        if total <= 0:
            idx = seeded_draw()
        else:
            if total != n_samples:
                # Rescale to n_samples; flooring may leave a shortfall that is handed
                # out one sample at a time, largest class shares first.
                fractions = np.array(counts, dtype=np.float64) / float(total)
                counts = np.floor(fractions * n_samples).astype(int).tolist()
                missing = n_samples - sum(counts)
                if missing > 0:
                    by_share = np.argsort(-fractions)
                    for j in range(missing):
                        counts[int(by_share[j % 10])] += 1

            idx = np.concatenate(
                [_take_indices_for_label(k, counts[k]) for k in range(10)],
                axis=0,
            )
    else:
        idx = seeded_draw()

    images, labels = x_train[idx], y_train[idx]

    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    ds = ds.shuffle(
        buffer_size=min(2000, len(labels)),
        seed=int(cid) + 999,
        reshuffle_each_iteration=True,
    )
    ds = ds.batch(batch_size)

    return ds, len(labels)

# ========================
# ‚úÖ FIX 1: LOAD NPZ WEIGHTS IN CORRECT ORDER (NO STRING SORT BUG)
# ========================
def load_npz_weights(npz_path):
    """Load all arrays from an .npz archive, ordered by the integer after the first underscore in each key.

    Keys are expected to look like 'arr_0', 'arr_1', ...; ordering is numeric, so
    'arr_10' comes after 'arr_9'.
    """
    def array_position(key):
        return int(key.split("_")[1])

    with np.load(npz_path) as archive:
        names = list(archive.files)
        names.sort(key=array_position)
        return [archive[name] for name in names]

# ========================
# FedAvg
# ========================
def fedavg(weights_list, sample_counts=None):
    """Average per-client model weights tensor by tensor (FedAvg).

    Args:
        weights_list: one entry per client; each entry is a list of arrays (one per
            model tensor). Corresponding tensors must have the same shape.
        sample_counts: optional per-client weighting (e.g. number of local
            samples). When ``None`` every client contributes equally.

    Returns:
        list of arrays, one per model tensor, holding the (weighted) average.

    Raises:
        ValueError: if no client is given, clients provide different numbers of
            tensors, ``sample_counts`` does not have exactly one entry per client,
            contains negative or non-finite values, or sums to zero.
    """
    weights_list = list(weights_list)
    if not weights_list:
        raise ValueError("fedavg() needs the weights of at least one client.")
    # zip() below would silently truncate to the shortest client otherwise.
    if len({len(ws) for ws in weights_list}) != 1:
        raise ValueError("All clients must provide the same number of weight tensors.")

    if sample_counts is None:
        return [np.mean(np.stack(ws, axis=0), axis=0) for ws in zip(*weights_list)]

    sample_counts = np.asarray(sample_counts, dtype=np.float64).ravel()
    # A wrong-length vector could still broadcast (e.g. length 1) and silently
    # produce a sum instead of an average, so the length is checked explicitly.
    if sample_counts.size != len(weights_list):
        raise ValueError(
            f"sample_counts has {sample_counts.size} entries but there are "
            f"{len(weights_list)} clients."
        )
    if not np.all(np.isfinite(sample_counts)) or np.any(sample_counts < 0):
        raise ValueError("sample_counts must be finite and non-negative.")
    total = sample_counts.sum()
    if total <= 0:
        # Normalising by zero would fill every averaged tensor with NaN.
        raise ValueError("sample_counts must sum to a positive value.")
    sample_counts = sample_counts / total

    avg_weights = []
    for layer_ws in zip(*weights_list):
        stacked = np.stack(layer_ws, axis=0)
        # Shape (n_clients, 1, 1, ...) so the client weights broadcast over the tensor.
        w = sample_counts.reshape((-1,) + (1,) * (stacked.ndim - 1))
        avg_weights.append(np.sum(stacked * w, axis=0))
    return avg_weights

# ========================
# SAMPLE CLIENTS & GENERATE COMBINATIONS
# ========================
selected_df = df.sample(n=num_clients_to_sample, random_state=42)
selected_ids = selected_df["Client_ID"].tolist()

combinations = list(itertools.combinations(selected_ids, CLIENTS_PER_COMBINATION))
print(f"üîÅ Total combinations to evaluate: {len(combinations)}")

# ========================
# COMBINATION EVALUATION
# ========================
# build_model() yields the same architecture for every combination, so the
# reference shapes are computed once instead of once per combination.
test_model = build_model()
expected_shapes = [w.shape for w in test_model.get_weights()]

# Saved local weights are read from disk once per client and reused by every
# combination containing that client (the arrays are only read, never modified).
local_weights_cache = {}

records = []

for combo_id, client_ids in enumerate(combinations):
    # Each combination builds fresh Keras models; drop the global state kept for
    # the previous ones so memory use does not keep growing over the loop.
    tf.keras.backend.clear_session()

    clients_data = df[df["Client_ID"].isin(client_ids)].copy()
    weight_paths = [os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz") for cid in client_ids]

    # ---- (A) Load local weights
    init_weights_list = []
    missing_clients = []

    for cid, full_path in zip(client_ids, weight_paths):
        if cid not in local_weights_cache:
            if not os.path.exists(full_path):
                missing_clients.append(cid)
                continue
            local_weights_cache[cid] = load_npz_weights(full_path)
        init_weights_list.append(local_weights_cache[cid])

    # If any missing, skip this combo (should not happen after filtering, but safe)
    if missing_clients or len(init_weights_list) < CLIENTS_PER_COMBINATION:
        print(f"‚ö†Ô∏è Combo {combo_id} skipped, missing weights for: {missing_clients}")
        continue

    # ---- (B) Initial global weights = FedAvg(local weights)
    global_weights = fedavg(init_weights_list)

    # ---- (C) Safety check: ensure shapes match model
    got_shapes = [w.shape for w in global_weights]
    if expected_shapes != got_shapes:
        print("‚ùå Weight shape mismatch in combo:", combo_id, "clients:", client_ids)
        print("Expected:", expected_shapes)
        print("Got     :", got_shapes)
        raise ValueError("Loaded weights do not match model architecture/order. Fix your saved model or weight loading.")

    # ---- (D) GLOBAL_ROUNDS federated rounds, LOCAL_EPOCHS local epochs each
    for _ in range(GLOBAL_ROUNDS):
        updated_weights_list = []
        sample_counts = []

        for cid in client_ids:
            row = df[df["Client_ID"] == cid].iloc[0]
            ds, n_samp = get_client_dataset(cid, row, batch_size=BATCH_SIZE)

            # Every client starts the round from the current global weights.
            local_model = build_model()
            local_model.set_weights(global_weights)
            local_model.fit(ds, epochs=LOCAL_EPOCHS, verbose=0)

            updated_weights_list.append(local_model.get_weights())
            sample_counts.append(n_samp)

        # Aggregate weighted by the number of samples each client trained on.
        global_weights = fedavg(updated_weights_list, sample_counts=sample_counts)

    # ---- (E) Evaluate final global model
    global_model = build_model()
    global_model.set_weights(global_weights)
    _, acc = global_model.evaluate(eval_data, eval_labels, verbose=0)
    global_accuracy = float(acc * 100.0)

    # ---- (F) QoS metrics (if columns exist)
    total_volume = clients_data["DataVolume(Samples)"].sum() if "DataVolume(Samples)" in clients_data.columns else np.nan
    total_latency = clients_data["Latency(ms)"].sum() if "Latency(ms)" in clients_data.columns else np.nan
    mean_quality = clients_data["Mean_Quality_Factor(%)"].mean() if "Mean_Quality_Factor(%)" in clients_data.columns else np.nan
    mean_reliability = clients_data["Reliability_Score"].mean() if "Reliability_Score" in clients_data.columns else np.nan

    records.append({
        "Combination_ID": combo_id,
        **{f"Client_{i+1}": cid for i, cid in enumerate(client_ids)},
        "Weights_Paths": weight_paths,
        "Global_DataVolume": total_volume,
        "Global_Latency": total_latency,
        "Global_Mean_Quality_Factor": mean_quality,
        "Global_Reliability_Score": mean_reliability,
        "Global_Accuracy": global_accuracy,
    })

    if combo_id % 50 == 0:
        print(f"‚úÖ Processed {combo_id}/{len(combinations)} combinations")

# One row per evaluated combination (skipped combinations are absent).
df_combos = pd.DataFrame(records)
df_combos.to_csv(OUT_CSV, index=False)
print(f"‚úÖ Saved: {OUT_CSV} | Rows: {len(df_combos)}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üî¢ Enter how many clients to sample (n): 13
‚úÖ NPZ weights found: 30
‚úÖ Weight ID range: 1 - 30
‚úÖ Clients available in CSV after filtering: 30
üîÅ Total combinations to evaluate: 286
‚úÖ Processed 0/286 combinations
‚úÖ Processed 50/286 combinations
‚úÖ Processed 100/286 combinations
‚úÖ Processed 150/286 combinations
‚úÖ Processed 200/286 combinations
‚úÖ Processed 250/286 combinations
‚úÖ Saved: client_combinations_with_qos_custom_CIFAR.csv | Rows: 286


In [None]:
# Summary statistics (count/mean/std/quantiles) of the numeric result columns.
df_combos.describe()

Unnamed: 0,Combination_ID,Client_1,Client_2,Client_3,Global_DataVolume,Global_Latency,Global_Mean_Quality_Factor,Global_Reliability_Score,Global_Accuracy
count,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0
mean,142.5,19.891608,15.486014,11.006993,7210.153846,35315.325847,24.474573,0.538462,29.044406
std,82.7053,7.137654,8.75664,8.010516,1384.425664,3641.155727,4.028871,0.076197,5.97129
min,0.0,1.0,1.0,1.0,3239.0,25325.00267,12.962591,0.3,13.2
25%,71.25,16.0,9.0,5.0,6198.5,32773.464978,21.538224,0.5,25.025
50%,142.5,18.0,13.0,6.0,7170.0,35402.976155,24.298181,0.533333,29.4
75%,213.75,28.0,24.0,17.0,8261.25,37908.545673,27.632379,0.6,33.675001
max,285.0,29.0,29.0,29.0,10325.0,44096.36879,33.503737,0.7,41.9


In [None]:
# Extra copy of the results under a fixed file name. Unlike the OUT_CSV save,
# index=False is not passed here, so the row index is written as an unnamed
# first column of this file.
df_combos.to_csv("Updated_combination_3_CIFAR_20_20.csv")

**HAR Manual Composition**
---

In [None]:
# ========================
# MOUNT GOOGLE DRIVE
# ========================
from google.colab import drive
import os, re, json, itertools, time
drive.mount('/content/drive')

# ========================
# IMPORTS
# ========================
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# ========================
# PATHS (EDIT ONLY THESE IF NEEDED)
# ========================
# Folder with the saved local weights; must contain client_<id>_local.npz (e.g., client_1_local.npz ...)
WEIGHTS_DIR  = "/content/drive/MyDrive/MLaaS_Weights_20_HAR"

# HAR client profile CSV (must contain Client_ID, plus QoS columns if you want them aggregated)
PROFILES_CSV = "/content/HAR_Client_Profiles_For_Composability_30_30.csv"

# HAR dataset CSV (the dataset file itself is used as-is)
DATASET_CSV  = "/content/drive/My Drive/Early Drift Detection/pamap2_final.csv"

# Output CSV
OUT_CSV      = "client_combinations_with_qos_custom_HAR.csv"

# ========================
# USER INPUT
# ========================
# n = number of profile rows sampled before combinations are formed. The prompt
# blocks until a value is typed, so this cell cannot run unattended.
num_clients_to_sample = int(input("üî¢ Enter how many clients to sample (n): "))

# ========================
# CONFIG (SAME COMBO LOGIC)
# ========================
CLIENTS_PER_COMBINATION = 3    # clients per combination: C(n, 3) combos are evaluated (n=12 => 220, n=30 => 4060)
LOCAL_EPOCHS = 2               # local training epochs per client in each round
GLOBAL_ROUNDS = 1              # number of federated rounds per combination
BATCH_SIZE = 32                # batch size passed to get_client_dataset

# ========================
# LOAD CLIENT METADATA
# ========================
# Client profile table; Client_ID is forced to int so it compares equal to the
# ids parsed from the weight file names below.
dfp = pd.read_csv(PROFILES_CSV)
dfp["Client_ID"] = dfp["Client_ID"].astype(int)

# Per-class count columns of the profile (any column whose name starts with "label").
label_cols = [c for c in dfp.columns if c.lower().startswith("label")]

# ========================
# READ AVAILABLE WEIGHT IDS (FIXES "MISSING WEIGHTS")
# ========================
npz_files = [f for f in os.listdir(WEIGHTS_DIR) if f.endswith(".npz")]
available_ids = set()
for f in npz_files:
    # Only files named exactly client_<digits>_local.npz count as client weights.
    m = re.match(r"client_(\d+)_local\.npz$", f)
    if m:
        available_ids.add(int(m.group(1)))

print("‚úÖ NPZ weights found:", len(available_ids))
if len(available_ids) == 0:
    raise FileNotFoundError(f"No client_<id>_local.npz files found in: {WEIGHTS_DIR}")

print("‚úÖ Weight ID range:", min(available_ids), "-", max(available_ids))

# Filter profiles to only clients that have weights
dfp = dfp[dfp["Client_ID"].isin(available_ids)].copy()
print("‚úÖ Clients available in profile CSV after filtering:", len(dfp))

if len(dfp) < CLIENTS_PER_COMBINATION:
    raise ValueError(
        f"Not enough clients with weights to form one combination of size {CLIENTS_PER_COMBINATION}. "
        f"Available clients: {len(dfp)}"
    )

if num_clients_to_sample > len(dfp):
    print(f"‚ö†Ô∏è You asked n={num_clients_to_sample}, but only {len(dfp)} clients have weights. Using n={len(dfp)}.")
    num_clients_to_sample = len(dfp)

# Sampling fewer clients than one combination needs would produce zero
# combinations and an empty result file without any error, so fail early.
if num_clients_to_sample < CLIENTS_PER_COMBINATION:
    raise ValueError(
        f"n={num_clients_to_sample} is smaller than the combination size "
        f"{CLIENTS_PER_COMBINATION}; no combination can be formed."
    )

# ========================
# LOAD HAR DATASET (KEEP AS-IS, but make train pool for reconstruction)
# ========================
# Read the HAR table and shuffle its rows with a fixed seed.
df = pd.read_csv(DATASET_CSV).sample(frac=1, random_state=42).reset_index(drop=True)

# Activity names -> integer class ids.
le_activity = LabelEncoder()
df["activity"] = le_activity.fit_transform(df["activity"])

# The target is never a feature; the subject column (when present) is encoded
# and then excluded from the features as well.
drop_cols = ["activity"]
if "subject" in df.columns:
    le_subject = LabelEncoder()
    df["subject"] = le_subject.fit_transform(df["subject"])
    drop_cols.append("subject")

X = df.drop(columns=drop_cols).to_numpy().astype("float32")
y = df["activity"].to_numpy().astype("int32")

FEATURE_COUNT = X.shape[1]
NUM_CLASSES   = len(np.unique(y))

# Global 60/40 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
    shuffle=True,
)

# Standardise with statistics fitted on the training part only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train).astype("float32")
X_test  = scaler.transform(X_test).astype("float32")

print(f"üì¶ HAR Loaded ‚Üí Train: {len(X_train)}, Test: {len(X_test)}, Features: {FEATURE_COUNT}, Classes: {NUM_CLASSES}")

# Evaluation uses the first 1000 test rows.
eval_data   = X_test[:1000]
eval_labels = y_test[:1000]

# ========================
# LABEL POOLS (for reconstruction if profile has Label0..LabelK)
# ========================
# For every class: positions of its samples inside y_train, plus a read cursor
# (starting at 0) that _take_indices_for_label advances.
train_indices_by_label = {}
label_cursors = {}
for k in range(NUM_CLASSES):
    train_indices_by_label[k] = np.flatnonzero(y_train == k)
    label_cursors[k] = 0

def _take_indices_for_label(label, count):
    pool = train_indices_by_label[label]
    if count <= 0:
        return np.array([], dtype=np.int64)

    start = label_cursors[label]
    end = start + count

    if end <= len(pool):
        out = pool[start:end]
    else:
        part1 = pool[start:]
        remaining = end - len(pool)
        part2 = pool[:remaining]
        out = np.concatenate([part1, part2], axis=0)

    label_cursors[label] = end % len(pool)
    return out

def get_client_dataset(cid, profile_row, batch_size=32):
    """Build the shuffled, batched ``tf.data.Dataset`` one client trains on.

    The sample budget is ``profile_row["DataVolume(Samples)"]`` (500 when that
    entry is absent or NaN, and never less than 1). When the profile has at least
    ``NUM_CLASSES`` label columns, the per-class counts found in the row are
    rescaled to the budget and the samples are taken per class through
    ``_take_indices_for_label``. Otherwise, or when all counts are zero, indices
    are drawn at random from the training pool with a seed derived from ``cid``.

    Note: ``_take_indices_for_label`` advances shared per-label cursors, so the
    samples picked on the per-class path depend on the calls made before this one.

    Args:
        cid: client identifier; must be convertible with ``int()`` (used for seeds).
        profile_row: the client's row of the profile table, indexed by column name.
        batch_size: batch size of the returned dataset.

    Returns:
        ``(dataset, n)`` where ``n`` is the number of samples in the dataset.
    """
    volume_col = "DataVolume(Samples)"
    if volume_col in profile_row.index and not pd.isna(profile_row[volume_col]):
        n_samples = int(profile_row[volume_col])
    else:
        n_samples = 500
    n_samples = max(1, n_samples)

    def _random_indices():
        # Seeded by the client id so the same client always gets the same draw;
        # sampling with replacement only when the budget exceeds the pool.
        rng = np.random.RandomState(int(cid) + 123)
        return rng.choice(len(X_train), size=n_samples, replace=(n_samples > len(X_train)))

    def _profile_count(k):
        # Compare the column's whole trailing number with k. A plain
        # endswith(str(k)) would also accept "Label10" for class 0, "Label11"
        # for class 1, and so on.
        for col in label_cols:
            m = re.search(r"(\d+)$", col)
            if m and int(m.group(1)) == k:
                v = profile_row[col]
                return 0 if pd.isna(v) else int(v)
        return 0

    # Per-class counts are only used when the profile has enough label columns.
    counts = []
    if len(label_cols) >= NUM_CLASSES:
        counts = [_profile_count(k) for k in range(NUM_CLASSES)]
    total = sum(counts)

    if total <= 0:
        idx = _random_indices()
    else:
        # Scale counts to n_samples while keeping the class proportions.
        if total != n_samples:
            scaled = np.array(counts, dtype=np.float64) / float(total)
            counts = np.floor(scaled * n_samples).astype(int).tolist()
            gap = n_samples - sum(counts)
            if gap > 0:
                # Flooring loses a few samples; give them back one at a time,
                # largest class share first.
                order = np.argsort(-scaled)
                for i in range(gap):
                    counts[int(order[i % NUM_CLASSES])] += 1

        idx = np.concatenate(
            [_take_indices_for_label(k, counts[k]) for k in range(NUM_CLASSES)],
            axis=0,
        )

    x_c = X_train[idx]
    y_c = y_train[idx]

    ds = tf.data.Dataset.from_tensor_slices((x_c, y_c)).shuffle(
        buffer_size=min(2000, len(y_c)),
        seed=int(cid) + 999,
        reshuffle_each_iteration=True
    ).batch(batch_size)

    return ds, len(y_c)

# ========================
# HAR MODEL (MUST MATCH THE ONE USED TO SAVE client_<id>_local.npz)
# ========================
def build_model():
    """Create and compile the HAR classifier.

    Layers, in order: input of ``FEATURE_COUNT`` features; ReLU dense layers of
    256, 128 and 64 units, each followed by dropout 0.2; a ReLU dense layer of
    32 units; a softmax dense layer of ``NUM_CLASSES`` units. Compiled with Adam,
    sparse categorical cross-entropy and the accuracy metric.

    Returns:
        the compiled ``Sequential`` model.
    """
    # (units, followed_by_dropout) for the hidden ReLU layers, in order.
    hidden_spec = ((256, True), (128, True), (64, True), (32, False))

    stack = [layers.Input(shape=(FEATURE_COUNT,))]
    for units, with_dropout in hidden_spec:
        stack.append(layers.Dense(units, activation="relu"))
        if with_dropout:
            stack.append(layers.Dropout(0.2))
    stack.append(layers.Dense(NUM_CLASSES, activation="softmax"))

    net = models.Sequential(stack)
    net.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

# ========================
# ✅ FIX 1: LOAD NPZ WEIGHTS IN CORRECT ORDER
# ========================
def load_npz_weights(npz_path):
    """Return the arrays of an ``.npz`` file sorted by the integer in their key.

    Keys are expected in the ``arr_<n>`` form; ordering by ``int(<n>)`` keeps
    ``arr_10`` after ``arr_9`` (a plain string sort would not).

    Args:
        npz_path: path of the ``.npz`` archive.

    Returns:
        list of arrays in ascending ``<n>`` order.
    """
    with np.load(npz_path) as archive:
        numbered = [(int(name.split("_")[1]), name) for name in archive.files]
        # Stable sort on the number only, so ties keep the archive's own order.
        numbered.sort(key=lambda pair: pair[0])
        arrays = [archive[name] for _, name in numbered]
    return arrays

# ========================
# FedAvg
# ========================
def fedavg(weights_list, sample_counts=None):
    """Average per-client model weights tensor by tensor (FedAvg).

    Args:
        weights_list: one entry per client; each entry is a list of arrays (one per
            model tensor). Corresponding tensors must have the same shape.
        sample_counts: optional per-client weighting (e.g. number of local
            samples). When ``None`` every client contributes equally.

    Returns:
        list of arrays, one per model tensor, holding the (weighted) average.

    Raises:
        ValueError: if no client is given, clients provide different numbers of
            tensors, ``sample_counts`` does not have exactly one entry per client,
            contains negative or non-finite values, or sums to zero.
    """
    weights_list = list(weights_list)
    if not weights_list:
        raise ValueError("fedavg() needs the weights of at least one client.")
    # zip() below would silently truncate to the shortest client otherwise.
    if len({len(ws) for ws in weights_list}) != 1:
        raise ValueError("All clients must provide the same number of weight tensors.")

    if sample_counts is None:
        return [np.mean(np.stack(ws, axis=0), axis=0) for ws in zip(*weights_list)]

    sample_counts = np.asarray(sample_counts, dtype=np.float64).ravel()
    # A wrong-length vector could still broadcast (e.g. length 1) and silently
    # produce a sum instead of an average, so the length is checked explicitly.
    if sample_counts.size != len(weights_list):
        raise ValueError(
            f"sample_counts has {sample_counts.size} entries but there are "
            f"{len(weights_list)} clients."
        )
    if not np.all(np.isfinite(sample_counts)) or np.any(sample_counts < 0):
        raise ValueError("sample_counts must be finite and non-negative.")
    total = sample_counts.sum()
    if total <= 0:
        # Normalising by zero would fill every averaged tensor with NaN.
        raise ValueError("sample_counts must sum to a positive value.")
    sample_counts = sample_counts / total

    avg_weights = []
    for layer_ws in zip(*weights_list):
        stacked = np.stack(layer_ws, axis=0)
        # Shape (n_clients, 1, 1, ...) so the client weights broadcast over the tensor.
        w = sample_counts.reshape((-1,) + (1,) * (stacked.ndim - 1))
        avg_weights.append(np.sum(stacked * w, axis=0))
    return avg_weights

# ========================
# SAMPLE CLIENTS & GENERATE COMBINATIONS
# ========================
selected_df = dfp.sample(n=num_clients_to_sample, random_state=42)
selected_ids = selected_df["Client_ID"].tolist()

combinations = list(itertools.combinations(selected_ids, CLIENTS_PER_COMBINATION))
print(f"üîÅ Total combinations to evaluate: {len(combinations)}")

# ========================
# COMBINATION EVALUATION
# ========================
# build_model() yields the same architecture for every combination, so the
# reference shapes are computed once instead of once per combination.
test_model = build_model()
expected_shapes = [w.shape for w in test_model.get_weights()]

# Saved local weights are read from disk once per client and reused by every
# combination containing that client (the arrays are only read, never modified).
local_weights_cache = {}

records = []

for combo_id, client_ids in enumerate(combinations):
    # Each combination builds fresh Keras models; drop the global state kept for
    # the previous ones so memory use does not keep growing over the loop.
    tf.keras.backend.clear_session()

    clients_data = dfp[dfp["Client_ID"].isin(client_ids)].copy()
    weight_paths = [os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz") for cid in client_ids]

    # ---- (A) Load local weights
    init_weights_list = []
    missing_clients = []

    for cid, full_path in zip(client_ids, weight_paths):
        if cid not in local_weights_cache:
            if not os.path.exists(full_path):
                missing_clients.append(cid)
                continue
            local_weights_cache[cid] = load_npz_weights(full_path)
        init_weights_list.append(local_weights_cache[cid])

    # Skip the combination when any of its clients has no weight file.
    if missing_clients or len(init_weights_list) < CLIENTS_PER_COMBINATION:
        print(f"‚ö†Ô∏è Combo {combo_id} skipped, missing weights for: {missing_clients}")
        continue

    # ---- (B) Initial global weights
    global_weights = fedavg(init_weights_list)

    # ---- (C) Safety check: shapes match
    got_shapes = [w.shape for w in global_weights]
    if expected_shapes != got_shapes:
        print("‚ùå Weight shape mismatch in combo:", combo_id, "clients:", client_ids)
        print("Expected:", expected_shapes)
        print("Got     :", got_shapes)
        raise ValueError("Loaded weights do not match HAR model architecture/order.")

    # ---- (D) GLOBAL_ROUNDS federated rounds, LOCAL_EPOCHS local epochs each
    for _ in range(GLOBAL_ROUNDS):
        updated_weights_list = []
        sample_counts = []

        for cid in client_ids:
            row = dfp[dfp["Client_ID"] == cid].iloc[0]
            ds, n_samp = get_client_dataset(cid, row, batch_size=BATCH_SIZE)

            # Every client starts the round from the current global weights.
            local_model = build_model()
            local_model.set_weights(global_weights)
            local_model.fit(ds, epochs=LOCAL_EPOCHS, verbose=0)

            updated_weights_list.append(local_model.get_weights())
            sample_counts.append(n_samp)

        # Aggregate weighted by the number of samples each client trained on.
        global_weights = fedavg(updated_weights_list, sample_counts=sample_counts)

    # ---- (E) Evaluate final global model
    global_model = build_model()
    global_model.set_weights(global_weights)
    _, acc = global_model.evaluate(eval_data, eval_labels, verbose=0)
    global_accuracy = float(acc * 100.0)

    # ---- (F) QoS metrics (if columns exist)
    total_volume = clients_data["DataVolume(Samples)"].sum() if "DataVolume(Samples)" in clients_data.columns else np.nan
    total_latency = clients_data["Latency(ms)"].sum() if "Latency(ms)" in clients_data.columns else np.nan
    mean_quality = clients_data["Mean_Quality_Factor(%)"].mean() if "Mean_Quality_Factor(%)" in clients_data.columns else np.nan
    mean_reliability = clients_data["Reliability_Score"].mean() if "Reliability_Score" in clients_data.columns else np.nan

    records.append({
        "Combination_ID": combo_id,
        **{f"Client_{i+1}": cid for i, cid in enumerate(client_ids)},
        "Weights_Paths": weight_paths,
        "Global_DataVolume": total_volume,
        "Global_Latency": total_latency,
        "Global_Mean_Quality_Factor": mean_quality,
        "Global_Reliability_Score": mean_reliability,
        "Global_Accuracy": global_accuracy,
    })

    if combo_id % 50 == 0:
        print(f"‚úÖ Processed {combo_id}/{len(combinations)} combinations")

# ========================
# SAVE FINAL DATAFRAME
# ========================
# One row per evaluated combination (skipped combinations are absent).
df_combos = pd.DataFrame(records)
df_combos.to_csv(OUT_CSV, index=False)
print(f"‚úÖ Saved: {OUT_CSV} | Rows: {len(df_combos)}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üî¢ Enter how many clients to sample (n): 12
‚úÖ NPZ weights found: 30
‚úÖ Weight ID range: 1 - 30
‚úÖ Clients available in profile CSV after filtering: 30
üì¶ HAR Loaded ‚Üí Train: 447813, Test: 298542, Features: 43, Classes: 5
üîÅ Total combinations to evaluate: 220
‚úÖ Processed 0/220 combinations
‚úÖ Processed 50/220 combinations
‚úÖ Processed 100/220 combinations


In [None]:
# Summary statistics of the numeric columns of the HAR combination results.
df_combos.describe()

In [None]:
# Extra copy of the results under a fixed file name. Unlike the OUT_CSV save,
# index=False is not passed here, so the row index is written as an unnamed
# first column of this file.
df_combos.to_csv("Updated_combination_3_HAR_20_20.csv")