**MNIST Dataset**
---

In [None]:
import numpy as np
import pandas as pd
import time, os, json
import tensorflow as tf
from tensorflow.keras import layers, models

# ======================================================
# CONNECT GOOGLE DRIVE
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# CONFIG
# ======================================================
NUM_CLIENTS      = 100      # you can reduce for faster runs
IID_FRACTION    = 0.4        # half IID, half non-IID
LOCAL_EPOCHS    = 5
GLOBAL_ROUNDS   = 5          # rounds for quality & reliability
BATCH_SIZE      = 64
AVAILABILITY_PROB = 0.6      # probability a client participates in a round

# ======================================================
# SAVE ALL WEIGHTS IN GOOGLE DRIVE
# ======================================================
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_100_MNIST"  # <--- UPDATED LOCATION
os.makedirs(WEIGHTS_DIR, exist_ok=True)

FEATURE_COUNT = 28 * 28

# ======================================================
# LOAD MNIST
# ======================================================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0
y_train = y_train.astype("int32")
y_test  = y_test.astype("int32")

all_images = np.concatenate([x_train, x_test], axis=0)
all_labels = np.concatenate([y_train, y_test], axis=0)


def create_clients_iid_noniid(images, labels, num_clients, iid_indices, non_iid_indices):
    clients = {}
    scenario = {}

    class_indices = {cls: np.where(labels == cls)[0] for cls in range(10)}

    for i in range(num_clients):
        samples_per_client = np.random.randint(800, 4000)
        client_samples = []

        if i in iid_indices:
            per_class = samples_per_client // 10
            for cls in range(10):
                chosen = np.random.choice(class_indices[cls],
                                          per_class,
                                          replace=True)
                client_samples.extend(chosen)
            scen = "IID"

        elif i in non_iid_indices:
            alpha = np.random.uniform(0.3, 3.0)
            class_weights = np.random.dirichlet(np.ones(10) * alpha)

            for cls in range(10):
                n_cls = int(class_weights[cls] * samples_per_client)
                if n_cls > 0:
                    chosen = np.random.choice(class_indices[cls],
                                              n_cls,
                                              replace=True)
                    client_samples.extend(chosen)
            scen = "NonIID"

        np.random.shuffle(client_samples)
        cid = i + 1
        clients[cid] = (images[client_samples], labels[client_samples])
        scenario[cid] = scen

    return clients, scenario


num_iid = int(NUM_CLIENTS * IID_FRACTION)
iid_indices     = list(range(0, num_iid))
non_iid_indices = list(range(num_iid, NUM_CLIENTS))

clients, scenario = create_clients_iid_noniid(
    all_images, all_labels,
    NUM_CLIENTS, iid_indices, non_iid_indices
)

print(f"‚úÖ Created {len(clients)} clients "
      f"({sum(1 for s in scenario.values() if s=='IID')} IID, "
      f"{sum(1 for s in scenario.values() if s=='NonIID')} NonIID)")


# ======================================================
# MODEL
# ======================================================
def build_model():
    model = models.Sequential([
        layers.Conv2D(16, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(32, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


# ======================================================
# LOCAL TRAINING + LOCAL QoS
# ======================================================
client_records = []
base_accuracy = {}
local_results_for_global = {}

for cid, (x_c, y_c) in clients.items():
    x_c = x_c[..., None]

    model = build_model()

    t0 = time.time()
    model.fit(x_c, y_c,
              epochs=LOCAL_EPOCHS,
              batch_size=BATCH_SIZE,
              verbose=0)
    train_time = time.time() - t0

    C_p = 1.0 / train_time
    latency_ms = train_time * 1000.0

    _, acc = model.evaluate(x_test[..., None], y_test, verbose=0)
    acc_pct = float(acc * 100.0)
    base_accuracy[cid] = acc_pct

    weights = model.get_weights()
    flat = np.concatenate([w.flatten() for w in weights])
    size_MB = flat.nbytes / (1024 * 1024)
    bw_t0 = time.time()
    _ = flat.copy()
    bw_time = time.time() - bw_t0
    BW_MBps = float(size_MB / bw_time)

    # ======================================================
    # UPDATED: SAVE CLIENT WEIGHTS IN GOOGLE DRIVE
    # ======================================================
    weight_path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
    np.savez_compressed(weight_path, *weights)

    unique, counts = np.unique(y_c, return_counts=True)
    label_map = dict(zip(unique, counts))

    row = {
        "Client_ID": cid,
        "Scenario": scenario[cid],
        "DataVolume(Samples)": int(len(y_c)),
        "FeatureCount": FEATURE_COUNT,
        "Local_Accuracy(%)": round(acc_pct, 2),
        "Train_Time(s)": train_time,
        "Latency(ms)": latency_ms,
        "C_p": C_p,
        "BW": BW_MBps,
        "Weights_Path": weight_path,
    }

    for l in range(10):
        row[f"Label{l}"] = int(label_map.get(l, 0))

    client_records.append(row)

    local_results_for_global[cid] = {
        "samples": len(y_c),
        "data": (x_c, y_c),
    }

print(f"‚úÖ Local training finished for {len(client_records)} clients")


# ======================================================
# FEDERATED ROUNDS
# ======================================================
def fedavg_weights(active_clients, local_results, global_model_weights):
    total_samples = sum(local_results[c]["samples"] for c in active_clients)
    if total_samples == 0:
        return global_model_weights

    agg = [np.zeros_like(w) for w in global_model_weights]

    for c in active_clients:
        x_c, y_c = local_results[c]["data"]
        model = build_model()
        model.set_weights(global_model_weights)
        model.fit(x_c, y_c, epochs=1, batch_size=BATCH_SIZE, verbose=0)
        w_c = model.get_weights()
        weight_factor = local_results[c]["samples"] / total_samples
        for i, w in enumerate(w_c):
            agg[i] += w * weight_factor

    return agg


quality_history = {cid: [] for cid in clients.keys()}
availability_history = {cid: [] for cid in clients.keys()}

global_model = build_model()
global_weights = global_model.get_weights()

for rnd in range(1, GLOBAL_ROUNDS + 1):
    active_clients = []

    for cid in clients.keys():
        is_active = (np.random.rand() < AVAILABILITY_PROB)
        availability_history[cid].append(1 if is_active else 0)
        if is_active:
            active_clients.append(cid)

    if active_clients:
        global_weights = fedavg_weights(
            active_clients, local_results_for_global, global_weights
        )
        global_model.set_weights(global_weights)

    for cid, (x_c, y_c) in clients.items():
        if availability_history[cid][-1] == 1:
            loss_c, acc_c = global_model.evaluate(x_c[..., None], y_c, verbose=0)
            acc_value = float(acc_c * 100.0)
        else:
            acc_value = base_accuracy[cid]

        quality_history[cid].append(acc_value)

    print(f"Round {rnd}: active clients = {len(active_clients)}")


reliability_score = {}
for cid in clients.keys():
    activations = sum(availability_history[cid])
    reliability_score[cid] = activations / GLOBAL_ROUNDS

print("‚úÖ Quality_Factor (per-round accuracies) and Reliability_Score computed.")


# ======================================================
# FINAL CLIENT DATAFRAME
# ======================================================
df_clients = pd.DataFrame(client_records)
df_clients["Quality_Factor"] = df_clients["Client_ID"].apply(
    lambda cid: json.dumps(quality_history[int(cid)])
)
df_clients["Mean_Quality_Factor(%)"] = df_clients["Client_ID"].apply(
    lambda cid: float(np.mean(quality_history[int(cid)]))
)
df_clients["Reliability_Score"] = df_clients["Client_ID"].apply(
    lambda cid: reliability_score[int(cid)]
)
print(df_clients.head())

Mounted at /content/drive
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 0us/step
‚úÖ Created 100 clients (40 IID, 60 NonIID)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


‚úÖ Local training finished for 100 clients
Round 1: active clients = 56
Round 2: active clients = 51
Round 3: active clients = 65
Round 4: active clients = 66
Round 5: active clients = 56
‚úÖ Quality_Factor (per-round accuracies) and Reliability_Score computed.
   Client_ID Scenario  DataVolume(Samples)  FeatureCount  Local_Accuracy(%)  \
0          1      IID                 1740           784              90.87   
1          2      IID                 3610           784              92.86   
2          3      IID                 3380           784              92.89   
3          4      IID                 2730           784              92.29   
4          5      IID                 2180           784              90.51   

   Train_Time(s)  Latency(ms)       C_p            BW  \
0       4.371706  4371.705770  0.228744  15649.438202   
1       3.466563  3466.563463  0.288470  20482.352941   
2       3.190025  3190.024614  0.313477   4819.377163   
3       3.114357  3114.357233  0.3

In [None]:
df_clients

Unnamed: 0,Client_ID,Scenario,DataVolume(Samples),FeatureCount,Local_Accuracy(%),Train_Time(s),Latency(ms),C_p,BW,Weights_Path,...,Label3,Label4,Label5,Label6,Label7,Label8,Label9,Quality_Factor,Mean_Quality_Factor(%),Reliability_Score
0,1,IID,1740,784,90.87,4.371706,4371.705770,0.228744,15649.438202,/content/drive/MyDrive/mlaas_weights100/client...,...,174,174,174,174,174,174,174,"[77.01149582862854, 90.86999893188477, 90.8699...",88.348000,0.6
1,2,IID,3610,784,92.86,3.466563,3466.563463,0.288470,20482.352941,/content/drive/MyDrive/mlaas_weights100/client...,...,361,361,361,361,361,361,361,"[77.09141373634338, 88.33795189857483, 90.5540...",87.822715,1.0
2,3,IID,3380,784,92.89,3.190025,3190.024614,0.313477,4819.377163,/content/drive/MyDrive/mlaas_weights100/client...,...,338,338,338,338,338,338,338,"[92.89000034332275, 88.04733753204346, 92.8900...",91.745834,0.4
3,4,IID,2730,784,92.29,3.114357,3114.357233,0.321094,5901.694915,/content/drive/MyDrive/mlaas_weights100/client...,...,273,273,273,273,273,273,273,"[77.03296542167664, 92.29000210762024, 89.9633...",88.762155,0.6
4,5,IID,2180,784,90.51,3.003023,3003.022909,0.332998,5952.136752,/content/drive/MyDrive/mlaas_weights100/client...,...,218,218,218,218,218,218,218,"[77.38531827926636, 87.98165321350098, 90.5099...",87.615762,0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,NonIID,1556,784,87.51,2.897884,2897.884130,0.345079,6135.682819,/content/drive/MyDrive/mlaas_weights100/client...,...,95,247,127,251,152,154,88,"[78.47043871879578, 87.51000165939331, 91.1311...",87.356186,0.6
96,97,NonIID,3889,784,90.06,2.916785,2916.785002,0.342843,5827.615063,/content/drive/MyDrive/mlaas_weights100/client...,...,265,411,270,283,87,93,247,"[90.0600016117096, 88.78889083862305, 90.15170...",90.441930,0.8
97,98,NonIID,1776,784,89.82,2.604724,2604.723930,0.383918,6029.437229,/content/drive/MyDrive/mlaas_weights100/client...,...,80,397,277,79,168,122,189,"[89.819997549057, 89.819997549057, 89.81999754...",90.795152,0.4
98,99,NonIID,3574,784,86.70,2.935914,2935.913801,0.340609,5977.682403,/content/drive/MyDrive/mlaas_weights100/client...,...,45,65,632,290,638,552,779,"[86.69999837875366, 86.26189231872559, 88.5842...",87.573117,0.6


In [None]:
df_clients.to_csv("Client_Profiles_For_Composability_100.csv")

In [None]:
df_clients['Local_Accuracy(%)'].describe()

Unnamed: 0,Local_Accuracy(%)
count,100.0
mean,86.1909
std,7.83317
min,45.1
25%,83.485
50%,88.67
75%,91.1775
max,93.69


In [None]:
df_clients.columns

Index(['Client_ID', 'Scenario', 'DataVolume(Samples)', 'FeatureCount',
       'Local_Accuracy(%)', 'Train_Time(s)', 'Latency(ms)', 'C_p', 'BW',
       'Weights_Path', 'Label0', 'Label1', 'Label2', 'Label3', 'Label4',
       'Label5', 'Label6', 'Label7', 'Label8', 'Label9', 'Quality_Factor',
       'Mean_Quality_Factor(%)', 'Reliability_Score'],
      dtype='object')

**FMNIST Dataset**
---

In [None]:
import numpy as np
import pandas as pd
import time, os, json
import tensorflow as tf
from tensorflow.keras import layers, models

# ======================================================
# CONNECT GOOGLE DRIVE
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# CONFIG
# ======================================================
NUM_CLIENTS       = 100      # you can reduce for faster runs
IID_FRACTION      = 0.4      # fraction of IID clients
LOCAL_EPOCHS      = 5
GLOBAL_ROUNDS     = 5        # rounds for quality & reliability
BATCH_SIZE        = 64
AVAILABILITY_PROB = 0.6      # probability a client participates in a round

# ======================================================
# SAVE ALL WEIGHTS IN GOOGLE DRIVE
# ======================================================
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_100_FMNIST"  # UPDATED LOCATION FOR FMNIST
os.makedirs(WEIGHTS_DIR, exist_ok=True)

FEATURE_COUNT = 28 * 28

# ======================================================
# LOAD FASHION-MNIST
# ======================================================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0
y_train = y_train.astype("int32")
y_test  = y_test.astype("int32")

all_images = np.concatenate([x_train, x_test], axis=0)
all_labels = np.concatenate([y_train, y_test], axis=0)


def create_clients_iid_noniid(images, labels, num_clients, iid_indices, non_iid_indices):
    clients = {}
    scenario = {}

    class_indices = {cls: np.where(labels == cls)[0] for cls in range(10)}

    for i in range(num_clients):
        samples_per_client = np.random.randint(800, 4000)
        client_samples = []

        if i in iid_indices:
            per_class = samples_per_client // 10
            for cls in range(10):
                chosen = np.random.choice(class_indices[cls],
                                          per_class,
                                          replace=True)
                client_samples.extend(chosen)
            scen = "IID"

        elif i in non_iid_indices:
            alpha = np.random.uniform(0.3, 3.0)
            class_weights = np.random.dirichlet(np.ones(10) * alpha)

            for cls in range(10):
                n_cls = int(class_weights[cls] * samples_per_client)
                if n_cls > 0:
                    chosen = np.random.choice(class_indices[cls],
                                              n_cls,
                                              replace=True)
                    client_samples.extend(chosen)
            scen = "NonIID"

        np.random.shuffle(client_samples)
        cid = i + 1
        clients[cid] = (images[client_samples], labels[client_samples])
        scenario[cid] = scen

    return clients, scenario


num_iid = int(NUM_CLIENTS * IID_FRACTION)
iid_indices     = list(range(0, num_iid))
non_iid_indices = list(range(num_iid, NUM_CLIENTS))

clients, scenario = create_clients_iid_noniid(
    all_images, all_labels,
    NUM_CLIENTS, iid_indices, non_iid_indices
)

print(f"‚úÖ Created {len(clients)} clients "
      f"({sum(1 for s in scenario.values() if s=='IID')} IID, "
      f"{sum(1 for s in scenario.values() if s=='NonIID')} NonIID)")


# ======================================================
# MODEL
# ======================================================
def build_model():
    model = models.Sequential([
        layers.Conv2D(16, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(32, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


# ======================================================
# LOCAL TRAINING + LOCAL QoS
# ======================================================
client_records = []
base_accuracy = {}
local_results_for_global = {}

for cid, (x_c, y_c) in clients.items():
    x_c = x_c[..., None]

    model = build_model()

    t0 = time.time()
    model.fit(x_c, y_c,
              epochs=LOCAL_EPOCHS,
              batch_size=BATCH_SIZE,
              verbose=0)
    train_time = time.time() - t0

    C_p = 1.0 / train_time
    latency_ms = train_time * 1000.0

    _, acc = model.evaluate(x_test[..., None], y_test, verbose=0)
    acc_pct = float(acc * 100.0)
    base_accuracy[cid] = acc_pct

    weights = model.get_weights()
    flat = np.concatenate([w.flatten() for w in weights])
    size_MB = flat.nbytes / (1024 * 1024)
    bw_t0 = time.time()
    _ = flat.copy()
    bw_time = time.time() - bw_t0
    BW_MBps = float(size_MB / bw_time)

    # ======================================================
    # UPDATED: SAVE CLIENT WEIGHTS IN GOOGLE DRIVE
    # ======================================================
    weight_path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
    np.savez_compressed(weight_path, *weights)

    unique, counts = np.unique(y_c, return_counts=True)
    label_map = dict(zip(unique, counts))

    row = {
        "Client_ID": cid,
        "Scenario": scenario[cid],
        "DataVolume(Samples)": int(len(y_c)),
        "FeatureCount": FEATURE_COUNT,
        "Local_Accuracy(%)": round(acc_pct, 2),
        "Train_Time(s)": train_time,
        "Latency(ms)": latency_ms,
        "C_p": C_p,
        "BW": BW_MBps,
        "Weights_Path": weight_path,
    }

    for l in range(10):
        row[f"Label{l}"] = int(label_map.get(l, 0))

    client_records.append(row)

    local_results_for_global[cid] = {
        "samples": len(y_c),
        "data": (x_c, y_c),
    }

print(f"‚úÖ Local training finished for {len(client_records)} clients")


# ======================================================
# FEDERATED ROUNDS
# ======================================================
def fedavg_weights(active_clients, local_results, global_model_weights):
    total_samples = sum(local_results[c]["samples"] for c in active_clients)
    if total_samples == 0:
        return global_model_weights

    agg = [np.zeros_like(w) for w in global_model_weights]

    for c in active_clients:
        x_c, y_c = local_results[c]["data"]
        model = build_model()
        model.set_weights(global_model_weights)
        model.fit(x_c, y_c, epochs=1, batch_size=BATCH_SIZE, verbose=0)
        w_c = model.get_weights()
        weight_factor = local_results[c]["samples"] / total_samples
        for i, w in enumerate(w_c):
            agg[i] += w * weight_factor

    return agg


quality_history = {cid: [] for cid in clients.keys()}
availability_history = {cid: [] for cid in clients.keys()}

global_model = build_model()
global_weights = global_model.get_weights()

for rnd in range(1, GLOBAL_ROUNDS + 1):
    active_clients = []

    for cid in clients.keys():
        is_active = (np.random.rand() < AVAILABILITY_PROB)
        availability_history[cid].append(1 if is_active else 0)
        if is_active:
            active_clients.append(cid)

    if active_clients:
        global_weights = fedavg_weights(
            active_clients, local_results_for_global, global_weights
        )
        global_model.set_weights(global_weights)

    for cid, (x_c, y_c) in clients.items():
        if availability_history[cid][-1] == 1:
            loss_c, acc_c = global_model.evaluate(x_c[..., None], y_c, verbose=0)
            acc_value = float(acc_c * 100.0)
        else:
            acc_value = base_accuracy[cid]

        quality_history[cid].append(acc_value)

    print(f"Round {rnd}: active clients = {len(active_clients)}")


reliability_score = {}
for cid in clients.keys():
    activations = sum(availability_history[cid])
    reliability_score[cid] = activations / GLOBAL_ROUNDS

print("‚úÖ Quality_Factor (per-round accuracies) and Reliability_Score computed.")


# ======================================================
# FINAL CLIENT DATAFRAME
# ======================================================
df_clients = pd.DataFrame(client_records)
df_clients["Quality_Factor"] = df_clients["Client_ID"].apply(
    lambda cid: json.dumps(quality_history[int(cid)])
)
df_clients["Mean_Quality_Factor(%)"] = df_clients["Client_ID"].apply(
    lambda cid: float(np.mean(quality_history[int(cid)]))
)
df_clients["Reliability_Score"] = df_clients["Client_ID"].apply(
    lambda cid: reliability_score[int(cid)]
)
print(df_clients.head())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


‚úÖ Local training finished for 100 clients
Round 1: active clients = 63
Round 2: active clients = 58
Round 3: active clients = 57
Round 4: active clients = 59
Round 5: active clients = 65
‚úÖ Quality_Factor (per-round accuracies) and Reliability_Score computed.
   Client_ID Scenario  DataVolume(Samples)  FeatureCount  Local_Accuracy(%)  \
0          1      IID                 3980           784              83.02   
1          2      IID                 2720           784              80.22   
2          3      IID                 3970           784              82.02   
3          4      IID                 1370           784              74.12   
4          5      IID                 1290           784              78.55   

   Train_Time(s)  Latency(ms)       C_p           BW  \
0       3.003238  3003.237724  0.332974  5901.694915   
1       2.806538  2806.538343  0.356311  5120.588235   
2       2.909387  2909.387350  0.343715  4220.606061   
3       2.632246  2632.246494  0.37990

In [None]:
df_clients.to_csv("FMNIST_Client_Profiles_For_Composability_1000.csv")

**CIFAR-10**
---

**Training**

In [4]:
import numpy as np
import pandas as pd
import time, os, json
import tensorflow as tf
from tensorflow.keras import layers, models

# ======================================================
# CONNECT GOOGLE DRIVE  üö® (ADDED AS PER YOUR FORMAT)
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# CONFIG
# ======================================================
NUM_CLIENTS       = 100      # you can reduce for faster runs
IID_FRACTION      = 0.4      # fraction of IID clients
LOCAL_EPOCHS      = 5
GLOBAL_ROUNDS     = 3       # rounds for quality & reliability
BATCH_SIZE        = 64
AVAILABILITY_PROB = 0.6      # probability a client participates in a round
# ======================================================
# SAVE ALL WEIGHTS IN GOOGLE DRIVE  (CONSISTENT LOCATION)
# ======================================================
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_100_CIFAR"
os.makedirs(WEIGHTS_DIR, exist_ok=True)

FEATURE_COUNT = 32 * 32 * 3   # CIFAR-10 feature dimension

# ======================================================
# LOAD CIFAR-10 DATASET
# ======================================================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = y_train.flatten()
y_test  = y_test.flatten()

x_train = x_train.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0

all_images = np.concatenate([x_train, x_test], axis=0)
all_labels = np.concatenate([y_train, y_test], axis=0)

print("üì¶ CIFAR-10 Loaded ‚Üí Total Samples:", len(all_images))


# ======================================================
# CLIENT CREATION (IID + Non-IID)
# ======================================================
def create_clients_iid_noniid(images, labels, num_clients, iid_indices, non_iid_indices):
    clients = {}
    scenario = {}
    class_indices = {cls: np.where(labels == cls)[0] for cls in range(10)}

    for i in range(num_clients):
        cid = i + 1
        samples = np.random.randint(800, 4000)
        client_samples = []

        if i in iid_indices:   # IID distribution
            per_cls = samples // 10
            for cls in range(10):
                chosen = np.random.choice(class_indices[cls], per_cls, replace=True)
                client_samples.extend(chosen)
            scenario[cid] = "IID"

        else:  # NON-IID using Dirichlet allocation
            alpha = np.random.uniform(0.3, 3.0)
            dist = np.random.dirichlet(np.ones(10) * alpha)
            for cls in range(10):
                n_cls = int(dist[cls] * samples)
                if n_cls > 0:
                    chosen = np.random.choice(class_indices[cls], n_cls, replace=True)
                    client_samples.extend(chosen)
            scenario[cid] = "NonIID"

        np.random.shuffle(client_samples)
        clients[cid] = (images[client_samples], labels[client_samples])

    return clients, scenario


num_iid = int(NUM_CLIENTS * IID_FRACTION)
iid_indices     = list(range(num_iid))
non_iid_indices = list(range(num_iid, NUM_CLIENTS))

clients, scenario = create_clients_iid_noniid(all_images, all_labels, NUM_CLIENTS, iid_indices, non_iid_indices)

print(f"‚úÖ Created {len(clients)} Clients "
      f"({sum(1 for s in scenario.values() if s=='IID')} IID, "
      f"{sum(1 for s in scenario.values() if s=='NonIID')} NonIID)")


# ======================================================
# CIFAR-10 MODEL
# ======================================================
def build_model():
    model = models.Sequential([
        layers.Conv2D(32, (3,3), padding="same", activation="relu", input_shape=(32,32,3)),
        layers.Conv2D(32, (3,3), padding="same", activation="relu"),
        layers.MaxPooling2D((2,2)),

        layers.Conv2D(64, (3,3), padding="same", activation="relu"),
        layers.Conv2D(64, (3,3), padding="same", activation="relu"),
        layers.MaxPooling2D((2,2)),

        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model


# ======================================================
# LOCAL TRAINING + QoS EXTRACTION
# ======================================================
client_records = []
base_accuracy = {}
local_results_for_global = {}

for cid, (x_c, y_c) in clients.items():
    model = build_model()

    t0 = time.time()
    model.fit(x_c, y_c, epochs=LOCAL_EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    train_time = time.time() - t0

    C_p = 1.0 / train_time
    latency_ms = train_time * 1000

    _, acc = model.evaluate(x_test, y_test, verbose=0)
    acc_pct = float(acc * 100)
    base_accuracy[cid] = acc_pct

    weights = model.get_weights()
    flat = np.concatenate([w.flatten() for w in weights])
    size_MB = flat.nbytes / (1024 * 1024)

    bw_t0 = time.time()
    _ = flat.copy()
    bw_s = time.time() - bw_t0
    BW_MBps = size_MB / bw_s

    path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
    np.savez_compressed(path, *weights)

    unique, counts = np.unique(y_c, return_counts=True)
    lbl_map = dict(zip(unique, counts))

    row = {
        "Client_ID": cid,
        "Scenario": scenario[cid],
        "DataVolume(Samples)": len(y_c),
        "FeatureCount": FEATURE_COUNT,
        "Local_Accuracy(%)": round(acc_pct, 2),
        "Train_Time(s)": train_time,
        "Latency(ms)": latency_ms,
        "C_p": C_p,
        "BW": BW_MBps,
        "Weights_Path": path
    }
    for l in range(10):
        row[f"Label{l}"] = lbl_map.get(l, 0)

    client_records.append(row)
    local_results_for_global[cid] = {"samples": len(y_c), "data": (x_c, y_c)}

print("üöÄ Local Training Finished")


# ======================================================
# FEDERATED GLOBAL AGGREGATION + QUALITY TRACKING
# ======================================================
def fedavg_weights(active, local, global_w):
    total = sum(local[c]["samples"] for c in active)
    if total == 0: return global_w

    agg = [np.zeros_like(w) for w in global_w]
    for c in active:
        x, y = local[c]["data"]
        m = build_model()
        m.set_weights(global_w)
        m.fit(x, y, epochs=1, batch_size=BATCH_SIZE, verbose=0)
        w = m.get_weights()
        factor = local[c]["samples"] / total
        for i in range(len(w)):
            agg[i] += w[i] * factor
    return agg


quality_history      = {cid: [] for cid in clients}
availability_history = {cid: [] for cid in clients}

global_model = build_model()
global_w = global_model.get_weights()

for rnd in range(1, GLOBAL_ROUNDS + 1):
    active = []
    for cid in clients:
        flag = np.random.rand() < AVAILABILITY_PROB
        availability_history[cid].append(flag)
        if flag: active.append(cid)

    if active:
        global_w = fedavg_weights(active, local_results_for_global, global_w)
        global_model.set_weights(global_w)

    for cid, (x_c, y_c) in clients.items():
        if availability_history[cid][-1]:
            _, acc = global_model.evaluate(x_c, y_c, verbose=0)
            val = acc * 100
        else:
            val = base_accuracy[cid]
        quality_history[cid].append(val)

    print(f"üåç Round {rnd}: Active Clients = {len(active)}")


reliability = {
    cid: sum(availability_history[cid]) / GLOBAL_ROUNDS
    for cid in clients
}


# ======================================================
# FINAL CLIENT DATAFRAME
# ======================================================
df_clients = pd.DataFrame(client_records)
df_clients["Quality_Factor"] = df_clients["Client_ID"].apply(lambda c: json.dumps(quality_history[c]))
df_clients["Mean_Quality_Factor(%)"] = df_clients["Client_ID"].apply(lambda c: float(np.mean(quality_history[c])))
df_clients["Reliability_Score"] = df_clients["Client_ID"].apply(lambda c: reliability[c])

print(df_clients.head())


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üì¶ CIFAR-10 Loaded ‚Üí Total Samples: 60000
‚úÖ Created 100 Clients (40 IID, 60 NonIID)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


üöÄ Local Training Finished
üåç Round 1: Active Clients = 59
üåç Round 2: Active Clients = 56
üåç Round 3: Active Clients = 57
   Client_ID Scenario  DataVolume(Samples)  FeatureCount  Local_Accuracy(%)  \
0          1      IID                 3730          3072              48.64   
1          2      IID                 1430          3072              40.20   
2          3      IID                 1470          3072              37.94   
3          4      IID                 2340          3072              43.79   
4          5      IID                 2380          3072              47.02   

   Train_Time(s)  Latency(ms)       C_p           BW  \
0       5.956566  5956.566334  0.167882  5832.542540   
1       4.938850  4938.850164  0.202476  8767.733086   
2       4.931860  4931.859970  0.202763  6155.096942   
3       5.416594  5416.594267  0.184618  9122.838959   
4       5.223161  5223.160744  0.191455  5991.376821   

                                        Weights_Path  ...

In [5]:
df_clients.to_csv("CIFAR10_Client_Profiles_For_Composability_100.csv")

In [3]:
df_clients

Unnamed: 0,Client_ID,Scenario,DataVolume(Samples),FeatureCount,Local_Accuracy(%),Train_Time(s),Latency(ms),C_p,BW,Weights_Path,...,Label3,Label4,Label5,Label6,Label7,Label8,Label9,Quality_Factor,Mean_Quality_Factor(%),Reliability_Score
0,1,IID,3560,3072,44.19,7.826199,7826.199293,0.127776,2832.450299,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,356,356,356,356,356,356,356,"[44.190001487731934, 35.19662916660309, 40.955...",40.113895,0.666667
1,2,IID,1100,3072,31.05,4.967299,4967.299223,0.201317,10418.925110,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,110,110,110,110,110,110,110,"[22.727273404598236, 31.049999594688416, 42.54...",32.107576,0.666667
2,3,IID,1650,3072,34.88,5.613085,5613.084555,0.178155,3202.567366,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,165,165,165,165,165,165,165,"[21.939393877983093, 36.06060743331909, 34.880...",30.960001,0.666667
3,4,IID,2130,3072,39.71,5.178314,5178.313732,0.193113,6167.134289,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,213,213,213,213,213,213,213,"[22.018779814243317, 34.31924879550934, 41.267...",32.535212,1.000000
4,5,IID,2670,3072,38.96,5.581422,5581.422329,0.179166,5998.975269,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,267,267,267,267,267,267,267,"[21.9475656747818, 36.40449345111847, 42.05992...",33.470661,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,NonIID,1127,3072,17.56,4.362401,4362.401247,0.229232,5857.822910,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,141,302,90,65,54,89,59,"[17.560000717639923, 17.560000717639923, 17.56...",17.560001,0.000000
96,97,NonIID,3432,3072,38.20,6.560693,6560.692787,0.152423,5282.179788,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,349,615,126,723,272,486,170,"[20.425407588481903, 38.199999928474426, 38.19...",32.275136,0.333333
97,98,NonIID,1808,3072,29.55,4.792418,4792.418003,0.208663,10726.058957,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,184,126,43,265,235,380,77,"[21.681416034698486, 29.55000102519989, 35.730...",28.987168,0.666667
98,99,NonIID,870,3072,19.15,4.710182,4710.182190,0.212306,5916.437774,/content/drive/MyDrive/MLaaS_Weights_100_CIFAR...,...,181,207,36,129,59,29,32,"[19.14999932050705, 27.931034564971924, 19.149...",22.077011,0.333333


**WITHOUT training**
---

In [None]:
import numpy as np
import pandas as pd
import time, os, json
import tensorflow as tf
from tensorflow.keras import layers, models

# ======================================================
# CONNECT GOOGLE DRIVE
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# CONFIG
# ======================================================
NUM_CLIENTS       = 100
LOCAL_EPOCHS      = 0       # üî• NO TRAINING
GLOBAL_ROUNDS     = 5
BATCH_SIZE        = 64
AVAILABILITY_PROB = 0.6

WEIGHTS_DIR = "/content/drive/MyDrive/mlaas_weights_cifar10"
FEATURE_COUNT = 32 * 32 * 3

# ======================================================
# LOAD CIFAR-10
# ======================================================
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_test = y_test.flatten()
x_test = x_test.astype("float32") / 255.0

# ======================================================
# DEFINE CIFAR-10 MODEL
# ======================================================
def build_model():
    model = models.Sequential([
        layers.Conv2D(32, (3,3), padding="same", activation="relu", input_shape=(32,32,3)),
        layers.Conv2D(32, (3,3), padding="same", activation="relu"),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), padding="same", activation="relu"),
        layers.Conv2D(64, (3,3), padding="same", activation="relu"),
        layers.MaxPooling2D((2,2)),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ======================================================
# LOAD CLIENT WEIGHTS + LOCAL TESTING
# ======================================================
client_records = {}
local_results_for_global = {}
base_accuracy = {}

for cid in range(1, NUM_CLIENTS + 1):
    path = os.path.join(WEIGHTS_DIR, f"client_{cid}_local.npz")
    data = np.load(path, allow_pickle=True)
    weights = [data[f'arr_{i}'] for i in range(len(data.files))]

    model = build_model()
    model.set_weights(weights)

    t0 = time.time()
    loss, acc = model.evaluate(x_test, y_test, verbose=0)
    latency = (time.time() - t0) * 1000

    flat = np.concatenate([w.flatten() for w in weights])
    size_MB = flat.nbytes / (1024 * 1024)

    bw_t0 = time.time()
    _ = flat.copy()
    bw_val = size_MB / (time.time() - bw_t0)

    local_results_for_global[cid] = {
        "weights": weights,
        "samples": model.count_params()
    }
    base_accuracy[cid] = acc * 100

    client_records[cid] = {
        "Client_ID": cid,
        "Local_Accuracy(%)": round(acc * 100, 2),
        "Latency(ms)": latency,
        "C_p": 1 / (latency / 1000),
        "BW": bw_val,
        "FeatureCount": FEATURE_COUNT,
        "Weights_Path": path
    }

print("üìå All client weights loaded successfully & tested locally")

# ======================================================
# FEDERATED AGGREGATION USING SAVED WEIGHTS
# ======================================================
def fedavg(global_w, active_clients):
    total = len(active_clients)
    if total == 0:
        return global_w

    agg = [np.zeros_like(w) for w in global_w]

    for cid in active_clients:
        client_w = local_results_for_global[cid]["weights"]
        for i in range(len(client_w)):
            agg[i] += client_w[i] / total

    return agg

global_model = build_model()
global_w = global_model.get_weights()

quality_history = {cid: [] for cid in range(1, NUM_CLIENTS + 1)}
availability_history = {cid: [] for cid in range(1, NUM_CLIENTS + 1)}

for rnd in range(1, GLOBAL_ROUNDS + 1):
    active = []
    for cid in range(1, NUM_CLIENTS + 1):
        avail = np.random.rand() < AVAILABILITY_PROB
        availability_history[cid].append(avail)
        if avail:
            active.append(cid)

    if active:
        global_w = fedavg(global_w, active)
        global_model.set_weights(global_w)

    for cid in range(1, NUM_CLIENTS + 1):
        if availability_history[cid][-1]:
            _, acc = global_model.evaluate(x_test, y_test, verbose=0)
            quality_history[cid].append(acc * 100)
        else:
            quality_history[cid].append(base_accuracy[cid])

    print(f"üåç Round {rnd}: Active Clients = {len(active)}")

# ======================================================
# RELIABILITY + FINAL DF
# ======================================================
reliability = {cid: sum(availability_history[cid]) / GLOBAL_ROUNDS for cid in range(1, NUM_CLIENTS + 1)}

df_clients = pd.DataFrame.from_dict(client_records, orient='index')
df_clients["Quality_Factor"] = df_clients["Client_ID"].apply(lambda c: json.dumps(quality_history[c]))
df_clients["Mean_Quality_Factor(%)"] = df_clients["Client_ID"].apply(lambda c: float(np.mean(quality_history[c])))
df_clients["Reliability_Score"] = df_clients["Client_ID"].apply(lambda c: reliability[c])

print("üéØ Final DF Ready")
print(df_clients.head())

Mounted at /content/drive
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


üìå All client weights loaded successfully & tested locally
üåç Round 1: Active Clients = 51
üåç Round 2: Active Clients = 60
üåç Round 3: Active Clients = 54
üåç Round 4: Active Clients = 52
üåç Round 5: Active Clients = 58
üéØ Final DF Ready
   Client_ID  Local_Accuracy(%)  Latency(ms)       C_p           BW  \
1          1              47.55  4133.072853  0.241951  1986.223809   
2          2              43.27  2305.532932  0.433739  5189.459133   
3          3              37.73  2247.833014  0.444873  5938.721908   
4          4              43.56  2339.457035  0.427450  5091.702906   
5          5              49.58  2345.153570  0.426411  5449.529954   

   FeatureCount                                       Weights_Path  \
1          3072  /content/drive/MyDrive/mlaas_weights_cifar10/c...   
2          3072  /content/drive/MyDrive/mlaas_weights_cifar10/c...   
3          3072  /content/drive/MyDrive/mlaas_weights_cifar10/c...   
4          3072  /content/drive/MyDrive/ml

In [None]:
df_clients.to_csv("CIFAR10_Client_Profiles_For_Composability_1000.csv")

In [None]:
df_clients

Unnamed: 0,Client_ID,Local_Accuracy(%),Latency(ms),C_p,BW,FeatureCount,Weights_Path,Quality_Factor,Mean_Quality_Factor(%),Reliability_Score
1,1,47.55,4133.072853,0.241951,1986.223809,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 10.000000149011612, 47.54...",17.510000,0.8
2,2,43.27,2305.532932,0.433739,5189.459133,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 43.27000081539154, 10.000...",23.308000,0.6
3,3,37.73,2247.833014,0.444873,5938.721908,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 10.000000149011612, 37.72...",15.546000,0.8
4,4,43.56,2339.457035,0.427450,5091.702906,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[43.560001254081726, 43.560001254081726, 43.56...",36.848001,0.2
5,5,49.58,2345.153570,0.426411,5449.529954,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[49.57999885082245, 49.57999885082245, 10.0000...",41.663999,0.2
...,...,...,...,...,...,...,...,...,...,...
96,96,37.33,2324.016094,0.430290,7562.257394,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 37.32999861240387, 37.329...",20.932000,0.6
97,97,27.29,2331.239700,0.428956,10227.442162,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[27.289998531341553, 10.000000149011612, 10.00...",16.916000,0.6
98,98,43.25,2302.645683,0.434283,8262.344105,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 43.25000047683716, 43.250...",29.950000,0.4
99,99,46.02,2313.387632,0.432267,6925.610542,3072,/content/drive/MyDrive/mlaas_weights_cifar10/c...,"[10.000000149011612, 10.000000149011612, 10.00...",10.000000,1.0


**HAR Dataset**
---

In [None]:
import numpy as np
import pandas as pd
import time, os, json
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# ======================================================
# CONNECT GOOGLE DRIVE
# ======================================================
from google.colab import drive
drive.mount('/content/drive')

# ======================================================
# CONFIG
# ======================================================
NUM_CLIENTS       = 100       # adjust if needed
IID_FRACTION      = 0.4
LOCAL_EPOCHS      = 10
GLOBAL_ROUNDS     = 5
BATCH_SIZE        = 32
AVAILABILITY_PROB = 0.6

WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_HAR"
os.makedirs(WEIGHTS_DIR, exist_ok=True)

file_path = '/content/drive/My Drive/Early Drift Detection/pamap2_final.csv'
# ======================================================
# LOAD YOUR HAR CSV DATASET
# ======================================================
df = pd.read_csv(file_path)   # <-- change if needed
df = df.sample(frac=1).reset_index(drop=True)

label_encoder = LabelEncoder()
df["activity"] = label_encoder.fit_transform(df["activity"])

X = df.drop("activity", axis=1).values.astype("float32")
y = df["activity"].values.astype("int32")

FEATURE_COUNT = X.shape[1]
NUM_CLASSES   = len(np.unique(y))

# GLOBAL TRAIN/TEST
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# ======================================================
# CREATE IID + NON-IID CLIENTS
# ======================================================
def create_clients(X, y, num_clients, iid_idx, non_iid_idx):
    clients, scenario = {}, {}
    cls_idx = {cls: np.where(y == cls)[0] for cls in np.unique(y)}

    for i in range(num_clients):
        cid = i + 1
        samples = np.random.randint(300, 2000)
        chosen = []

        if i in iid_idx:
            per = samples // NUM_CLASSES
            for cls in cls_idx:
                sel = np.random.choice(cls_idx[cls], per, replace=True)
                chosen.extend(sel)
            scenario[cid] = "IID"
        else:
            alpha = np.random.uniform(0.3, 3.0)
            dist = np.random.dirichlet(np.ones(NUM_CLASSES) * alpha)
            for idx, cls in enumerate(cls_idx):
                n = int(dist[idx] * samples)
                if n > 0:
                    sel = np.random.choice(cls_idx[cls], n, replace=True)
                    chosen.extend(sel)
            scenario[cid] = "NonIID"

        np.random.shuffle(chosen)
        clients[cid] = (X[chosen], y[chosen])
    return clients, scenario

num_iid = int(NUM_CLIENTS * IID_FRACTION)
iid_idx = list(range(num_iid))
non_iid_idx = list(range(num_iid, NUM_CLIENTS))

clients, scenario = create_clients(X_train, y_train, NUM_CLIENTS, iid_idx, non_iid_idx)
print("Clients created:", len(clients))

# ======================================================
# BUILD HAR MLP MODEL
# ======================================================
def build_model():
    model = models.Sequential([
        layers.Input(shape=(FEATURE_COUNT,)),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(NUM_CLASSES, activation='softmax')
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ======================================================
# LOCAL TRAINING + QOS EXTRACTION
# ======================================================
client_records = []
base_accuracy = {}
local_store = {}

for cid, (x_c, y_c) in clients.items():
    model = build_model()

    t0 = time.time()
    model.fit(x_c, y_c, epochs=LOCAL_EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    train_time = time.time() - t0

    C_p = 1.0 / train_time
    latency_ms = train_time * 1000
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    acc_pct = float(acc * 100)
    base_accuracy[cid] = acc_pct

    weights = model.get_weights()
    flat = np.concatenate([w.flatten() for w in weights])
    size_MB = flat.nbytes / (1024 * 1024)
    bw_t0 = time.time()
    _ = flat.copy()
    BW = size_MB / (time.time() - bw_t0)

    path = f"{WEIGHTS_DIR}/client_{cid}.npz"
    np.savez_compressed(path, *weights)

    unique, counts = np.unique(y_c, return_counts=True)
    lbl = dict(zip(unique, counts))

    row = {
        "Client_ID": cid,
        "Scenario": scenario[cid],
        "DataVolume(Samples)": len(y_c),
        "FeatureCount": FEATURE_COUNT,
        "Local_Accuracy(%)": round(acc_pct, 2),
        "Train_Time(s)": train_time,
        "Latency(ms)": latency_ms,
        "C_p": C_p,
        "BW": BW,
        "Weights_Path": path
    }
    for l in range(NUM_CLASSES):
        row[f"Label{l}"] = int(lbl.get(l, 0))

    client_records.append(row)
    local_store[cid] = {"samples": len(y_c), "data": (x_c, y_c)}

print("Local training complete.")

# ======================================================
# FEDERATED AGGREGATION (FEDAVG)
# ======================================================
def fedavg(active, local_store, global_w):
    total = sum(local_store[c]["samples"] for c in active)
    agg = [np.zeros_like(w) for w in global_w]
    for cid in active:
        model = build_model()
        model.set_weights(global_w)
        x, y = local_store[cid]["data"]
        model.fit(x, y, epochs=1, batch_size=BATCH_SIZE, verbose=0)
        w = model.get_weights()
        factor = local_store[cid]["samples"] / total
        for i in range(len(w)):
            agg[i] += w[i] * factor
    return agg

quality_history = {cid: [] for cid in clients}
availability = {cid: [] for cid in clients}

global_model = build_model()
global_w = global_model.get_weights()

for rnd in range(GLOBAL_ROUNDS):
    active = [cid for cid in clients if np.random.rand() < AVAILABILITY_PROB]

    if active:
        global_w = fedavg(active, local_store, global_w)
        global_model.set_weights(global_w)

    for cid in clients:
        if cid in active:
            _, acc = global_model.evaluate(*local_store[cid]["data"], verbose=0)
            val = acc * 100
        else:
            val = base_accuracy[cid]
        availability[cid].append(int(cid in active))
        quality_history[cid].append(val)

    print(f"Round {rnd+1}: {len(active)} clients active")

print("Federated rounds completed.")

# ======================================================
# FINAL CLIENT DATAFRAME (SAME AS MNIST)
# ======================================================
df_clients = pd.DataFrame(client_records)
df_clients["Quality_Factor"] = df_clients["Client_ID"].apply(lambda c: json.dumps(quality_history[c]))
df_clients["Mean_Quality_Factor(%)"] = df_clients["Client_ID"].apply(lambda c: float(np.mean(quality_history[c])))
df_clients["Reliability_Score"] = df_clients["Client_ID"].apply(lambda c: sum(availability[c]) / GLOBAL_ROUNDS)
print(df_clients.head())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Clients created: 100
Local training complete.
Round 1: 56 clients active
Round 2: 57 clients active
Round 3: 61 clients active
Round 4: 54 clients active
Round 5: 63 clients active
Federated rounds completed.
   Client_ID Scenario  DataVolume(Samples)  FeatureCount  Local_Accuracy(%)  \
0          1      IID                 1305            43              24.43   
1          2      IID                 1625            43              24.43   
2          3      IID                  590            43              19.22   
3          4      IID                 1155            43              24.40   
4          5      IID                  800            43              24.39   

   Train_Time(s)  Latency(ms)       C_p           BW  \
0       5.169581  5169.580936  0.193439  1604.288973   
1       6.852367  6852.367163  0.145935  4934.830409   
2       2.547058  2

In [None]:
df_clients.to_csv("HAR_Client_Profiles_For_Composability.csv")