**MNIST NIID (possible duplicate assignments)**
---

In [None]:

import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models  # type: ignore
from tensorflow.keras.datasets import mnist  # type: ignore
from sklearn.utils import shuffle
import os
# Global Configuration
# Hyper-parameters read by the functions and the training loop of this cell.
CONFIG = {
    "num_clients": 20,  # Extended to 20 clients
    "num_rounds": 10,  # federated rounds run by the training loop
    "local_epochs": 5,  # epochs per client per round (train_local_model)
    "batch_size": 32,  # mini-batch size for local training
    "learning_rate": 0.01,  # SGD learning rate (create_model)
    "reduced_neurons": 64  # width of the hidden Dense layer (create_model)
}

# Create directory to store weights
os.makedirs("weights", exist_ok=True)

# Load and Prepare MNIST Dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32 and divide by 255.0.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Append a trailing channel axis to match the model's (28, 28, 1) input.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Requested number of samples per label for the hand-specified clients 1-5.
# Only client_2 is label-skewed; clients 1, 3, 4 and 5 are balanced across
# labels and differ only in how many samples they hold.
client_distributions = {
    "client_1": {i: 100 for i in range(10)},  # Balanced with 100 samples per label
    "client_2": {0: 100, 1: 30, 2: 40, 3: 200, 4: 10, 5: 50, 6: 70, 7: 90, 8: 60, 9: 20},  # Mixed, unbalanced
    "client_3": {0: 150, 1: 150, 2: 150, 3: 150, 4: 150, 5: 150, 6: 150, 7: 150, 8: 150, 9:150},  # Balanced with 150 samples per label
    "client_4": {0: 50, 1: 50, 2: 50, 3: 50, 4: 50, 5: 50, 6: 50, 7: 50, 8: 50, 9:50},  # Balanced with 50 samples per label
    "client_5": {0: 500, 1: 500, 2: 500, 3: 500, 4: 500, 5: 500, 6: 500, 7: 500, 8: 500, 9:500}  # Balanced with 500 samples per label
}

# Regular distribution for remaining clients
def generate_regular_distribution(num_clients, start_client=6):
    """Build the default per-label sample counts for the "regular" clients.

    Args:
        num_clients: Index of the last client (inclusive).
        start_client: Index of the first regular client (inclusive).

    Returns:
        dict mapping "client_<i>" for i in start_client..num_clients to a
        fresh dict {0: 100, ..., 9: 100}. Empty when start_client exceeds
        num_clients.
    """
    return {
        f"client_{idx}": dict.fromkeys(range(10), 100)
        for idx in range(start_client, num_clients + 1)
    }

# Merge all client distributions
# Adds entries client_6 .. client_<num_clients> (100 samples per label each)
# to the hand-written entries for clients 1-5.
client_distributions.update(generate_regular_distribution(CONFIG["num_clients"]))

# Split the data based on custom distributions
def split_custom_data(x, y, client_distributions, rng=None):
    """Sample a per-client dataset according to requested per-label counts.

    Each client is sampled independently from the full dataset, so the same
    sample may be handed to several clients (within one client a sample is
    drawn at most once per label request).

    Args:
        x: NumPy array of samples, indexed along axis 0.
        y: NumPy array of labels aligned with `x`.
        client_distributions: dict mapping client name to a dict
            {label: requested_count}. A request larger than the number of
            samples carrying that label is capped at what exists.
        rng: Optional numpy.random.Generator for reproducible sampling. When
            omitted, NumPy's global random state is used, as before.

    Returns:
        dict mapping client name to {"x": nested list, "y": list}, with the
        samples ordered label by label in the order the labels were requested.
    """
    choose = np.random.choice if rng is None else rng.choice
    clients_data = {}
    for client, distribution in client_distributions.items():
        picked = []
        for label, count in distribution.items():
            indices = np.where(y == label)[0]
            take = min(count, len(indices))
            if take == 0:
                # Nothing requested or nothing available for this label.
                continue
            picked.append(choose(indices, size=take, replace=False))
        # Gather once with an integer index array instead of growing Python
        # lists of per-sample arrays.
        selected = np.concatenate(picked) if picked else np.empty(0, dtype=np.intp)
        clients_data[client] = {
            "x": x[selected].tolist(),
            "y": y[selected].tolist()
        }
    return clients_data

# Materialise every client's training data once; the training loop reuses
# this mapping in every round.
clients_data = split_custom_data(x_train, y_train, client_distributions)

# Define the Model
def create_model():
    """Build and compile the small CNN used by the server and every client.

    Architecture: Conv2D(64, 3x3, ReLU) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], ReLU) -> Dense(10, softmax). The model
    is compiled with plain SGD at CONFIG["learning_rate"] and sparse
    categorical cross-entropy, so labels are integer class ids.

    Returns:
        A newly initialised, compiled Keras Sequential model.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to Conv2D, which recent Keras versions warn about.
        # The trainable weights and their order are unchanged.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Data distribution function
def get_data_distribution(y):
    """Count how many times each label 0-9 occurs in `y`.

    Args:
        y: Iterable of integer labels in the range 0..9.

    Returns:
        dict with keys 0..9 (always all ten) mapped to occurrence counts.

    Raises:
        KeyError: if `y` contains a label outside 0..9.
    """
    counts = dict.fromkeys(range(10), 0)
    for lbl in y:
        counts[lbl] = counts[lbl] + 1
    return counts

# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Fit `model` on one client's data and return its weights as plain lists.

    Args:
        model: Compiled Keras model; it is trained in place.
        x: Client samples (anything np.array accepts).
        y: Client labels aligned with `x`.

    Returns:
        dict mapping "layer_<i>" to the i-th entry of model.get_weights(),
        converted with .tolist() so it can be dumped as JSON.
    """
    features = np.array(x)
    labels = np.array(y)
    model.fit(
        features,
        labels,
        epochs=CONFIG["local_epochs"],
        batch_size=CONFIG["batch_size"],
        verbose=0,
    )
    serialised = {}
    for idx, tensor in enumerate(model.get_weights()):
        serialised[f"layer_{idx}"] = tensor.tolist()
    return serialised

# Aggregate Weights on the Server
def aggregate_weights(client_weights, client_sizes=None):
    """Average the clients' weights layer by layer into global weights.

    Args:
        client_weights: Non-empty list of dicts as produced by
            train_local_model, each mapping "layer_<i>" to a nested list.
            Every dict must contain the keys "layer_0" .. "layer_<n-1>",
            where n is the number of entries in the first dict.
        client_sizes: Optional sequence with one weight per client (for
            example its number of training samples). When given, each layer
            is a weighted average; when omitted every client counts equally,
            which is the previous behaviour.

    Returns:
        dict mapping "layer_<i>" to the averaged weights as nested lists.

    Raises:
        ValueError: if `client_weights` is empty or `client_sizes` does not
            have one entry per client.
    """
    if not client_weights:
        raise ValueError("aggregate_weights needs the weights of at least one client")
    if client_sizes is not None and len(client_sizes) != len(client_weights):
        raise ValueError("client_sizes must contain exactly one entry per client")

    aggregated = {}
    for i in range(len(client_weights[0])):
        key = f"layer_{i}"
        per_client = [w[key] for w in client_weights]
        if client_sizes is None:
            layer_avg = np.mean(per_client, axis=0)
        else:
            layer_avg = np.average(np.asarray(per_client), axis=0, weights=client_sizes)
        aggregated[key] = layer_avg.tolist()
    return aggregated

# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Return `model`'s accuracy on the module-level (x_test, y_test) set.

    The loss reported by model.evaluate is discarded.
    """
    _loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    return test_accuracy

# Federated Learning Process
# Each round: every client starts from the current global weights, trains
# locally, is scored on the shared test set, and the server then replaces the
# global weights with the unweighted mean of all client weights.
global_model = create_model()
client_records = []  # one dict per (client, round); becomes the CSV rows

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []  # filled below but not read anywhere in this cell

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Fresh model per client, initialised from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Compute data distribution
        data_distribution = get_data_distribution(data["y"])

        # Measure computation time
        # Wall-clock time of the train_local_model call only.
        start_time = time.time()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        end_time = time.time()
        computation_time = end_time - start_time

        # Save local weights
        with open(f"weights/{client_id}_round_{round_num+1}.json", "w") as f:
            json.dump(client_weight, f, indent=4)

        # Evaluate local model
        # Accuracy of the locally trained model on the shared test set.
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    # Convert the "layer_<i>" lists back to arrays in layer order.
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights
    with open(f"weights/global_round_{round_num+1}.json", "w") as f:
        json.dump(new_global_weights, f, indent=4)

# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)

# Compute mean quality factor per client
# Reliability_Score = a client's mean Quality_Factor over all rounds; the merge
# repeats that value on each of the client's rows.
mean_accuracies = client_df.groupby("Client")["Quality_Factor"].mean().reset_index().rename(columns={"Quality_Factor": "Reliability_Score"})
client_df = client_df.merge(mean_accuracies, on="Client")

# Save the DataFrame to a CSV
client_df.to_csv("MLaaS_MNIST_Clients_NonIID_2.csv", index=False)

# Report completion (the DataFrame itself is not displayed here)
print("Federated Learning Process Complete! Statistics saved in 'MLaaS_MNIST_Clients_NonIID_2.csv'.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 3 ---
client_1 training...
client_2 

In [None]:
# Persist the weight files written by the training cell to Google Drive.
# Step 1: Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Copy your folder to Google Drive
import shutil

source_folder = '/content/weights'  # Folder in Colab
destination_folder = '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 65'  # Folder in Google Drive

# Copy the entire folder to Google Drive
# dirs_exist_ok=True makes the cell re-runnable: without it copytree raises
# FileExistsError once the destination exists; with it, files of the same
# name are overwritten.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

print("Folder successfully copied to Google Drive!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder successfully copied to Google Drive!


**MNIST NIID (ensures unique assignment)**
---

In [None]:
import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models  # type: ignore
from tensorflow.keras.datasets import mnist  # type: ignore
from sklearn.utils import shuffle
import os

# Global Configuration
# Hyper-parameters read by the functions and the training loop of this cell.
CONFIG = {
    "num_clients": 20,  # Extended to 20 clients
    "num_rounds": 10,  # federated rounds run by the training loop
    "local_epochs": 5,  # epochs per client per round (train_local_model)
    "batch_size": 32,  # mini-batch size for local training
    "learning_rate": 0.01,  # SGD learning rate (create_model)
    "reduced_neurons": 64  # width of the hidden Dense layer (create_model)
}

# Create directory to store weights
os.makedirs("weights_1", exist_ok=True)

# Load and Prepare MNIST Dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32 and divide by 255.0.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Append a trailing channel axis to match the model's (28, 28, 1) input.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Shuffle data before assigning
# Fixed random_state: this shuffle is the same on every run.
x_train, y_train = shuffle(x_train, y_train, random_state=42)

# Custom per-label sample counts for clients 1-5.
# Only client_2 is label-skewed; clients 1, 3, 4 and 5 are balanced across
# labels and differ only in how many samples they hold.
client_distributions = {
    "client_1": {i: 100 for i in range(10)},  # Balanced with 100 samples per label
    "client_2": {0: 100, 1: 30, 2: 40, 3: 200, 4: 10, 5: 50, 6: 70, 7: 90, 8: 60, 9: 20},  # Mixed, unbalanced
    "client_3": {0: 150, 1: 150, 2: 150, 3: 150, 4: 150, 5: 150, 6: 150, 7: 150, 8: 150, 9:150},  # Balanced with 150 samples per label
    "client_4": {0: 50, 1: 50, 2: 50, 3: 50, 4: 50, 5: 50, 6: 50, 7: 50, 8: 50, 9:50},  # Balanced with 50 samples per label
    "client_5": {0: 500, 1: 500, 2: 500, 3: 500, 4: 500, 5: 500, 6: 500, 7: 500, 8: 500, 9:500}  # Balanced with 500 samples per label
}
# Split the data with non-IID for selected clients and regular IID for the rest
def split_custom_and_regular_data(x, y, client_distributions, total_clients, rng=None):
    """Partition the dataset so that no sample is given to two clients.

    Clients listed in `client_distributions` are served first, label by
    label, from the samples that are still unassigned. The samples left over
    are then cut into equal contiguous chunks for the remaining clients; up
    to len(remaining clients) - 1 leftover samples are not assigned to anyone.

    Args:
        x: NumPy array of samples, indexed along axis 0.
        y: NumPy array of labels aligned with `x`.
        client_distributions: dict mapping client name to a dict
            {label: requested_count}. A request is capped at the number of
            still-unassigned samples carrying that label.
        total_clients: Total number of clients. The "regular" clients are all
            names "client_1" .. "client_<total_clients>" that do not appear in
            `client_distributions` (client_6 onwards for the five custom
            clients used in this notebook).
        rng: Optional numpy.random.Generator for reproducible sampling. When
            omitted, NumPy's global random state is used, as before.

    Returns:
        dict mapping client name to {"x": nested list, "y": list}.
    """
    choose = np.random.choice if rng is None else rng.choice
    clients_data = {}
    # Boolean mask instead of a Python set: True while a sample is unassigned.
    available = np.ones(len(y), dtype=bool)

    # Allocate for clients with custom distributions
    for client, distribution in client_distributions.items():
        picked = []
        for label, count in distribution.items():
            candidates = np.flatnonzero((y == label) & available)
            take = min(count, len(candidates))
            if take == 0:
                # Label exhausted (or nothing requested): skip instead of
                # sampling from an empty pool.
                continue
            chosen = choose(candidates, size=take, replace=False)
            available[chosen] = False
            picked.append(chosen)
        selected = np.concatenate(picked) if picked else np.empty(0, dtype=np.intp)
        clients_data[client] = {
            "x": x[selected].tolist(),
            "y": y[selected].tolist()
        }

    # Distribute remaining data equally among other clients
    remaining_clients = [
        f"client_{i}" for i in range(1, total_clients + 1)
        if f"client_{i}" not in client_distributions
    ]
    if not remaining_clients:
        # Every client has a custom distribution; avoids dividing by zero.
        return clients_data

    remaining_indices = np.flatnonzero(available)
    samples_per_client = len(remaining_indices) // len(remaining_clients)
    for i, client in enumerate(remaining_clients):
        start_idx = i * samples_per_client
        chunk = remaining_indices[start_idx:start_idx + samples_per_client]
        clients_data[client] = {
            "x": x[chunk].tolist(),
            "y": y[chunk].tolist()
        }
    return clients_data

# Materialise every client's training data once; the training loop reuses
# this mapping in every round.
clients_data = split_custom_and_regular_data(x_train, y_train, client_distributions, CONFIG["num_clients"])

# Define the Model
def create_model():
    """Build and compile the small CNN used by the server and every client.

    Architecture: Conv2D(64, 3x3, ReLU) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], ReLU) -> Dense(10, softmax). The model
    is compiled with plain SGD at CONFIG["learning_rate"] and sparse
    categorical cross-entropy, so labels are integer class ids.

    Returns:
        A newly initialised, compiled Keras Sequential model.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to Conv2D, which recent Keras versions warn about.
        # The trainable weights and their order are unchanged.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Data distribution function
def get_data_distribution(y):
    """Tally the labels in `y` into a histogram over the classes 0-9.

    Args:
        y: Iterable of integer labels in the range 0..9.

    Returns:
        dict whose keys are 0..9 (all present, zero when unseen) and whose
        values are the number of occurrences of each label.

    Raises:
        KeyError: if a label outside 0..9 is encountered.
    """
    histogram = {}
    for cls in range(10):
        histogram[cls] = 0
    for observed in y:
        histogram[observed] += 1
    return histogram

# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Train `model` in place on a client's data; return JSON-ready weights.

    Args:
        model: Compiled Keras model.
        x: Client samples (anything np.array accepts).
        y: Client labels aligned with `x`.

    Returns:
        dict mapping "layer_<i>" to the i-th array of model.get_weights(),
        converted to nested lists.
    """
    inputs, targets = np.array(x), np.array(y)
    fit_options = {
        "epochs": CONFIG["local_epochs"],
        "batch_size": CONFIG["batch_size"],
        "verbose": 0,
    }
    model.fit(inputs, targets, **fit_options)
    trained = model.get_weights()
    return {f"layer_{pos}": trained[pos].tolist() for pos in range(len(trained))}

# Aggregate Weights on the Server
def aggregate_weights(client_weights, client_sizes=None):
    """Average the clients' weights layer by layer into global weights.

    Args:
        client_weights: Non-empty list of dicts as produced by
            train_local_model, each mapping "layer_<i>" to a nested list.
            Every dict must contain the keys "layer_0" .. "layer_<n-1>",
            where n is the number of entries in the first dict.
        client_sizes: Optional sequence with one weight per client (for
            example its number of training samples). When given, each layer
            is a weighted average; when omitted every client counts equally,
            which is the previous behaviour.

    Returns:
        dict mapping "layer_<i>" to the averaged weights as nested lists.

    Raises:
        ValueError: if `client_weights` is empty or `client_sizes` does not
            have one entry per client.
    """
    if not client_weights:
        raise ValueError("aggregate_weights needs the weights of at least one client")
    if client_sizes is not None and len(client_sizes) != len(client_weights):
        raise ValueError("client_sizes must contain exactly one entry per client")

    aggregated = {}
    for i in range(len(client_weights[0])):
        key = f"layer_{i}"
        per_client = [w[key] for w in client_weights]
        if client_sizes is None:
            layer_avg = np.mean(per_client, axis=0)
        else:
            layer_avg = np.average(np.asarray(per_client), axis=0, weights=client_sizes)
        aggregated[key] = layer_avg.tolist()
    return aggregated

# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Score `model` on the module-level test set and return the accuracy.

    model.evaluate yields (loss, accuracy); only the accuracy is returned.
    """
    _unused_loss, acc = model.evaluate(x_test, y_test, verbose=0)
    return acc

# Federated Learning Process
# Each round: every client starts from the current global weights, trains
# locally, is scored on the shared test set, and the server then replaces the
# global weights with the unweighted mean of all client weights.
global_model = create_model()
client_records = []  # one dict per (client, round); becomes the CSV rows

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []  # filled below but not read anywhere in this cell

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Fresh model per client, initialised from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Compute data distribution
        data_distribution = get_data_distribution(data["y"])

        # Measure computation time
        # Wall-clock time of the train_local_model call only.
        start_time = time.time()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        end_time = time.time()
        computation_time = end_time - start_time

        # Save local weights
        with open(f"weights_1/{client_id}_round_{round_num+1}.json", "w") as f:
            json.dump(client_weight, f, indent=4)

        # Evaluate local model
        # Accuracy of the locally trained model on the shared test set.
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    # Convert the "layer_<i>" lists back to arrays in layer order.
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights
    with open(f"weights_1/global_round_{round_num+1}.json", "w") as f:
        json.dump(new_global_weights, f, indent=4)

# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)

# Compute mean quality factor per client
# Reliability_Score = a client's mean Quality_Factor over all rounds; the merge
# repeats that value on each of the client's rows.
mean_accuracies = client_df.groupby("Client")["Quality_Factor"].mean().reset_index().rename(columns={"Quality_Factor": "Reliability_Score"})
client_df = client_df.merge(mean_accuracies, on="Client")

# Save the DataFrame to a CSV
client_df.to_csv("MLaaS_MNIST_Clients_NonIID.csv", index=False)

# Report completion (the DataFrame itself is not displayed here)
print("Federated Learning Process Complete! Statistics saved in 'MLaaS_MNIST_Clients_NonIID.csv'.")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 3 ---
client_1 training...
client_2 

In [None]:
# Persist the weight files written by the training cell to Google Drive.
# Step 1: Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Copy your folder to Google Drive
import shutil

source_folder = '/content/weights_1'  # Folder in Colab
destination_folder = '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/NIID_2'  # Folder in Google Drive

# Copy the entire folder to Google Drive
# dirs_exist_ok=True makes the cell re-runnable: without it copytree raises
# FileExistsError once the destination exists; with it, files of the same
# name are overwritten.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

print("Folder successfully copied to Google Drive!")

Mounted at /content/drive
Folder successfully copied to Google Drive!


**IID MNIST**
---

In [None]:
import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models  # type: ignore
from tensorflow.keras.datasets import mnist  # type: ignore
from sklearn.utils import shuffle
import os

# Global Configuration
# Hyper-parameters read by the functions and the training loop of this cell.
CONFIG = {
    "num_clients": 20,  # number of equal data shards / clients (split_data)
    "num_rounds": 10,  # federated rounds run by the training loop
    "local_epochs": 5,  # epochs per client per round (train_local_model)
    "batch_size": 32,  # mini-batch size for local training
    "learning_rate": 0.01,  # SGD learning rate (create_model)
    "reduced_neurons": 64  # width of the hidden Dense layer (create_model)
}

# Create directory to store weights
# NOTE(review): this is the same "weights" directory the first MNIST cell
# writes to, and the file names match, so its files are overwritten here.
os.makedirs("weights", exist_ok=True)

# Load and Prepare MNIST Dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32 and divide by 255.0.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Append a trailing channel axis to match the model's (28, 28, 1) input.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Split the data among clients
def split_data(x, y, num_clients):
    """Cut `x` and `y` into `num_clients` equal, contiguous shards.

    The shard size is len(x) // num_clients, so up to num_clients - 1
    trailing samples are not assigned to any client. The data is taken in its
    existing order; nothing is shuffled here.

    Args:
        x: NumPy array of samples, indexed along axis 0.
        y: NumPy array of labels aligned with `x`.
        num_clients: Number of shards to produce.

    Returns:
        dict mapping "client_1" .. "client_<num_clients>" to
        {"x": nested list, "y": list}.
    """
    shard_size = len(x) // num_clients
    shards = {}
    for client_idx in range(num_clients):
        lo = client_idx * shard_size
        hi = lo + shard_size
        shards[f"client_{client_idx + 1}"] = {
            "x": x[lo:hi].tolist(),
            "y": y[lo:hi].tolist(),
        }
    return shards

# Materialise every client's shard once; the training loop reuses this
# mapping in every round.
clients_data = split_data(x_train, y_train, CONFIG["num_clients"])

# Define the Model
def create_model():
    """Build and compile the small CNN used by the server and every client.

    Architecture: Conv2D(64, 3x3, ReLU) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], ReLU) -> Dense(10, softmax). The model
    is compiled with plain SGD at CONFIG["learning_rate"] and sparse
    categorical cross-entropy, so labels are integer class ids.

    Returns:
        A newly initialised, compiled Keras Sequential model.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to Conv2D, which recent Keras versions warn about.
        # The trainable weights and their order are unchanged.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Data distribution function
def get_data_distribution(y):
    """Return how often each of the labels 0-9 appears in `y`.

    Args:
        y: Iterable of integer labels in the range 0..9.

    Returns:
        dict {0: n0, ..., 9: n9}; labels that never occur map to 0.

    Raises:
        KeyError: if `y` holds a label outside 0..9.
    """
    tally = {digit: 0 for digit in range(10)}
    for item in y:
        tally[item] = tally[item] + 1
    return tally

# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Fit `model` on one client's data and return its weights as plain lists.

    Args:
        model: Compiled Keras model; it is trained in place.
        x: Client samples (anything np.array accepts).
        y: Client labels aligned with `x`.

    Returns:
        dict mapping "layer_<i>" to the i-th entry of model.get_weights(),
        converted with .tolist() so it can be dumped as JSON.
    """
    features = np.array(x)
    labels = np.array(y)
    model.fit(
        features,
        labels,
        epochs=CONFIG["local_epochs"],
        batch_size=CONFIG["batch_size"],
        verbose=0,
    )
    serialised = {}
    for idx, tensor in enumerate(model.get_weights()):
        serialised[f"layer_{idx}"] = tensor.tolist()
    return serialised

# Aggregate Weights on the Server
def aggregate_weights(client_weights, client_sizes=None):
    """Average the clients' weights layer by layer into global weights.

    Args:
        client_weights: Non-empty list of dicts as produced by
            train_local_model, each mapping "layer_<i>" to a nested list.
            Every dict must contain the keys "layer_0" .. "layer_<n-1>",
            where n is the number of entries in the first dict.
        client_sizes: Optional sequence with one weight per client (for
            example its number of training samples). When given, each layer
            is a weighted average; when omitted every client counts equally,
            which is the previous behaviour.

    Returns:
        dict mapping "layer_<i>" to the averaged weights as nested lists.

    Raises:
        ValueError: if `client_weights` is empty or `client_sizes` does not
            have one entry per client.
    """
    if not client_weights:
        raise ValueError("aggregate_weights needs the weights of at least one client")
    if client_sizes is not None and len(client_sizes) != len(client_weights):
        raise ValueError("client_sizes must contain exactly one entry per client")

    aggregated = {}
    for i in range(len(client_weights[0])):
        key = f"layer_{i}"
        per_client = [w[key] for w in client_weights]
        if client_sizes is None:
            layer_avg = np.mean(per_client, axis=0)
        else:
            layer_avg = np.average(np.asarray(per_client), axis=0, weights=client_sizes)
        aggregated[key] = layer_avg.tolist()
    return aggregated

# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Return `model`'s accuracy on the module-level (x_test, y_test) set.

    The loss reported by model.evaluate is discarded.
    """
    _loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    return test_accuracy

# Federated Learning Process
# Each round: every client starts from the current global weights, trains
# locally, is scored on the shared test set, and the server then replaces the
# global weights with the unweighted mean of all client weights.
global_model = create_model()
client_records = []  # one dict per (client, round); becomes the CSV rows

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []  # filled below but not read anywhere in this cell

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Fresh model per client, initialised from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Compute data distribution
        data_distribution = get_data_distribution(data["y"])

        # Measure computation time
        # Wall-clock time of the train_local_model call only.
        start_time = time.time()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        end_time = time.time()
        computation_time = end_time - start_time

        # Save local weights
        with open(f"weights/{client_id}_round_{round_num+1}.json", "w") as f:
            json.dump(client_weight, f, indent=4)

        # Evaluate local model
        # Accuracy of the locally trained model on the shared test set.
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    # Convert the "layer_<i>" lists back to arrays in layer order.
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights
    with open(f"weights/global_round_{round_num+1}.json", "w") as f:
        json.dump(new_global_weights, f, indent=4)

# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)

# Compute mean quality factor per client
# Reliability_Score = a client's mean Quality_Factor over all rounds; the merge
# repeats that value on each of the client's rows.
mean_accuracies = client_df.groupby("Client")["Quality_Factor"].mean().reset_index().rename(columns={"Quality_Factor": "Reliability_Score"})
client_df = client_df.merge(mean_accuracies, on="Client")

# Save the DataFrame to a CSV
client_df.to_csv("MLaaS_MINIST_Clients.csv", index=False)

# Report completion (the DataFrame itself is not displayed here)

print("Federated Learning Process Complete! Statistics saved in 'MLaaS_MINIST_Clients.csv'.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 3 ---
client_1 training...
client_2 

In [None]:
# Persist the weight files written by the training cell to Google Drive.
# Step 1: Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Copy your folder to Google Drive
import shutil

source_folder = '/content/weights'  # Folder in Colab
destination_folder = '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/IID'  # Folder in Google Drive

# Copy the entire folder to Google Drive
# dirs_exist_ok=True makes the cell re-runnable: without it copytree raises
# FileExistsError once the destination exists; with it, files of the same
# name are overwritten.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

print("Folder successfully copied to Google Drive!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder successfully copied to Google Drive!


**Combine data files**
---

In [None]:
import pandas as pd

# Load the per-experiment CSV files written by the three MNIST training cells
# (presumably /content is the working directory those cells wrote to — confirm).
file1 = pd.read_csv('/content/MLaaS_MINIST_Clients.csv')  # IID run
file2 = pd.read_csv('/content/MLaaS_MNIST_Clients_NonIID.csv')  # non-IID run with unique sample assignment
file3 = pd.read_csv('/content/MLaaS_MNIST_Clients_NonIID_2.csv')  # non-IID run where samples may repeat across clients

# Stack the three datasets row-wise with a fresh 0..n-1 index.
# NOTE(review): no column records which file a row came from, so the three
# experiments cannot be told apart after this step.
combined_df = pd.concat([file1, file2, file3], ignore_index=True)

# Add the MLaaS_ID column, numbered from 1 up to the length of the combined dataset
combined_df['MLaaS_ID'] = range(1, len(combined_df) + 1)

# Save the combined dataset to a CSV file
output_file = '/content/Combined_MLaaS_Clients_64.csv'
combined_df.to_csv(output_file, index=False)
combined_df

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID
0,client_1,1,"{0: 285, 1: 339, 2: 299, 3: 295, 4: 325, 5: 27...",20.881774,0.8774,0.92243,1
1,client_2,1,"{0: 307, 1: 332, 2: 282, 3: 313, 4: 298, 5: 24...",17.688163,0.8722,0.91886,2
2,client_3,1,"{0: 299, 1: 352, 2: 308, 3: 314, 4: 276, 5: 26...",19.310270,0.8873,0.92170,3
3,client_4,1,"{0: 315, 1: 328, 2: 287, 3: 306, 4: 285, 5: 27...",24.872246,0.8480,0.91596,4
4,client_5,1,"{0: 290, 1: 339, 2: 286, 3: 320, 4: 284, 5: 27...",17.652168,0.8827,0.92494,5
...,...,...,...,...,...,...,...
595,client_16,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",6.801612,0.9163,0.87653,596
596,client_17,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",7.328037,0.9201,0.87825,597
597,client_18,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",7.089456,0.9217,0.88580,598
598,client_19,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",6.903012,0.8952,0.87578,599


**Fashion-MNIST IID**
---

In [None]:
import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import fashion_mnist
from sklearn.utils import shuffle
import os
# Global Configuration
# Hyper-parameters read by the functions and the training loop of this cell.
CONFIG = {
    "num_clients": 20,  # number of clients the data is dealt to
    "num_rounds": 20,  # federated rounds run by the training loop
    "local_epochs": 3,  # epochs per client per round (train_local_model)
    "batch_size": 32,  # mini-batch size for local training
    "learning_rate": 0.01,  # SGD learning rate (create_model)
    "reduced_neurons": 64  # width of the hidden Dense layer (create_model)
}

# Create directory to store weights
os.makedirs("weights_fashion_mnist", exist_ok=True)

# Load and Prepare Fashion-MNIST Dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
input_shape = (28, 28, 1)  # Fashion-MNIST uses grayscale images
num_classes = 10  # read by create_model, get_data_distribution and the data split

# Normalize and reshape data
# Convert to float32, divide by 255.0, then append a trailing channel axis so
# each sample matches `input_shape`.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Split the data among clients: shuffle, then deal each label round-robin
def split_data_non_iid(x, y, num_clients, num_classes, rng=None):
    """Deal a shuffled dataset to the clients round-robin within each label.

    Despite the function name, the result is close to IID: the k-th sample of
    any given label (in shuffled order) goes to client (k % num_clients) + 1,
    so every client receives an almost equal share of every label.

    Args:
        x: NumPy array of samples, indexed along axis 0.
        y: NumPy array of integer labels in 0..num_classes-1, aligned with `x`.
        num_clients: Number of clients to deal to.
        num_classes: Number of distinct labels.
        rng: Optional numpy.random.Generator used for the shuffle so the split
            is reproducible. When omitted, NumPy's global random state is
            used, as before.

    Returns:
        dict mapping "client_1" .. "client_<num_clients>" to
        {"x": list of nested lists, "y": list of ints}.

    Raises:
        KeyError: if `y` contains a label outside 0..num_classes-1.
    """
    client_data = {f"client_{i+1}": {"x": [], "y": []} for i in range(num_clients)}
    indices = np.arange(len(y))
    if rng is None:
        np.random.shuffle(indices)
    else:
        rng.shuffle(indices)
    y = y[indices]
    x = x[indices]

    # Samples seen so far per label; drives the round-robin target client.
    label_counts = {i: 0 for i in range(num_classes)}
    for sample, raw_label in zip(x, y):
        label = int(raw_label)
        client_id = (label_counts[label] % num_clients) + 1
        bucket = client_data[f"client_{client_id}"]
        bucket["x"].append(sample.tolist())
        bucket["y"].append(label)
        label_counts[label] += 1

    return client_data

# Materialise every client's training data once; the training loop reuses
# this mapping in every round.
clients_data = split_data_non_iid(x_train, y_train, CONFIG["num_clients"], num_classes)

# Define the Model
def create_model():
    """Build and compile the small CNN used by the server and every client.

    Architecture: Conv2D(32, 3x3, ReLU) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], ReLU) -> Dense(num_classes, softmax),
    taking inputs of shape `input_shape`. The model is compiled with plain
    SGD at CONFIG["learning_rate"] and sparse categorical cross-entropy, so
    labels are integer class ids.

    Returns:
        A newly initialised, compiled Keras Sequential model.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to Conv2D, which recent Keras versions warn about.
        # The trainable weights and their order are unchanged.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model
# Data distribution function
def get_data_distribution(y):
    """Count how many times each label 0..num_classes-1 occurs in `y`.

    Args:
        y: Iterable of integer labels.

    Returns:
        dict with one key per class (all present, zero when unseen) mapped
        to its number of occurrences.

    Raises:
        KeyError: if `y` contains a label outside 0..num_classes-1.
    """
    counts = dict.fromkeys(range(num_classes), 0)
    for lbl in y:
        counts[lbl] = counts[lbl] + 1
    return counts
# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Train `model` in place on a client's data; return JSON-ready weights.

    Args:
        model: Compiled Keras model.
        x: Client samples (anything np.array accepts).
        y: Client labels aligned with `x`.

    Returns:
        dict mapping "layer_<i>" to the i-th array of model.get_weights(),
        converted to nested lists.
    """
    inputs, targets = np.array(x), np.array(y)
    fit_options = {
        "epochs": CONFIG["local_epochs"],
        "batch_size": CONFIG["batch_size"],
        "verbose": 0,
    }
    model.fit(inputs, targets, **fit_options)
    trained = model.get_weights()
    return {f"layer_{pos}": trained[pos].tolist() for pos in range(len(trained))}
# Aggregate Weights on the Server
def aggregate_weights(client_weights, client_sizes=None):
    """Average the clients' weights layer by layer into global weights.

    Args:
        client_weights: Non-empty list of dicts as produced by
            train_local_model, each mapping "layer_<i>" to a nested list.
            Every dict must contain the keys "layer_0" .. "layer_<n-1>",
            where n is the number of entries in the first dict.
        client_sizes: Optional sequence with one weight per client (for
            example its number of training samples). When given, each layer
            is a weighted average; when omitted every client counts equally,
            which is the previous behaviour.

    Returns:
        dict mapping "layer_<i>" to the averaged weights as nested lists.

    Raises:
        ValueError: if `client_weights` is empty or `client_sizes` does not
            have one entry per client.
    """
    if not client_weights:
        raise ValueError("aggregate_weights needs the weights of at least one client")
    if client_sizes is not None and len(client_sizes) != len(client_weights):
        raise ValueError("client_sizes must contain exactly one entry per client")

    aggregated = {}
    for i in range(len(client_weights[0])):
        key = f"layer_{i}"
        per_client = [w[key] for w in client_weights]
        if client_sizes is None:
            layer_avg = np.mean(per_client, axis=0)
        else:
            layer_avg = np.average(np.asarray(per_client), axis=0, weights=client_sizes)
        aggregated[key] = layer_avg.tolist()
    return aggregated
# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Score `model` on the module-level test set and return the accuracy.

    model.evaluate yields (loss, accuracy); only the accuracy is returned.
    """
    _unused_loss, acc = model.evaluate(x_test, y_test, verbose=0)
    return acc
# Federated Learning Process
# Each round: every client starts from the current global weights, trains
# locally, is scored on the shared test set, and the server then replaces the
# global weights with the unweighted mean of all client weights.
global_model = create_model()
client_records = []  # one dict per (client, round); becomes the CSV rows

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []  # filled below but not read anywhere in this cell

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Fresh model per client, initialised from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Compute data distribution
        data_distribution = get_data_distribution(data["y"])

        # Measure computation time
        # Wall-clock time of the train_local_model call only.
        start_time = time.time()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        end_time = time.time()
        computation_time = end_time - start_time

        # Save local weights
        with open(f"weights_fashion_mnist/{client_id}_round_{round_num+1}.json", "w") as f:
            json.dump(client_weight, f, indent=4)

        # Evaluate local model
        # Accuracy of the locally trained model on the shared test set.
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    # Convert the "layer_<i>" lists back to arrays in layer order.
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights
    with open(f"weights_fashion_mnist/global_round_{round_num+1}.json", "w") as f:
        json.dump(new_global_weights, f, indent=4)
# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)
# Compute mean quality factor per client
# Reliability_Score = a client's mean Quality_Factor over all rounds; the merge
# repeats that value on each of the client's rows.
mean_accuracies = client_df.groupby("Client")["Quality_Factor"].mean().reset_index().rename(columns={"Quality_Factor": "Reliability_Score"})
client_df = client_df.merge(mean_accuracies, on="Client")
# Save the DataFrame to a CSV
csv_filename = "MLaaS_FASHION_MNIST_Clients.csv"
client_df.to_csv(csv_filename, index=False)
print(f"Federated Learning Process Complete! Statistics saved in '{csv_filename}'.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 3 ---
client_1 training...
client_2 

In [None]:
# Persist the weight files written by the training cell to Google Drive.
# Step 1: Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Step 2: Copy your folder to Google Drive
import shutil
source_folder = '/content/weights_fashion_mnist'  # Folder in Colab
destination_folder = '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/IID'  # Folder in Google Drive
# Copy the entire folder to Google Drive
# dirs_exist_ok=True makes the cell re-runnable: without it copytree raises
# FileExistsError once the destination exists; with it, files of the same
# name are overwritten.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
print("Folder successfully copied to Google Drive!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder successfully copied to Google Drive!


**Fashion-MNIST NIID**
---

In [None]:
import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import fashion_mnist
from sklearn.utils import shuffle
import os

# Global Configuration
# Hyper-parameters shared by every client and by the federated loop of this cell.
CONFIG = dict(
    num_clients=20,
    num_rounds=20,
    local_epochs=3,
    batch_size=32,
    learning_rate=0.01,
    reduced_neurons=64,
)

# Output directory for the weight snapshots of this run
os.makedirs("weights_fashion_mnist_1", exist_ok=True)

# Load Fashion-MNIST (grayscale images, 10 classes)
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
num_classes = 10
input_shape = (28, 28, 1)

# Convert to float32, divide by 255 and append the trailing channel axis
x_train = np.expand_dims(x_train.astype('float32') / 255.0, axis=-1)
x_test = np.expand_dims(x_test.astype('float32') / 255.0, axis=-1)

# Hand-written sample counts per label for clients 1-5.
# Clients 1, 3, 4 and 5 are label-balanced and differ only in size
# (100 / 150 / 50 / 500 samples per label); client 2 is the only label-skewed one.
client_distributions = {
    "client_1": dict.fromkeys(range(10), 100),
    "client_2": {0: 100, 1: 30, 2: 40, 3: 200, 4: 10, 5: 50, 6: 70, 7: 90, 8: 60, 9: 20},
    "client_3": dict.fromkeys(range(10), 150),
    "client_4": dict.fromkeys(range(10), 50),
    "client_5": dict.fromkeys(range(10), 500),
}

# Assign remaining clients a standard IID split
def split_data_custom(x, y, num_clients, client_distributions):
    """Split (x, y) across clients, honouring hand-written label counts for some of them.

    Args:
        x: array of samples, indexable by an integer index array.
        y: array of integer labels aligned with ``x``.
        num_clients: total number of clients; they are named ``client_1`` .. ``client_N``.
        client_distributions: ``{client_name: {label: sample_count}}`` for the clients
            that receive a custom draw. Samples are drawn without replacement, so a
            sample is never given to two clients.

    Returns:
        ``{client_name: {"x": list, "y": list}}``. Clients without a custom
        distribution receive equal-sized slices of the shuffled leftover pool
        (any remainder of the integer division is dropped).

    Relies on the module-level ``num_classes`` and on ``sklearn.utils.shuffle``.
    """
    client_data = {f"client_{i+1}": {"x": [], "y": []} for i in range(num_clients)}
    label_indices = {i: np.where(y == i)[0] for i in range(num_classes)}

    for client, dist in client_distributions.items():
        for label, num_samples in dist.items():
            indices = np.random.choice(label_indices[label], num_samples, replace=False)
            client_data[client]["x"].extend(x[indices].tolist())
            client_data[client]["y"].extend(y[indices].tolist())
            # Remove the drawn samples from the pool so no other client can get them.
            label_indices[label] = np.setdiff1d(label_indices[label], indices)

    remaining_clients = [c for c in client_data if c not in client_distributions]
    if not remaining_clients:
        # Every client has a custom distribution: nothing left to hand out
        # (and dividing by zero clients below would raise).
        return client_data

    # Shuffle the leftover *indices* rather than materialising every leftover image
    # as nested Python lists first; only each client's slice is converted to lists.
    leftover = np.concatenate([label_indices[label] for label in range(num_classes)])
    leftover = shuffle(leftover)
    data_per_client = len(leftover) // len(remaining_clients)

    start = 0
    for client in remaining_clients:
        end = start + data_per_client
        picked = leftover[start:end]
        client_data[client]["x"] = x[picked].tolist()
        client_data[client]["y"] = y[picked].tolist()
        start = end

    return client_data

# Build the per-client training sets: clients listed in `client_distributions`
# get their custom label counts, the other clients share the leftover samples.
clients_data = split_data_custom(x_train, y_train, CONFIG["num_clients"], client_distributions)

# Define the Model
def create_model():
    """Build and compile the small CNN used by both the clients and the server.

    Architecture: Conv2D(16, 3x3, relu) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], relu) -> Dense(num_classes, softmax).
    Compiled with SGD at CONFIG["learning_rate"] and sparse categorical
    cross-entropy, so labels are expected as integer class ids.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # An explicit Input layer replaces `input_shape=` on the first layer,
        # which Keras reports as discouraged for Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(16, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Data distribution function
def get_data_distribution(y):
    """Count how many times each class id occurs in ``y``.

    Returns a dict with one entry per class in ``range(num_classes)`` (zero for
    classes that do not occur). A label outside that range raises ``KeyError``.
    """
    counts = dict.fromkeys(range(num_classes), 0)
    for class_id in y:
        counts[class_id] = counts[class_id] + 1
    return counts

# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Fit ``model`` in place on one client's data and return its weights.

    ``x`` and ``y`` are converted to NumPy arrays; epochs and batch size come
    from ``CONFIG``. The result maps ``"layer_<i>"`` to the i-th array of
    ``model.get_weights()`` as nested lists, so it can be dumped to JSON.
    """
    features = np.array(x)
    labels = np.array(y)
    model.fit(
        features,
        labels,
        epochs=CONFIG["local_epochs"],
        batch_size=CONFIG["batch_size"],
        verbose=0,
    )
    serialized = {}
    for position, tensor in enumerate(model.get_weights()):
        serialized[f"layer_{position}"] = tensor.tolist()
    return serialized

# Aggregate Weights on the Server
def aggregate_weights(client_weights):
    """Average the clients' weights layer by layer (every client counts equally).

    ``client_weights`` is a list of ``{"layer_<i>": nested list}`` dicts as
    returned by ``train_local_model``. The layer count is taken from the first
    client. Returns a dict of the same shape holding the element-wise mean.
    """
    averaged = {}
    for position in range(len(client_weights[0])):
        layer_name = f"layer_{position}"
        per_client = [weights[layer_name] for weights in client_weights]
        averaged[layer_name] = np.mean(per_client, axis=0).tolist()
    return averaged

# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Return the accuracy ``model.evaluate`` reports on the module-level test set.

    The loss that ``evaluate`` returns alongside the accuracy is discarded.
    """
    metrics = model.evaluate(x_test, y_test, verbose=0)
    _loss, accuracy = metrics
    return accuracy

# Federated Learning Process
# Client and global weights of this run share one directory. Writing the global
# weights to a different folder fails on a fresh kernel (that folder is never
# created by this cell) and leaves them out of the Drive copy of this folder.
weights_dir = "weights_fashion_mnist_1"
os.makedirs(weights_dir, exist_ok=True)

global_model = create_model()
client_records = []

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Every client starts the round from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Per-label sample counts of this client's training data
        data_distribution = get_data_distribution(data["y"])

        # Wall-clock duration of the local training only (evaluation excluded)
        start_time = time.perf_counter()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        computation_time = time.perf_counter() - start_time

        # Save local weights
        with open(os.path.join(weights_dir, f"{client_id}_round_{round_num+1}.json"), "w") as f:
            json.dump(client_weight, f, indent=4)

        # Accuracy of the locally trained model on the shared test set
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights next to the client weights
    with open(os.path.join(weights_dir, f"global_round_{round_num+1}.json"), "w") as f:
        json.dump(new_global_weights, f, indent=4)

# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)

# Reliability_Score = the client's mean Quality_Factor over all rounds. Without
# this column the rows of this run carry NaN once the CSV is concatenated with
# an export that has it.
mean_accuracies = (
    client_df.groupby("Client")["Quality_Factor"]
    .mean()
    .reset_index()
    .rename(columns={"Quality_Factor": "Reliability_Score"})
)
client_df = client_df.merge(mean_accuracies, on="Client")

# NOTE(review): the "_iid" suffix is kept because the CSV-combining cell reads this
# exact file name, although this run uses the custom per-client distributions.
client_df.to_csv("MLaaS_FASHION_MNIST_Clients_iid.csv", index=False)
print("Federated Learning Process Complete! Data saved.")

Exception ignored in: <function _xla_gc_callback at 0x7fefe9c29440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 
Exception ignored in: <function _xla_gc_callback at 0x7fefe9c29440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 
Exception ignored in: <function _xla_gc_callback at 0x7fefe9c29440>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 3 ---
client_1 training...
client_2 

In [None]:
# Step 1: Mount Google Drive so the Colab runtime can write to it
from google.colab import drive
drive.mount('/content/drive')
# Step 2: Copy the weights folder produced by the training cell to Google Drive
import shutil
source_folder = '/content/weights_fashion_mnist_1'  # Folder in Colab
destination_folder = '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/NIID'  # Folder in Google Drive
# dirs_exist_ok=True makes the copy repeatable; a plain copytree raises
# FileExistsError when the destination folder is already present.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
print("Folder successfully copied to Google Drive!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder successfully copied to Google Drive!


In [None]:
import pandas as pd

# Load the per-client statistics exported by the two Fashion-MNIST federated runs.
# NOTE(review): despite its "_iid" suffix, the second file is the one written by the
# run that uses custom per-client distributions - confirm the naming.
file1 = pd.read_csv('/content/MLaaS_FASHION_MNIST_Clients.csv')
file2 = pd.read_csv('/content/MLaaS_FASHION_MNIST_Clients_iid.csv')

# Reliability_Score is a client's mean Quality_Factor over the rounds of its run.
# An export that lacks the column gets it here, computed within that run only,
# so the combined table has no NaN scores.
run_frames = []
for run_stats in (file1, file2):
    if "Reliability_Score" not in run_stats.columns:
        run_stats = run_stats.assign(
            Reliability_Score=run_stats.groupby("Client")["Quality_Factor"].transform("mean")
        )
    run_frames.append(run_stats)

# Stack the two runs into one table
combined_df = pd.concat(run_frames, ignore_index=True)

# Add the MLaaS_ID column: a running id from 1 to the number of combined rows
combined_df['MLaaS_ID'] = range(1, len(combined_df) + 1)

# Save the combined dataset to a CSV file
output_file = '/content/Combined_MLaaS_Clients_FMINIST_32.csv'
combined_df.to_csv(output_file, index=False)
combined_df

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID
0,client_1,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.973562,0.7264,0.804915,1
1,client_2,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",7.691949,0.6473,0.801875,2
2,client_3,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.264498,0.7302,0.808980,3
3,client_4,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.126710,0.7158,0.801425,4
4,client_5,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.114779,0.7046,0.804190,5
...,...,...,...,...,...,...,...
795,client_16,20,"{0: 317, 1: 346, 2: 352, 3: 347, 4: 359, 5: 35...",9.371960,0.8463,,796
796,client_17,20,"{0: 337, 1: 354, 2: 363, 3: 347, 4: 320, 5: 35...",8.348448,0.8399,,797
797,client_18,20,"{0: 369, 1: 369, 2: 366, 3: 320, 4: 314, 5: 33...",8.621405,0.8425,,798
798,client_19,20,"{0: 308, 1: 323, 2: 340, 3: 354, 4: 367, 5: 35...",6.819442,0.8443,,799


In [None]:
import numpy as np
import tensorflow as tf
import json
import pandas as pd
import time
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import fashion_mnist
from sklearn.utils import shuffle
import os

# Global Configuration
# Hyper-parameters shared by every client and by the federated loop of this cell.
CONFIG = dict(
    num_clients=20,
    num_rounds=3,
    local_epochs=1,
    batch_size=32,
    learning_rate=0.01,
    reduced_neurons=64,
)

# Output directory for the weight snapshots of this run
os.makedirs("weights_fashion_mnist", exist_ok=True)

# Load Fashion-MNIST (grayscale images, 10 classes)
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
num_classes = 10
input_shape = (28, 28, 1)

# Convert to float32, divide by 255 and append the trailing channel axis
x_train = np.expand_dims(x_train.astype('float32') / 255.0, axis=-1)
x_test = np.expand_dims(x_test.astype('float32') / 255.0, axis=-1)

# Split the data among clients
def split_data(x, y, num_clients):
    """Cut (x, y) into ``num_clients`` equal, contiguous, non-overlapping slices.

    Returns ``{"client_<k>": {"x": list, "y": list}}`` for k = 1..num_clients.
    Samples beyond ``num_clients * (len(x) // num_clients)`` are not assigned.
    """
    shard_size = len(x) // num_clients
    shards = {}
    for index in range(num_clients):
        lower = index * shard_size
        upper = (index + 1) * shard_size
        shards[f"client_{index+1}"] = {
            "x": x[lower:upper].tolist(),
            "y": y[lower:upper].tolist(),
        }
    return shards

# Partition the training set into equal contiguous slices, one per client.
clients_data = split_data(x_train, y_train, CONFIG["num_clients"])

# Define the Model
def create_model():
    """Build and compile the small CNN used by both the clients and the server.

    Architecture: Conv2D(16, 3x3, relu) -> MaxPooling2D(2x2) -> Flatten ->
    Dense(CONFIG["reduced_neurons"], relu) -> Dense(num_classes, softmax).
    Compiled with SGD at CONFIG["learning_rate"] and sparse categorical
    cross-entropy, so labels are expected as integer class ids.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # Declare the input with an Input layer instead of passing `input_shape=`
        # to Conv2D, which Keras reports as discouraged for Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(16, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(CONFIG["reduced_neurons"], activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=CONFIG["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Data distribution function
def get_data_distribution(y):
    """Tally the labels in ``y`` per class id.

    The result has a key for every id in ``range(num_classes)``, including
    classes with a count of zero; labels outside that range raise ``KeyError``.
    """
    tally = {class_id: 0 for class_id in range(num_classes)}
    for observed in y:
        tally[observed] = tally[observed] + 1
    return tally

# Train Local Model on Each Client
def train_local_model(model, x, y):
    """Train ``model`` in place on a client's samples and export its weights.

    Epoch count and batch size are read from ``CONFIG``. Returns a
    JSON-serialisable dict ``{"layer_<i>": nested list}`` in the order of
    ``model.get_weights()``.
    """
    model.fit(
        np.array(x),
        np.array(y),
        epochs=CONFIG["local_epochs"],
        batch_size=CONFIG["batch_size"],
        verbose=0,
    )
    trained = model.get_weights()
    return dict((f"layer_{i}", w.tolist()) for i, w in enumerate(trained))

# Aggregate Weights on the Server
def aggregate_weights(client_weights):
    """Compute the unweighted element-wise mean of the clients' weights per layer.

    Each element of ``client_weights`` is a ``{"layer_<i>": nested list}`` dict;
    the number of layers is read from the first element. The returned dict has
    the same keys with nested-list values.
    """
    layer_names = [f"layer_{i}" for i in range(len(client_weights[0]))]
    result = {}
    for layer_name in layer_names:
        stacked = [update[layer_name] for update in client_weights]
        result[layer_name] = np.mean(stacked, axis=0).tolist()
    return result

# Evaluate Local Model on the Test Set
def evaluate_local_model(model):
    """Evaluate ``model`` on the module-level ``x_test``/``y_test`` and return its accuracy.

    ``model.evaluate`` yields (loss, accuracy); only the accuracy is returned.
    """
    _unused_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    return test_accuracy

# Federated Learning Process
# Client and global weights of this run share one directory. Writing the client
# weights to a different folder fails on a fresh kernel (that folder is never
# created by this cell) and mixes this run's files with another run's.
weights_dir = "weights_fashion_mnist"
os.makedirs(weights_dir, exist_ok=True)

global_model = create_model()
client_records = []

for round_num in range(CONFIG["num_rounds"]):
    print(f"--- Round {round_num + 1} ---")

    client_weights = []
    round_accuracies = []

    for client_id, data in clients_data.items():
        print(f"{client_id} training...")
        # Every client starts the round from the current global weights.
        local_model = create_model()
        local_model.set_weights(global_model.get_weights())

        # Per-label sample counts of this client's training data
        data_distribution = get_data_distribution(data["y"])

        # Wall-clock duration of the local training only (evaluation excluded)
        start_time = time.perf_counter()
        client_weight = train_local_model(local_model, data["x"], data["y"])
        computation_time = time.perf_counter() - start_time

        # Save local weights
        with open(os.path.join(weights_dir, f"{client_id}_round_{round_num+1}.json"), "w") as f:
            json.dump(client_weight, f, indent=4)

        # Accuracy of the locally trained model on the shared test set
        local_accuracy = evaluate_local_model(local_model)
        round_accuracies.append(local_accuracy)

        # Record client info
        client_records.append({
            "Client": client_id,
            "Round": round_num + 1,
            "Data_Distribution": data_distribution,
            "Computation_Time": computation_time,
            "Quality_Factor": local_accuracy
        })

        client_weights.append(client_weight)

    # Aggregate weights on server
    print("Aggregating client weights...")
    new_global_weights = aggregate_weights(client_weights)
    global_model.set_weights([np.array(new_global_weights[f"layer_{i}"]) for i in range(len(new_global_weights))])

    # Save global model weights next to the client weights
    with open(os.path.join(weights_dir, f"global_round_{round_num+1}.json"), "w") as f:
        json.dump(new_global_weights, f, indent=4)

# Compile client information into a DataFrame
client_df = pd.DataFrame(client_records)

# Reliability_Score = the client's mean Quality_Factor over all rounds
mean_accuracies = client_df.groupby("Client")["Quality_Factor"].mean().reset_index().rename(columns={"Quality_Factor": "Reliability_Score"})
client_df = client_df.merge(mean_accuracies, on="Client")

# Save the DataFrame to a CSV
csv_filename = "MLaaS_FASHION_MNIST_Clients.csv"
client_df.to_csv(csv_filename, index=False)

print(f"Federated Learning Process Complete! Statistics saved in '{csv_filename}'.")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
--- Round 1 ---
client_1 training...
client_2 training...
client_3 training...
client_4 training...
client_5 training...
client_6 training...
client_7 training...
client_8 training...
client_9 training...
client_10 training...
client_11 training...
client_12 training...
client_13 training...
client_14 training...
client_15 training...
client_16 training...
client_17 training...
client_18 training...
client_19 training...
client_20 training...
Aggregating client weights...
--- Round 2 ---
client_1 training...
client_2 training...
client_3 training...
client_4 t

**Human Activity Recognition**
---

**Adaptive Composability Rules**
---

**Adaptive MLaaS Composability Model**
---

In [None]:
import os
import json
import numpy as np
import pandas as pd
import ast
from google.colab import drive

# Make the weight files stored on Google Drive reachable from this runtime
drive.mount('/content/drive')

# Inputs: the combined per-client statistics, the folders holding the clients'
# weight files, and one global-model file per folder (same order as the folders).
csv_file_path = '/content/Combined_MLaaS_Clients.csv'
client_dirs = [
    '/content/drive/MyDrive/MLaaS Genarator Weights/',
    '/content/drive/MyDrive/MLaaS Genarator Weights/NIID/',
    '/content/drive/MyDrive/MLaaS Genarator Weights/NIID_2/',
]
global_model_paths = [
    '/content/drive/MyDrive/MLaaS Genarator Weights/CSV AND GLOBAL/global_round_3.json',
    '/content/drive/MyDrive/MLaaS Genarator Weights/NIID/Global_model/global_round_3.json',
    '/content/drive/MyDrive/MLaaS Genarator Weights/NIID_2/Global_model/global_round_3.json',
]

df = pd.read_csv(csv_file_path)

# Step 1: 'Data_Distribution' is stored as the text form of a dict; turn it back into one
df['Parsed_Distribution'] = [ast.literal_eval(text) for text in df['Data_Distribution']]

# Step 2: the reference distribution is the per-label mean over all CSV rows
client_distributions = np.array([list(counts.values()) for counts in df['Parsed_Distribution']])
global_reference_distribution = client_distributions.mean(axis=0)

# Step 3: thresholds used by the scoring functions
THRESHOLD_DUM = 490   # max Euclidean distance to the reference distribution
THRESHOLD_SUM = 1.5   # max ratio of a client's time to the average time
THRESHOLD_HQS = 0.05  # max absolute gap to the average quality factor
THRESHOLD_SRS = 0.7   # min reliability score
ALPHA_SUM = 1         # exponent applied to the time ratio

# Step 4: Define functions for existing measurements
def compute_dum(client_distribution):
    """Return 1 when the client's label counts lie within ``THRESHOLD_DUM``
    (Euclidean distance) of ``global_reference_distribution``, else 0."""
    counts = np.array(list(client_distribution.values()))
    offset = counts - global_reference_distribution
    ed = np.sqrt(np.sum(offset ** 2))
    if ed < THRESHOLD_DUM:
        return 1
    return 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when ``(response_time / avg_response_time) ** ALPHA_SUM`` is
    strictly below ``THRESHOLD_SUM``, else 0."""
    relative_time = response_time / avg_response_time
    if relative_time ** ALPHA_SUM < THRESHOLD_SUM:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when the quality factor is within ``THRESHOLD_HQS`` (absolute
    difference, inclusive) of the average quality factor, else 0."""
    gap = np.abs(quality_factor - avg_quality_factor)
    if gap <= THRESHOLD_HQS:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when ``reliability_score`` reaches ``THRESHOLD_SRS``, else 0.

    A NaN score compares false and therefore yields 0.
    """
    if reliability_score >= THRESHOLD_SRS:
        return 1
    return 0

# Flatten model weights for MUM calculation
def flatten_weights(weights_dict):
    """Concatenate all layers of a weights dict into one 1-D array.

    Args:
        weights_dict: ``{layer_name: nested list or array}``; layers are taken in
            the dict's iteration order.

    Returns:
        1-D ``numpy.ndarray`` with every layer flattened and joined end to end;
        an empty array when the dict has no layers.
    """
    # Concatenate whole arrays instead of extending a Python list one scalar at
    # a time, which boxes every single weight into a Python object.
    chunks = [np.array(weights_dict[key]).flatten() for key in weights_dict]
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)

def calculate_mum(local_weights, global_weights, eps=1e-12):
    """Mean relative deviation of a client's weights from the global weights.

    Computes ``mean(|local - global| / |global|)`` over the first
    ``min(len(local), len(global))`` entries of the two flat vectors.

    Args:
        local_weights: flat sequence of the client's weights.
        global_weights: flat sequence of the global model's weights.
        eps: lower bound applied to ``|global|`` so that a global weight of
            exactly zero cannot produce a 0/0 NaN or a division-by-zero inf.

    Returns:
        The mean relative deviation, or NaN when either vector is empty.
    """
    local = np.asarray(local_weights, dtype=float).ravel()
    reference = np.asarray(global_weights, dtype=float).ravel()
    min_length = min(local.size, reference.size)
    if min_length == 0:
        return float("nan")
    local = local[:min_length]
    reference = reference[:min_length]
    denominator = np.maximum(np.abs(reference), eps)
    return np.mean(np.abs(local - reference) / denominator)

# Load global models (one flattened weight vector per entry of global_model_paths)
global_models = []
for path in global_model_paths:
    with open(path, 'r') as f:
        global_weights = json.load(f)
    global_models.append(flatten_weights(global_weights))

# Step 5: Calculate average response time and quality factor over all CSV rows
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# A client passes MUM when its mean relative weight deviation is below this value
THRESHOLD_MUM = 0.3

# Index the CSV rows by (client, round). Pairing files with rows by position is
# wrong because sorted file names are lexicographic ("client_10" < "client_2")
# while the CSV is not.
# NOTE(review): assumes the CSV has a 'Round' column and that the k-th row for a
# given (client, round) belongs to the k-th entry of client_dirs - confirm.
rows_by_client_round = {}
for row_position, (client_name, round_number) in enumerate(zip(df['Client'].astype(str), df['Round'])):
    rows_by_client_round.setdefault((client_name, int(round_number)), []).append(row_position)

# Collected as (CSV row position, client name, score vector)
scored_rows = []
for dir_index, client_dir in enumerate(client_dirs):
    for filename in sorted(os.listdir(client_dir)):
        if not filename.endswith('.json'):
            continue

        # Expected file name: client_<id>_round_<n>.json
        name_parts = filename[:-len('.json')].split('_')
        is_client_file = (
            len(name_parts) == 4
            and name_parts[0] == 'client'
            and name_parts[2] == 'round'
            and name_parts[1].isdigit()
            and name_parts[3].isdigit()
        )
        if not is_client_file:
            print(f"Skipping {filename}: expected client_<id>_round_<n>.json")
            continue

        key = ("_".join(name_parts[:2]), int(name_parts[3]))
        candidates = rows_by_client_round.get(key, [])
        if dir_index >= len(candidates):
            print(f"Skipping {filename}: no CSV row for {key} in dataset {dir_index}")
            continue
        row_position = candidates[dir_index]
        row = df.iloc[row_position]

        with open(os.path.join(client_dir, filename), 'r') as client_file:
            local_weights = json.load(client_file)
        local_flat = flatten_weights(local_weights)
        mum_value = calculate_mum(local_flat, global_models[dir_index])

        dum_score = compute_dum(row['Parsed_Distribution'])
        sum_score = compute_sum(row['Computation_Time'], avg_response_time)
        hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
        srs_score = compute_srs(row['Reliability_Score'])
        mum_score = 1 if mum_value < THRESHOLD_MUM else 0

        client_vector = [dum_score, sum_score, hqs_score, srs_score, mum_score]
        scored_rows.append((row_position, row['Client'], client_vector))

# Present the results in CSV row order
scored_rows.sort(key=lambda item: item[0])
binary_vector = [vector for _, _, vector in scored_rows]

# Convert to DataFrame and save
binary_vector_df = pd.DataFrame(binary_vector, columns=['DUM', 'SUM', 'HQS', 'SRS', 'MUM'])
binary_vector_df['Client'] = [client_name for _, client_name, _ in scored_rows]
output_file = '/content/drive/MyDrive/MLaaS Genarator Weights/Final_Results_With_MUM.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display the results
print("Final scores with MUM included:")
print(binary_vector_df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Final scores with MUM included:
     DUM  SUM  HQS  SRS  MUM     Client
0      1    1    1    1    1   client_1
1      1    1    1    1    1   client_2
2      1    1    1    1    1   client_3
3      1    1    1    1    1   client_4
4      1    1    1    1    1   client_5
..   ...  ...  ...  ...  ...        ...
175    1    1    0    0    1  client_16
176    1    1    1    0    1  client_17
177    1    1    1    0    1  client_18
178    1    1    1    0    1  client_19
179    1    1    1    0    1  client_20

[180 rows x 6 columns]


**MLaaS Generator Weights: MNIST 32**
---

In [None]:
import os
import json
import numpy as np
import pandas as pd
import ast
from google.colab import drive

# Mount Google Drive so the weight files stored there can be read
drive.mount('/content/drive')

# Paths: the combined per-client statistics CSV, the folders that are scanned for
# weight files, and one global-model file per folder. The two lists are used in
# parallel, i.e. folder k is compared against global model k.
csv_file_path = '/content/Combined_MLaaS_Clients_32.csv'
client_dirs = [
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/IID/',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/NIID_2/'
]
global_model_paths = [
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/global_round_5.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/IID/global_round_5.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 32/NIID_2/global_round_5.json'
]

# Load the CSV file
df = pd.read_csv(csv_file_path)

# Step 1: Parse 'Data_Distribution' safely
def safe_parse_distribution(value):
    """Turn a CSV cell into a label-count dict, never raising.

    Args:
        value: the text form of a dict (e.g. ``"{0: 100, 1: 30}"``), an
            already-parsed dict, or anything else (e.g. NaN for an empty cell).

    Returns:
        The parsed dict. ``{}`` when the value is not a string or dict, cannot
        be parsed, or parses to something that is not a dict - so callers can
        rely on getting a dict and can filter failures with a truthiness test.
    """
    if isinstance(value, dict):
        return value
    if not isinstance(value, str):
        # e.g. NaN from an empty CSV cell; passing it through would break
        # later `.values()` calls.
        return {}
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
    return parsed if isinstance(parsed, dict) else {}

df['Parsed_Distribution'] = df['Data_Distribution'].apply(safe_parse_distribution)
df["model category"] = "Minist32"

# Reference distribution: per-label mean over every row whose distribution parsed
# to a non-empty dict; an empty array when no row did.
_parsed_counts = [list(dist.values()) for dist in df['Parsed_Distribution'] if dist]
client_distributions = np.array(_parsed_counts)
if len(client_distributions) > 0:
    global_reference_distribution = np.mean(client_distributions, axis=0)
else:
    global_reference_distribution = np.array([])

# Thresholds used by the scoring functions
THRESHOLD_DUM = 490   # max Euclidean distance to the reference distribution
THRESHOLD_SUM = 1.5   # max ratio of a client's time to the average time
THRESHOLD_HQS = 0.05  # max absolute gap to the average quality factor
THRESHOLD_SRS = 0.7   # min reliability score
ALPHA_SUM = 1         # exponent applied to the time ratio

# Define measurement functions
def compute_dum(client_distribution):
    """Score whether a client's label counts are close to the reference distribution.

    Args:
        client_distribution: ``{label: count}`` dict for one client.

    Returns:
        1 when the Euclidean distance to ``global_reference_distribution`` is
        below ``THRESHOLD_DUM``; 0 otherwise, including when the reference is
        empty, the client distribution is missing/empty (the parser's fallback
        for unreadable cells), or the two have different lengths.
    """
    reference = np.asarray(global_reference_distribution)
    if reference.size == 0:
        return 0
    if not isinstance(client_distribution, dict) or not client_distribution:
        return 0
    counts = np.array(list(client_distribution.values()))
    if counts.shape != reference.shape:
        # Subtracting vectors of different length would raise or silently broadcast.
        return 0
    ed = np.sqrt(np.sum((counts - reference) ** 2))
    return 1 if ed < THRESHOLD_DUM else 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when the client's time, relative to the average and raised to
    ``ALPHA_SUM``, stays strictly below ``THRESHOLD_SUM``; otherwise 0."""
    scaled_ratio = (response_time / avg_response_time) ** ALPHA_SUM
    if scaled_ratio < THRESHOLD_SUM:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when the quality factor differs from the average by at most
    ``THRESHOLD_HQS`` (absolute value), otherwise 0."""
    gap = abs(quality_factor - avg_quality_factor)
    if gap <= THRESHOLD_HQS:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when ``reliability_score`` is at least ``THRESHOLD_SRS``, else 0
    (a NaN score yields 0 because the comparison is false)."""
    if reliability_score >= THRESHOLD_SRS:
        return 1
    return 0

# Function to Flatten Model Weights
def flatten_weights(weights_dict):
    """Concatenate all list-valued layers of a weights dict into one 1-D array.

    Args:
        weights_dict: ``{layer_name: nested list}``; values that are not lists
            are ignored. Layers are taken in the dict's iteration order.

    Returns:
        1-D ``numpy.ndarray`` with every layer flattened and joined end to end;
        an empty array when there is no list-valued layer.
    """
    # Concatenate whole arrays instead of extending a Python list one scalar at
    # a time, which boxes every single weight into a Python object.
    chunks = [
        np.array(layer).flatten()
        for layer in weights_dict.values()
        if isinstance(layer, list)
    ]
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)

# Function to Calculate MUM Score
def calculate_mum(local_weights, global_weights, eps=1e-12):
    """Mean relative deviation of a client's weights from the global weights.

    Computes ``mean(|local - global| / |global|)`` over the first
    ``min(len(local), len(global))`` entries of the two flat vectors.

    Args:
        local_weights: flat sequence of the client's weights.
        global_weights: flat sequence of the global model's weights.
        eps: lower bound applied to ``|global|`` so that a global weight of
            exactly zero cannot produce a 0/0 NaN or a division-by-zero inf.

    Returns:
        The mean relative deviation, or NaN when either vector is empty.
    """
    local = np.asarray(local_weights, dtype=float).ravel()
    reference = np.asarray(global_weights, dtype=float).ravel()
    min_length = min(local.size, reference.size)
    if min_length == 0:
        return float("nan")
    local, reference = local[:min_length], reference[:min_length]
    relative_error = np.abs(local - reference) / np.maximum(np.abs(reference), eps)
    return np.mean(relative_error)

# Load Global Models (one flattened weight vector per entry of global_model_paths)
global_models = []
for path in global_model_paths:
    with open(path, 'r') as f:
        global_weights = json.load(f)
    global_models.append(flatten_weights(global_weights))

# Compute Average Response Time & Quality Factor over all CSV rows
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# A client passes MUM when its mean relative weight deviation is below this value
THRESHOLD_MUM = 0.3

# Index the CSV rows by (client, round). A lookup keyed by client name alone keeps
# only one row per client, so every round and every run of that client would be
# scored from the same statistics.
# NOTE(review): assumes the CSV has a 'Round' column and that the k-th row for a
# given (client, round) belongs to the k-th entry of client_dirs - confirm.
rows_by_client_round = {}
for row_position, (client_name, round_number) in enumerate(zip(df['Client'].astype(str), df['Round'])):
    rows_by_client_round.setdefault((client_name, int(round_number)), []).append(row_position)

# Process Clients
binary_vector = []
for dir_index, client_dir in enumerate(client_dirs):
    if not os.path.exists(client_dir):
        print(f"Warning: {client_dir} does not exist!")
        continue

    json_files = sorted(f for f in os.listdir(client_dir) if f.endswith('.json'))

    for filename in json_files:
        # Only "client_<id>_round_<n>.json" files are client updates; anything
        # else in the folder (e.g. the global model file) is skipped.
        name_parts = filename[:-len('.json')].split('_')
        is_client_file = (
            len(name_parts) == 4
            and name_parts[0] == 'client'
            and name_parts[2] == 'round'
            and name_parts[1].isdigit()
            and name_parts[3].isdigit()
        )
        if not is_client_file:
            continue
        client_id = "_".join(name_parts[:2])

        candidates = rows_by_client_round.get((client_id, int(name_parts[3])))
        if not candidates:
            continue  # Skip files without a matching CSV row
        # Best effort when the CSV holds fewer runs than there are folders:
        # fall back to the last row recorded for this client and round.
        row = df.iloc[candidates[min(dir_index, len(candidates) - 1)]]

        # Load Local Model Weights
        with open(os.path.join(client_dir, filename), 'r') as client_file:
            local_weights = json.load(client_file)

        local_flat = flatten_weights(local_weights)
        mum_value = calculate_mum(local_flat, global_models[dir_index])
        mum_score = 1 if mum_value < THRESHOLD_MUM else 0

        # Compute Scores
        dum_score = compute_dum(row['Parsed_Distribution'])
        sum_score = compute_sum(row['Computation_Time'], avg_response_time)
        hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
        srs_score = compute_srs(row['Reliability_Score'])

        # Store Results
        binary_vector.append([client_id, dum_score, sum_score, hqs_score, srs_score, mum_score])

# Convert to DataFrame and Save
binary_vector_df = pd.DataFrame(binary_vector, columns=['Client', 'DUM', 'SUM', 'HQS', 'SRS', 'MUM'])
output_file = '/content/Final_Results_With_MUM_32.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display Results
print("Final scores with MUM included:")
print(binary_vector_df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Final scores with MUM included:
        Client  DUM  SUM  HQS  SRS  MUM
0    client_10    1    1    0    1    0
1    client_10    1    1    0    1    1
2    client_10    1    1    0    1    1
3    client_10    1    1    0    1    1
4    client_10    1    1    0    1    1
..         ...  ...  ...  ...  ...  ...
295   client_9    1    1    0    1    0
296   client_9    1    1    0    1    1
297   client_9    1    1    0    1    1
298   client_9    1    1    0    1    1
299   client_9    1    1    0    1    1

[300 rows x 6 columns]


In [None]:
b

**MLaaS Generator Weights: MNIST 64**
---

In [None]:
import os
import json
import numpy as np
import pandas as pd
import ast
from google.colab import drive

# Mount Google Drive so the weight files stored there can be read
drive.mount('/content/drive')

# Paths: the combined per-client statistics CSV, the folders that are scanned for
# weight files, and one global-model file per folder. The two lists are used in
# parallel, i.e. folder k is compared against global model k.
csv_file_path = '/content/Combined_MLaaS_Clients_64.csv'
client_dirs = [
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/IID/',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/NIID_2/'
]
global_model_paths = [
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/global_round_5.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/IID/global_round_5.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight MNIST 64/NIID_2/global_round_5.json'
]

# Load the CSV file
df = pd.read_csv(csv_file_path)

# Step 1: Parse 'Data_Distribution' safely
def safe_parse_distribution(value):
    """Turn a CSV cell into a label-count dict, never raising.

    Args:
        value: the text form of a dict (e.g. ``"{0: 100, 1: 30}"``), an
            already-parsed dict, or anything else (e.g. NaN for an empty cell).

    Returns:
        The parsed dict. ``{}`` when the value is not a string or dict, cannot
        be parsed, or parses to something that is not a dict - so callers can
        rely on getting a dict and can filter failures with a truthiness test.
    """
    if isinstance(value, dict):
        return value
    if not isinstance(value, str):
        # e.g. NaN from an empty CSV cell; passing it through would break
        # later `.values()` calls.
        return {}
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
    if not isinstance(parsed, dict):
        return {}
    return parsed

df['Parsed_Distribution'] = df['Data_Distribution'].apply(safe_parse_distribution)
# This cell loads the "64" CSV and the "MNIST 64" weight folders, so the rows are
# labelled accordingly (the "Minist32" label belongs to the 32 variant).
df["model category"] = "Minist64"

# Compute the global reference distribution: per-label mean over every row whose
# distribution parsed to a non-empty dict; an empty array when no row did.
client_distributions = np.array([list(dist.values()) for dist in df['Parsed_Distribution'] if dist])
global_reference_distribution = np.mean(client_distributions, axis=0) if len(client_distributions) > 0 else np.array([])

# Thresholds used by the scoring functions
THRESHOLD_DUM = 490   # max Euclidean distance to the reference distribution
THRESHOLD_SUM = 1.5   # max ratio of a client's time to the average time
THRESHOLD_HQS = 0.05  # max absolute gap to the average quality factor
THRESHOLD_SRS = 0.7   # min reliability score
ALPHA_SUM = 1         # exponent applied to the time ratio

# Define measurement functions
def compute_dum(client_distribution):
    """Score whether a client's label counts are close to the reference distribution.

    Args:
        client_distribution: ``{label: count}`` dict for one client.

    Returns:
        1 when the Euclidean distance to ``global_reference_distribution`` is
        below ``THRESHOLD_DUM``; 0 otherwise, including when the reference is
        empty, the client distribution is missing/empty (the parser's fallback
        for unreadable cells), or the two have different lengths.
    """
    reference = np.asarray(global_reference_distribution)
    if reference.size == 0:
        return 0
    if not isinstance(client_distribution, dict) or not client_distribution:
        return 0
    counts = np.array(list(client_distribution.values()))
    if counts.shape != reference.shape:
        # Subtracting vectors of different length would raise or silently broadcast.
        return 0
    distance = np.sqrt(np.sum((counts - reference) ** 2))
    return 1 if distance < THRESHOLD_DUM else 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when ``(response_time / avg_response_time) ** ALPHA_SUM`` is
    strictly less than ``THRESHOLD_SUM``, and 0 otherwise."""
    time_ratio = response_time / avg_response_time
    within_limit = time_ratio ** ALPHA_SUM < THRESHOLD_SUM
    if within_limit:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when ``|quality_factor - avg_quality_factor|`` does not exceed
    ``THRESHOLD_HQS``, and 0 otherwise."""
    deviation = abs(quality_factor - avg_quality_factor)
    if deviation <= THRESHOLD_HQS:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when ``reliability_score`` is greater than or equal to
    ``THRESHOLD_SRS``, and 0 otherwise (NaN compares false, hence 0)."""
    meets_threshold = reliability_score >= THRESHOLD_SRS
    if meets_threshold:
        return 1
    return 0

# Function to Flatten Model Weights
def flatten_weights(weights_dict):
    """Concatenate all list-valued layers of a weights dict into one 1-D array.

    Args:
        weights_dict: ``{layer_name: nested list}``; values that are not lists
            are ignored. Layers are taken in the dict's iteration order.

    Returns:
        1-D ``numpy.ndarray`` with every layer flattened and joined end to end;
        an empty array when there is no list-valued layer.
    """
    # Concatenate whole arrays instead of extending a Python list one scalar at
    # a time, which boxes every single weight into a Python object.
    chunks = []
    for layer in weights_dict.values():
        if isinstance(layer, list):
            chunks.append(np.array(layer).flatten())
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)

# Function to Calculate MUM Score
def calculate_mum(local_weights, global_weights):
    """Mean absolute relative difference between local and global weights.

    Both inputs are truncated to the shorter length, then
    ``|(local - global) / global|`` is averaged.

    Entries whose global weight is exactly zero are excluded: dividing by them
    produced inf/NaN (plus a NumPy RuntimeWarning) and poisoned the whole mean.
    Returns NaN when nothing can be compared (empty input or all-zero global
    weights); NaN fails any ``< threshold`` comparison.
    """
    local_weights = np.asarray(local_weights, dtype=float)
    global_weights = np.asarray(global_weights, dtype=float)
    min_length = min(len(local_weights), len(global_weights))
    local_weights, global_weights = local_weights[:min_length], global_weights[:min_length]
    comparable = global_weights != 0
    if not comparable.any():
        return np.nan
    differences = np.abs(
        (local_weights[comparable] - global_weights[comparable]) / global_weights[comparable]
    )
    return np.mean(differences)

# Load Global Models
# One flattened weight vector per entry of global_model_paths. The processing loop indexes
# this list with the position of the client directory, so both lists must stay aligned.
global_models = []
for path in global_model_paths:
    with open(path, 'r') as f:
        global_weights = json.load(f)
        global_models.append(flatten_weights(global_weights))

# Compute Average Response Time & Quality Factor
# Averages are taken over every row of the CSV.
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# **Fix: Create a fast lookup dictionary for client IDs**
# NOTE(review): a dict keeps only the LAST index for a repeated key. If the CSV holds several
# rows per client (e.g. one per round), every weight file of that client is scored with that
# single last row - confirm this is intended, or key the map by (client, round).
client_map = {str(client_id): idx for idx, client_id in enumerate(df['Client'].astype(str))}

# Process Clients
# Builds one output row per client weight file found in each client directory.
binary_vector = []
for dir_index, client_dir in enumerate(client_dirs):
    # A missing directory is reported and skipped; dir_index still comes from enumerate,
    # so the pairing with global_models is unaffected.
    if not os.path.exists(client_dir):
        print(f"Warning: {client_dir} does not exist!")
        continue

    # Lexicographic order, so e.g. "client_10..." sorts before "client_2...".
    json_files = sorted([f for f in os.listdir(client_dir) if f.endswith('.json')])

    for filename in json_files:
        # **Extract client ID from "client_X_round_Y.json"**
        client_id = "_".join(filename.split('_')[:2])  # Extracts "client_X"

        # **Ensure it is in the CSV**
        # Any .json whose first two name parts are not a CSV client ID is ignored
        # (e.g. a "global_round_N.json" yields "global_round").
        row_index = client_map.get(client_id)
        if row_index is None:
            continue  # Skip unmatched files

        row = df.iloc[row_index]

        # Load Local Model Weights
        client_file_path = os.path.join(client_dir, filename)
        with open(client_file_path, 'r') as client_file:
            local_weights = json.load(client_file)

        # Compare this file's weights with the global model paired with its directory.
        local_flat = flatten_weights(local_weights)
        mum_value = calculate_mum(local_flat, global_models[dir_index])
        # 0.3 is the MUM cut-off; it is hard-coded here rather than defined with the THRESHOLD_* constants.
        mum_score = 1 if mum_value < 0.3 else 0

        # Compute Scores
        # These four flags depend only on the CSV row, not on the weight file being processed.
        dum_score = compute_dum(row['Parsed_Distribution'])
        sum_score = compute_sum(row['Computation_Time'], avg_response_time)
        hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
        srs_score = compute_srs(row['Reliability_Score'])

        # Store Results
        binary_vector.append([client_id, dum_score, sum_score, hqs_score, srs_score, mum_score])

# Convert to DataFrame and Save
binary_vector_df = pd.DataFrame(binary_vector, columns=['Client', 'DUM', 'SUM', 'HQS', 'SRS', 'MUM'])
output_file = '/content/Final_Results_With_MUM_64.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display Results
print("Final scores with MUM included:")
print(binary_vector_df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  differences = np.abs((local_weights - global_weights) / global_weights)


Final scores with MUM included:
        Client  DUM  SUM  HQS  SRS  MUM
0    client_10    1    1    1    1    0
1    client_10    1    1    1    1    0
2    client_10    1    1    1    1    0
3    client_10    1    1    1    1    0
4    client_10    1    1    1    1    1
..         ...  ...  ...  ...  ...  ...
595   client_9    1    1    1    1    0
596   client_9    1    1    1    1    0
597   client_9    1    1    1    1    0
598   client_9    1    1    1    1    0
599   client_9    1    1    1    1    0

[600 rows x 6 columns]


**MLaaS Generator Weight FMINIST16**
---

In [None]:
import os
import json
import numpy as np
import pandas as pd
import ast
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Paths
# client_dirs[i] is scored against the model at global_model_paths[i]; the processing loop
# pairs the two lists by position.
csv_file_path = '/content/Combined_MLaaS_Clients_FMINIST_16.csv'
client_dirs = [
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 16/NIID/',
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 16/IID/'
]
# NOTE(review): both entries point at the IID global model, so the NIID client weights are
# also compared against IID/global_round_10.json. This looks like a copy-paste slip - confirm
# whether the first entry should be the NIID directory's global model.
global_model_paths = [
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 16/IID/global_round_10.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 16/IID/global_round_10.json'
]

# Load the CSV file
df = pd.read_csv(csv_file_path)

# Step 1: Parse 'Data_Distribution' safely
def safe_parse_distribution(value):
    """Parse one 'Data_Distribution' cell into a dict.

    Strings are evaluated with ``ast.literal_eval``; values that already are
    dicts pass through unchanged. Anything that cannot be parsed, or that is
    not a dict after parsing (for example a NaN cell or a list literal),
    yields ``{}`` so callers always receive a dict.
    """
    if isinstance(value, str):
        try:
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval can raise any of these on malformed input.
            return {}
    return value if isinstance(value, dict) else {}

df['Parsed_Distribution'] = df['Data_Distribution'].apply(safe_parse_distribution)
# The tag must name this cell's model (FMINIST 16), matching the "FMinist16" label used
# when the combined client table is assembled; it previously read "Minist32".
df["model category"] = "FMinist16"

# Compute the global reference distribution
# Rows whose distribution is missing or failed to parse are left out of the mean.
# The values are taken in dict order, so every row is assumed to list its labels in the same order.
client_distributions = np.array(
    [list(dist.values()) for dist in df['Parsed_Distribution'] if isinstance(dist, dict) and dist]
)
global_reference_distribution = np.mean(client_distributions, axis=0) if len(client_distributions) > 0 else np.array([])

# Define thresholds
# Each constant is the cut-off one compute_* helper uses to turn a raw metric into a 0/1 flag.
THRESHOLD_DUM = 490  # compute_dum: 1 only if the Euclidean distance to the reference distribution is below this
THRESHOLD_SUM = 1.5  # compute_sum: 1 only if (response_time / avg_response_time) ** ALPHA_SUM is below this
THRESHOLD_HQS = 0.05  # compute_hqs: 1 if |quality_factor - avg_quality_factor| is at most this
THRESHOLD_SRS = 0.7  # compute_srs: 1 if reliability_score is at least this
ALPHA_SUM = 1  # exponent applied to the response-time ratio in compute_sum

# Define measurement functions
def compute_dum(client_distribution):
    """Return 1 if a client's label counts are close to the global reference, else 0.

    Closeness is the Euclidean distance between the dict's values (in dict
    order) and ``global_reference_distribution``; the flag is 1 when that
    distance is strictly below ``THRESHOLD_DUM``.

    Returns 0 when there is no reference, when ``client_distribution`` is not
    a dict, or when it has a different number of entries than the reference.
    Those cases used to raise (AttributeError / broadcasting ValueError) or,
    for a single-entry dict, were silently broadcast against every label.
    """
    if len(global_reference_distribution) == 0:
        return 0
    if not isinstance(client_distribution, dict):
        return 0
    counts = np.array(list(client_distribution.values()))
    if counts.shape != np.shape(global_reference_distribution):
        return 0
    ed = np.sqrt(np.sum((counts - global_reference_distribution) ** 2))
    return 1 if ed < THRESHOLD_DUM else 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when the response time relative to the average, raised to
    ``ALPHA_SUM``, is strictly below ``THRESHOLD_SUM``; otherwise 0."""
    relative_time = response_time / avg_response_time
    if relative_time ** ALPHA_SUM < THRESHOLD_SUM:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when the quality factor lies within ``THRESHOLD_HQS`` (inclusive)
    of the average quality factor; otherwise 0."""
    deviation = abs(quality_factor - avg_quality_factor)
    if deviation <= THRESHOLD_HQS:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when the reliability score reaches ``THRESHOLD_SRS`` (inclusive); otherwise 0."""
    if reliability_score >= THRESHOLD_SRS:
        return 1
    return 0

# Function to Flatten Model Weights
def flatten_weights(weights_dict):
    """Flatten a weights dict into a single 1-D NumPy array.

    Only values that are lists are used, visited in dict order; each is
    converted to an array and flattened. Non-list values are ignored.
    Returns an empty array when no list value contributes any element.
    """
    chunks = [
        np.array(layer).flatten()
        for layer in weights_dict.values()
        if isinstance(layer, list)
    ]
    # Drop empty chunks so they cannot influence the dtype of the result.
    chunks = [chunk for chunk in chunks if chunk.size]
    if not chunks:
        return np.array([])
    # One concatenate instead of growing a Python list one NumPy scalar at a time.
    return np.concatenate(chunks)

# Function to Calculate MUM Score
def calculate_mum(local_weights, global_weights):
    """Mean absolute relative difference between local and global weights.

    Both inputs are truncated to the shorter length, then
    ``|(local - global) / global|`` is averaged.

    Entries whose global weight is exactly zero are excluded: dividing by them
    produced inf/NaN (plus a NumPy RuntimeWarning) and poisoned the whole mean.
    Returns NaN when nothing can be compared (empty input or all-zero global
    weights); NaN fails any ``< threshold`` comparison.
    """
    local_weights = np.asarray(local_weights, dtype=float)
    global_weights = np.asarray(global_weights, dtype=float)
    min_length = min(len(local_weights), len(global_weights))
    local_weights, global_weights = local_weights[:min_length], global_weights[:min_length]
    comparable = global_weights != 0
    if not comparable.any():
        return np.nan
    differences = np.abs(
        (local_weights[comparable] - global_weights[comparable]) / global_weights[comparable]
    )
    return np.mean(differences)

# Load Global Models
# One flattened weight vector per entry of global_model_paths. The processing loop indexes
# this list with the position of the client directory, so both lists must stay aligned.
global_models = []
for path in global_model_paths:
    with open(path, 'r') as f:
        global_weights = json.load(f)
        global_models.append(flatten_weights(global_weights))

# Compute Average Response Time & Quality Factor
# Averages are taken over every row of the CSV.
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# **Fix: Create a fast lookup dictionary for client IDs**
# NOTE(review): a dict keeps only the LAST index for a repeated key. If the CSV holds several
# rows per client (e.g. one per round), every weight file of that client is scored with that
# single last row - confirm this is intended, or key the map by (client, round).
client_map = {str(client_id): idx for idx, client_id in enumerate(df['Client'].astype(str))}

# Process Clients
# Builds one output row per client weight file found in each client directory.
binary_vector = []
for dir_index, client_dir in enumerate(client_dirs):
    # A missing directory is reported and skipped; dir_index still comes from enumerate,
    # so the pairing with global_models is unaffected.
    if not os.path.exists(client_dir):
        print(f"Warning: {client_dir} does not exist!")
        continue

    # Lexicographic order, so e.g. "client_10..." sorts before "client_2...".
    json_files = sorted([f for f in os.listdir(client_dir) if f.endswith('.json')])

    for filename in json_files:
        # **Extract client ID from "client_X_round_Y.json"**
        client_id = "_".join(filename.split('_')[:2])  # Extracts "client_X"

        # **Ensure it is in the CSV**
        # Any .json whose first two name parts are not a CSV client ID is ignored
        # (e.g. a "global_round_N.json" yields "global_round").
        row_index = client_map.get(client_id)
        if row_index is None:
            continue  # Skip unmatched files

        row = df.iloc[row_index]

        # Load Local Model Weights
        client_file_path = os.path.join(client_dir, filename)
        with open(client_file_path, 'r') as client_file:
            local_weights = json.load(client_file)

        # Compare this file's weights with the global model paired with its directory.
        local_flat = flatten_weights(local_weights)
        mum_value = calculate_mum(local_flat, global_models[dir_index])
        # 0.3 is the MUM cut-off; it is hard-coded here rather than defined with the THRESHOLD_* constants.
        mum_score = 1 if mum_value < 0.3 else 0

        # Compute Scores
        # These four flags depend only on the CSV row, not on the weight file being processed.
        dum_score = compute_dum(row['Parsed_Distribution'])
        sum_score = compute_sum(row['Computation_Time'], avg_response_time)
        hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
        srs_score = compute_srs(row['Reliability_Score'])

        # Store Results
        binary_vector.append([client_id, dum_score, sum_score, hqs_score, srs_score, mum_score])

# Convert to DataFrame and Save
binary_vector_df = pd.DataFrame(binary_vector, columns=['Client', 'DUM', 'SUM', 'HQS', 'SRS', 'MUM'])
output_file = '/content/Final_Results_With_MUM_16_FMINIST.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display Results
print("Final scores with MUM included:")
print(binary_vector_df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Final scores with MUM included:
        Client  DUM  SUM  HQS  SRS  MUM
0    client_10    1    1    0    0    0
1    client_10    1    1    0    0    0
2    client_10    1    1    0    0    0
3    client_10    1    1    0    0    0
4    client_10    1    1    0    0    0
..         ...  ...  ...  ...  ...  ...
395   client_9    1    1    1    0    0
396   client_9    1    1    1    0    1
397   client_9    1    1    1    0    1
398   client_9    1    1    1    0    1
399   client_9    1    1    1    0    1

[400 rows x 6 columns]


**MLaaS Generator Weight FMINIST32**
---

In [None]:
import os
import json
import numpy as np
import pandas as pd
import ast
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Paths
# client_dirs[i] is scored against the model at global_model_paths[i]; the processing loop
# pairs the two lists by position.
csv_file_path = '/content/Combined_MLaaS_Clients_FMINIST_32.csv'
client_dirs = [
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/NIID/',
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/IID/'
]
# NOTE(review): both entries point at the IID global model, so the NIID client weights are
# also compared against IID/global_round_10.json. This looks like a copy-paste slip - confirm
# whether the first entry should be the NIID directory's global model.
global_model_paths = [
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/IID/global_round_10.json',
    '/content/drive/MyDrive/MLaaS Genarator Weight FMINIST 32/IID/global_round_10.json'
]

# Load the CSV file
df = pd.read_csv(csv_file_path)

# Step 1: Parse 'Data_Distribution' safely
def safe_parse_distribution(value):
    """Parse one 'Data_Distribution' cell into a dict.

    Strings are evaluated with ``ast.literal_eval``; values that already are
    dicts pass through unchanged. Anything that cannot be parsed, or that is
    not a dict after parsing (for example a NaN cell or a list literal),
    yields ``{}`` so callers always receive a dict.
    """
    if isinstance(value, str):
        try:
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval can raise any of these on malformed input.
            return {}
    return value if isinstance(value, dict) else {}

df['Parsed_Distribution'] = df['Data_Distribution'].apply(safe_parse_distribution)
# The tag must name this cell's model (FMINIST 32), matching the "FMinist32" label used
# when the combined client table is assembled; it previously read "Minist32".
df["model category"] = "FMinist32"

# Compute the global reference distribution
# Rows whose distribution is missing or failed to parse are left out of the mean.
# The values are taken in dict order, so every row is assumed to list its labels in the same order.
client_distributions = np.array(
    [list(dist.values()) for dist in df['Parsed_Distribution'] if isinstance(dist, dict) and dist]
)
global_reference_distribution = np.mean(client_distributions, axis=0) if len(client_distributions) > 0 else np.array([])

# Define thresholds
# Each constant is the cut-off one compute_* helper uses to turn a raw metric into a 0/1 flag.
THRESHOLD_DUM = 490  # compute_dum: 1 only if the Euclidean distance to the reference distribution is below this
THRESHOLD_SUM = 1.5  # compute_sum: 1 only if (response_time / avg_response_time) ** ALPHA_SUM is below this
THRESHOLD_HQS = 0.05  # compute_hqs: 1 if |quality_factor - avg_quality_factor| is at most this
THRESHOLD_SRS = 0.7  # compute_srs: 1 if reliability_score is at least this
ALPHA_SUM = 1  # exponent applied to the response-time ratio in compute_sum

# Define measurement functions
def compute_dum(client_distribution):
    """Return 1 if a client's label counts are close to the global reference, else 0.

    Closeness is the Euclidean distance between the dict's values (in dict
    order) and ``global_reference_distribution``; the flag is 1 when that
    distance is strictly below ``THRESHOLD_DUM``.

    Returns 0 when there is no reference, when ``client_distribution`` is not
    a dict, or when it has a different number of entries than the reference.
    Those cases used to raise (AttributeError / broadcasting ValueError) or,
    for a single-entry dict, were silently broadcast against every label.
    """
    if len(global_reference_distribution) == 0:
        return 0
    if not isinstance(client_distribution, dict):
        return 0
    counts = np.array(list(client_distribution.values()))
    if counts.shape != np.shape(global_reference_distribution):
        return 0
    ed = np.sqrt(np.sum((counts - global_reference_distribution) ** 2))
    return 1 if ed < THRESHOLD_DUM else 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when the response time relative to the average, raised to
    ``ALPHA_SUM``, is strictly below ``THRESHOLD_SUM``; otherwise 0."""
    relative_time = response_time / avg_response_time
    if relative_time ** ALPHA_SUM < THRESHOLD_SUM:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when the quality factor lies within ``THRESHOLD_HQS`` (inclusive)
    of the average quality factor; otherwise 0."""
    deviation = abs(quality_factor - avg_quality_factor)
    if deviation <= THRESHOLD_HQS:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when the reliability score reaches ``THRESHOLD_SRS`` (inclusive); otherwise 0."""
    if reliability_score >= THRESHOLD_SRS:
        return 1
    return 0

# Function to Flatten Model Weights
def flatten_weights(weights_dict):
    """Flatten a weights dict into a single 1-D NumPy array.

    Only values that are lists are used, visited in dict order; each is
    converted to an array and flattened. Non-list values are ignored.
    Returns an empty array when no list value contributes any element.
    """
    chunks = [
        np.array(layer).flatten()
        for layer in weights_dict.values()
        if isinstance(layer, list)
    ]
    # Drop empty chunks so they cannot influence the dtype of the result.
    chunks = [chunk for chunk in chunks if chunk.size]
    if not chunks:
        return np.array([])
    # One concatenate instead of growing a Python list one NumPy scalar at a time.
    return np.concatenate(chunks)

# Function to Calculate MUM Score
def calculate_mum(local_weights, global_weights):
    """Mean absolute relative difference between local and global weights.

    Both inputs are truncated to the shorter length, then
    ``|(local - global) / global|`` is averaged.

    Entries whose global weight is exactly zero are excluded: dividing by them
    produced inf/NaN (plus a NumPy RuntimeWarning) and poisoned the whole mean.
    Returns NaN when nothing can be compared (empty input or all-zero global
    weights); NaN fails any ``< threshold`` comparison.
    """
    local_weights = np.asarray(local_weights, dtype=float)
    global_weights = np.asarray(global_weights, dtype=float)
    min_length = min(len(local_weights), len(global_weights))
    local_weights, global_weights = local_weights[:min_length], global_weights[:min_length]
    comparable = global_weights != 0
    if not comparable.any():
        return np.nan
    differences = np.abs(
        (local_weights[comparable] - global_weights[comparable]) / global_weights[comparable]
    )
    return np.mean(differences)

# Load Global Models
# One flattened weight vector per entry of global_model_paths. The processing loop indexes
# this list with the position of the client directory, so both lists must stay aligned.
global_models = []
for path in global_model_paths:
    with open(path, 'r') as f:
        global_weights = json.load(f)
        global_models.append(flatten_weights(global_weights))

# Compute Average Response Time & Quality Factor
# Averages are taken over every row of the CSV.
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# **Fix: Create a fast lookup dictionary for client IDs**
# NOTE(review): a dict keeps only the LAST index for a repeated key. If the CSV holds several
# rows per client (e.g. one per round), every weight file of that client is scored with that
# single last row - confirm this is intended, or key the map by (client, round).
client_map = {str(client_id): idx for idx, client_id in enumerate(df['Client'].astype(str))}

# Process Clients
# Builds one output row per client weight file found in each client directory.
binary_vector = []
for dir_index, client_dir in enumerate(client_dirs):
    # A missing directory is reported and skipped; dir_index still comes from enumerate,
    # so the pairing with global_models is unaffected.
    if not os.path.exists(client_dir):
        print(f"Warning: {client_dir} does not exist!")
        continue

    # Lexicographic order, so e.g. "client_10..." sorts before "client_2...".
    json_files = sorted([f for f in os.listdir(client_dir) if f.endswith('.json')])

    for filename in json_files:
        # **Extract client ID from "client_X_round_Y.json"**
        client_id = "_".join(filename.split('_')[:2])  # Extracts "client_X"

        # **Ensure it is in the CSV**
        # Any .json whose first two name parts are not a CSV client ID is ignored
        # (e.g. a "global_round_N.json" yields "global_round").
        row_index = client_map.get(client_id)
        if row_index is None:
            continue  # Skip unmatched files

        row = df.iloc[row_index]

        # Load Local Model Weights
        client_file_path = os.path.join(client_dir, filename)
        with open(client_file_path, 'r') as client_file:
            local_weights = json.load(client_file)

        # Compare this file's weights with the global model paired with its directory.
        local_flat = flatten_weights(local_weights)
        mum_value = calculate_mum(local_flat, global_models[dir_index])
        # 0.3 is the MUM cut-off; it is hard-coded here rather than defined with the THRESHOLD_* constants.
        mum_score = 1 if mum_value < 0.3 else 0

        # Compute Scores
        # These four flags depend only on the CSV row, not on the weight file being processed.
        dum_score = compute_dum(row['Parsed_Distribution'])
        sum_score = compute_sum(row['Computation_Time'], avg_response_time)
        hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
        srs_score = compute_srs(row['Reliability_Score'])

        # Store Results
        binary_vector.append([client_id, dum_score, sum_score, hqs_score, srs_score, mum_score])

# Convert to DataFrame and Save
binary_vector_df = pd.DataFrame(binary_vector, columns=['Client', 'DUM', 'SUM', 'HQS', 'SRS', 'MUM'])
output_file = '/content/Final_Results_With_MUM_32_FMINIST.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display Results
print("Final scores with MUM included:")
print(binary_vector_df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Final scores with MUM included:
        Client  DUM  SUM  HQS  SRS  MUM
0    client_10    1    1    0    0    0
1    client_10    1    1    0    0    0
2    client_10    1    1    0    0    0
3    client_10    1    1    0    0    0
4    client_10    1    1    0    0    0
..         ...  ...  ...  ...  ...  ...
795   client_9    1    1    1    0    0
796   client_9    1    1    1    0    1
797   client_9    1    1    1    0    1
798   client_9    1    1    1    0    1
799   client_9    1    1    1    0    1

[800 rows x 6 columns]


In [None]:
# Read the client metrics CSV and tag every row with the "Minist16" model category.
CMLaaS_MINIST_16 = pd.read_csv("/content/Combined_MLaaS_Clients.csv").assign(
    **{"model category": "Minist16"}
)
CMLaaS_MINIST_16

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 285, 1: 339, 2: 299, 3: 295, 4: 325, 5: 27...",2.186110,0.5504,0.731167,1,Minist16
1,client_2,1,"{0: 307, 1: 332, 2: 282, 3: 313, 4: 298, 5: 24...",1.536191,0.5383,0.733767,2,Minist16
2,client_3,1,"{0: 299, 1: 352, 2: 308, 3: 314, 4: 276, 5: 26...",2.141187,0.5568,0.738267,3,Minist16
3,client_4,1,"{0: 315, 1: 328, 2: 287, 3: 306, 4: 285, 5: 27...",1.415759,0.5596,0.731400,4,Minist16
4,client_5,1,"{0: 290, 1: 339, 2: 286, 3: 320, 4: 284, 5: 27...",1.520962,0.5370,0.727367,5,Minist16
...,...,...,...,...,...,...,...,...
175,client_16,3,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",0.791545,0.6389,0.502167,176,Minist16
176,client_17,3,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",0.885868,0.5938,0.479467,177,Minist16
177,client_18,3,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",0.846018,0.6064,0.478733,178,Minist16
178,client_19,3,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",0.801713,0.6106,0.475567,179,Minist16


In [None]:
# Read the client metrics CSV and tag every row with the "Minist32" model category.
CMLaaS_MINIST_32 = pd.read_csv("/content/Combined_MLaaS_Clients_32.csv").assign(
    **{"model category": "Minist32"}
)
CMLaaS_MINIST_32

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 285, 1: 339, 2: 299, 3: 295, 4: 325, 5: 27...",6.716711,0.8551,0.89514,1,Minist32
1,client_2,1,"{0: 307, 1: 332, 2: 282, 3: 313, 4: 298, 5: 24...",7.158237,0.8487,0.89474,2,Minist32
2,client_3,1,"{0: 299, 1: 352, 2: 308, 3: 314, 4: 276, 5: 26...",6.309666,0.8603,0.89648,3,Minist32
3,client_4,1,"{0: 315, 1: 328, 2: 287, 3: 306, 4: 285, 5: 27...",4.640386,0.8260,0.88550,4,Minist32
4,client_5,1,"{0: 290, 1: 339, 2: 286, 3: 320, 4: 284, 5: 27...",6.303348,0.8530,0.89442,5,Minist32
...,...,...,...,...,...,...,...,...
295,client_16,5,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",3.705131,0.8667,0.77820,296,Minist32
296,client_17,5,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",6.124486,0.8818,0.79644,297,Minist32
297,client_18,5,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",2.477601,0.8826,0.81464,298,Minist32
298,client_19,5,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",2.026807,0.8796,0.79394,299,Minist32


In [None]:
# Read the client metrics CSV and tag every row with the "Minist64" model category.
CMLaaS_MINIST_64 = pd.read_csv("/content/Combined_MLaaS_Clients_64.csv").assign(
    **{"model category": "Minist64"}
)
CMLaaS_MINIST_64

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 285, 1: 339, 2: 299, 3: 295, 4: 325, 5: 27...",20.881774,0.8774,0.92243,1,Minist64
1,client_2,1,"{0: 307, 1: 332, 2: 282, 3: 313, 4: 298, 5: 24...",17.688163,0.8722,0.91886,2,Minist64
2,client_3,1,"{0: 299, 1: 352, 2: 308, 3: 314, 4: 276, 5: 26...",19.310270,0.8873,0.92170,3,Minist64
3,client_4,1,"{0: 315, 1: 328, 2: 287, 3: 306, 4: 285, 5: 27...",24.872246,0.8480,0.91596,4,Minist64
4,client_5,1,"{0: 290, 1: 339, 2: 286, 3: 320, 4: 284, 5: 27...",17.652168,0.8827,0.92494,5,Minist64
...,...,...,...,...,...,...,...,...
595,client_16,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",6.801612,0.9163,0.87653,596,Minist64
596,client_17,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",7.328037,0.9201,0.87825,597,Minist64
597,client_18,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",7.089456,0.9217,0.88580,598,Minist64
598,client_19,10,"{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 10...",6.903012,0.8952,0.87578,599,Minist64


In [None]:
# Read the client metrics CSV and tag every row with the "FMinist16" model category.
CMLaaS_FMINIST_16 = pd.read_csv("/content/Combined_MLaaS_Clients_FMINIST_16.csv").assign(
    **{"model category": "FMinist16"}
)
CMLaaS_FMINIST_16

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",7.727399,0.7242,0.78356,1,FMinist16
1,client_2,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",5.312258,0.7162,0.77398,2,FMinist16
2,client_3,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",6.180043,0.7030,0.77892,3,FMinist16
3,client_4,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",9.320201,0.7211,0.76735,4,FMinist16
4,client_5,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.370446,0.7161,0.78265,5,FMinist16
...,...,...,...,...,...,...,...,...
395,client_16,10,"{0: 327, 1: 333, 2: 347, 3: 336, 4: 334, 5: 33...",8.600663,0.8191,,396,FMinist16
396,client_17,10,"{0: 334, 1: 354, 2: 333, 3: 334, 4: 361, 5: 33...",9.494205,0.8206,,397,FMinist16
397,client_18,10,"{0: 334, 1: 362, 2: 328, 3: 316, 4: 320, 5: 38...",8.545703,0.7799,,398,FMinist16
398,client_19,10,"{0: 379, 1: 339, 2: 348, 3: 319, 4: 330, 5: 33...",8.641838,0.8118,,399,FMinist16


In [None]:
# Read the client metrics CSV and tag every row with the "FMinist32" model category.
CMLaaS_FMINIST_32 = pd.read_csv("/content/Combined_MLaaS_Clients_FMINIST_32.csv").assign(
    **{"model category": "FMinist32"}
)
CMLaaS_FMINIST_32

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.973562,0.7264,0.804915,1,FMinist32
1,client_2,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",7.691949,0.6473,0.801875,2,FMinist32
2,client_3,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.264498,0.7302,0.808980,3,FMinist32
3,client_4,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.126710,0.7158,0.801425,4,FMinist32
4,client_5,1,"{0: 300, 1: 300, 2: 300, 3: 300, 4: 300, 5: 30...",8.114779,0.7046,0.804190,5,FMinist32
...,...,...,...,...,...,...,...,...
795,client_16,20,"{0: 317, 1: 346, 2: 352, 3: 347, 4: 359, 5: 35...",9.371960,0.8463,,796,FMinist32
796,client_17,20,"{0: 337, 1: 354, 2: 363, 3: 347, 4: 320, 5: 35...",8.348448,0.8399,,797,FMinist32
797,client_18,20,"{0: 369, 1: 369, 2: 366, 3: 320, 4: 314, 5: 33...",8.621405,0.8425,,798,FMinist32
798,client_19,20,"{0: 308, 1: 323, 2: 340, 3: 354, 4: 367, 5: 35...",6.819442,0.8443,,799,FMinist32


In [None]:
# Stack the five per-model client tables into one frame with a fresh 0..n-1 index.
CMLaaS = pd.concat(
    [
        CMLaaS_MINIST_16,
        CMLaaS_MINIST_32,
        CMLaaS_MINIST_64,
        CMLaaS_FMINIST_16,
        CMLaaS_FMINIST_32,
    ],
    ignore_index=True,
)
CMLaaS

Unnamed: 0,Client,Round,Data_Distribution,Computation_Time,Quality_Factor,Reliability_Score,MLaaS_ID,model category
0,client_1,1,"{0: 285, 1: 339, 2: 299, 3: 295, 4: 325, 5: 27...",2.186110,0.5504,0.731167,1,Minist16
1,client_2,1,"{0: 307, 1: 332, 2: 282, 3: 313, 4: 298, 5: 24...",1.536191,0.5383,0.733767,2,Minist16
2,client_3,1,"{0: 299, 1: 352, 2: 308, 3: 314, 4: 276, 5: 26...",2.141187,0.5568,0.738267,3,Minist16
3,client_4,1,"{0: 315, 1: 328, 2: 287, 3: 306, 4: 285, 5: 27...",1.415759,0.5596,0.731400,4,Minist16
4,client_5,1,"{0: 290, 1: 339, 2: 286, 3: 320, 4: 284, 5: 27...",1.520962,0.5370,0.727367,5,Minist16
...,...,...,...,...,...,...,...,...
2275,client_16,20,"{0: 317, 1: 346, 2: 352, 3: 347, 4: 359, 5: 35...",9.371960,0.8463,,796,FMinist32
2276,client_17,20,"{0: 337, 1: 354, 2: 363, 3: 347, 4: 320, 5: 35...",8.348448,0.8399,,797,FMinist32
2277,client_18,20,"{0: 369, 1: 369, 2: 366, 3: 320, 4: 314, 5: 33...",8.621405,0.8425,,798,FMinist32
2278,client_19,20,"{0: 308, 1: 323, 2: 340, 3: 354, 4: 367, 5: 35...",6.819442,0.8443,,799,FMinist32


In [None]:
# Persist the combined client table. The DataFrame index is written as well (to_csv default),
# so the file carries an unnamed leading column when it is read back.
CMLaaS.to_csv("/content/CMLaaS.csv")

In [None]:
# Read the saved binary scores and tag every row with the "Minist16" model category.
Adaptive_CMLaaS_MINIST_16 = pd.read_csv("/content/Final_Results_With_MUM.csv").assign(
    **{"model category": "Minist16"}
)
Adaptive_CMLaaS_MINIST_16

Unnamed: 0,DUM,SUM,HQS,SRS,MUM,Client,model category
0,1,1,1,1,1,client_1,Minist16
1,1,1,1,1,1,client_2,Minist16
2,1,1,1,1,1,client_3,Minist16
3,1,1,1,1,1,client_4,Minist16
4,1,1,1,1,1,client_5,Minist16
...,...,...,...,...,...,...,...
175,1,1,0,0,1,client_16,Minist16
176,1,1,1,0,1,client_17,Minist16
177,1,1,1,0,1,client_18,Minist16
178,1,1,1,0,1,client_19,Minist16


In [None]:
# Read the saved binary scores and tag every row with the "Minist32" model category.
Adaptive_CMLaaS_MINIST_32 = pd.read_csv("/content/Final_Results_With_MUM_32.csv").assign(
    **{"model category": "Minist32"}
)
Adaptive_CMLaaS_MINIST_32

Unnamed: 0,Client,DUM,SUM,HQS,SRS,MUM,model category
0,client_10,1,1,0,1,0,Minist32
1,client_10,1,1,0,1,1,Minist32
2,client_10,1,1,0,1,1,Minist32
3,client_10,1,1,0,1,1,Minist32
4,client_10,1,1,0,1,1,Minist32
...,...,...,...,...,...,...,...
295,client_9,1,1,0,1,0,Minist32
296,client_9,1,1,0,1,1,Minist32
297,client_9,1,1,0,1,1,Minist32
298,client_9,1,1,0,1,1,Minist32


In [None]:
# Read the saved binary scores and tag every row with the "Minist64" model category.
Adaptive_CMLaaS_MINIST_64 = pd.read_csv("/content/Final_Results_With_MUM_64.csv").assign(
    **{"model category": "Minist64"}
)
Adaptive_CMLaaS_MINIST_64

Unnamed: 0,Client,DUM,SUM,HQS,SRS,MUM,model category
0,client_10,1,1,1,1,0,Minist64
1,client_10,1,1,1,1,0,Minist64
2,client_10,1,1,1,1,0,Minist64
3,client_10,1,1,1,1,0,Minist64
4,client_10,1,1,1,1,1,Minist64
...,...,...,...,...,...,...,...
595,client_9,1,1,1,1,0,Minist64
596,client_9,1,1,1,1,0,Minist64
597,client_9,1,1,1,1,0,Minist64
598,client_9,1,1,1,1,0,Minist64


In [None]:
# Read the saved binary scores and tag every row with the "FMinist16" model category.
Adaptive_CMLaaS_FMINIST_16 = pd.read_csv("/content/Final_Results_With_MUM_16_FMINIST.csv").assign(
    **{"model category": "FMinist16"}
)
Adaptive_CMLaaS_FMINIST_16

Unnamed: 0,Client,DUM,SUM,HQS,SRS,MUM,model category
0,client_10,1,1,0,0,0,FMinist16
1,client_10,1,1,0,0,0,FMinist16
2,client_10,1,1,0,0,0,FMinist16
3,client_10,1,1,0,0,0,FMinist16
4,client_10,1,1,0,0,0,FMinist16
...,...,...,...,...,...,...,...
395,client_9,1,1,1,0,0,FMinist16
396,client_9,1,1,1,0,1,FMinist16
397,client_9,1,1,1,0,1,FMinist16
398,client_9,1,1,1,0,1,FMinist16


In [None]:
# Read the saved binary scores and tag every row with the "FMinist32" model category.
Adaptive_CMLaaS_FMINIST_32 = pd.read_csv("/content/Final_Results_With_MUM_32_FMINIST.csv").assign(
    **{"model category": "FMinist32"}
)
Adaptive_CMLaaS_FMINIST_32

Unnamed: 0,Client,DUM,SUM,HQS,SRS,MUM,model category
0,client_10,1,1,0,0,0,FMinist32
1,client_10,1,1,0,0,0,FMinist32
2,client_10,1,1,0,0,0,FMinist32
3,client_10,1,1,0,0,0,FMinist32
4,client_10,1,1,0,0,0,FMinist32
...,...,...,...,...,...,...,...
795,client_9,1,1,1,0,0,FMinist32
796,client_9,1,1,1,0,1,FMinist32
797,client_9,1,1,1,0,1,FMinist32
798,client_9,1,1,1,0,1,FMinist32


In [None]:
# Stack the five per-model score tables (MNIST and FMINIST alike) into one frame
# with a fresh 0..n-1 index; columns are aligned by name.
Adaptive_CMLaaS_FMINIST = pd.concat(
    [
        Adaptive_CMLaaS_MINIST_16,
        Adaptive_CMLaaS_MINIST_32,
        Adaptive_CMLaaS_MINIST_64,
        Adaptive_CMLaaS_FMINIST_16,
        Adaptive_CMLaaS_FMINIST_32,
    ],
    ignore_index=True,
)
Adaptive_CMLaaS_FMINIST

Unnamed: 0,DUM,SUM,HQS,SRS,MUM,Client,model category
0,1,1,1,1,1,client_1,Minist16
1,1,1,1,1,1,client_2,Minist16
2,1,1,1,1,1,client_3,Minist16
3,1,1,1,1,1,client_4,Minist16
4,1,1,1,1,1,client_5,Minist16
...,...,...,...,...,...,...,...
2275,1,1,1,0,0,client_9,FMinist32
2276,1,1,1,0,1,client_9,FMinist32
2277,1,1,1,0,1,client_9,FMinist32
2278,1,1,1,0,1,client_9,FMinist32


In [None]:
# Persist the combined score table. The DataFrame index is written as well (to_csv default),
# so the file carries an unnamed leading column when it is read back.
Adaptive_CMLaaS_FMINIST.to_csv("/content/Adaptive_CMLaaS_FMINIST.csv")

In [None]:
# Tally the MUM flag values (1 = pass, 0 = fail).
# NOTE(review): binary_vector_df is reassigned by every scoring cell, so this counts whichever
# of those cells ran most recently - confirm which run the saved output belongs to.
binary_vector_df['MUM'].value_counts()

Unnamed: 0_level_0,count
MUM,Unnamed: 1_level_1
1,110
0,70


**OLD CODE – Do Not Run**
---

In [None]:
import pandas as pd
import numpy as np
import ast

# Load the CSV file
file_path = '/content/Combined_MLaaS_Clients.csv'
df = pd.read_csv(file_path)

# Step 1: Parse the 'Data_Distribution' from string to dictionary
# literal_eval is applied to every cell unguarded, so a missing or malformed entry raises here.
df['Parsed_Distribution'] = df['Data_Distribution'].apply(ast.literal_eval)

# Step 2: Compute the global reference distribution
# Element-wise mean of each row's dict values (taken in dict order): one average per position.
client_distributions = np.array([list(dist.values()) for dist in df['Parsed_Distribution']])
global_reference_distribution = np.mean(client_distributions, axis=0)

# Step 3: Define thresholds
# Each constant is the cut-off one compute_* helper uses to turn a raw metric into a 0/1 flag.
THRESHOLD_DUM = 405  # Adjust as needed; compute_dum gives 1 only for a Euclidean distance below this
THRESHOLD_SUM = 1.5  # compute_sum gives 1 only when (time / average time) ** ALPHA_SUM is below this
THRESHOLD_HQS = 0.05  # Allowable absolute difference from the average quality factor (inclusive)
THRESHOLD_SRS = 0.7  # Minimum acceptable reliability score (inclusive)
ALPHA_SUM = 1  # Power parameter for SUM computation

# Step 4: Define functions for each measurement
def compute_dum(client_distribution):
    """Return 1 when the Euclidean distance between the client's dict values and
    ``global_reference_distribution`` is strictly below ``THRESHOLD_DUM``; otherwise 0."""
    client_counts = np.array(list(client_distribution.values()))
    offset = client_counts - global_reference_distribution
    ed = np.sqrt(np.sum(offset ** 2))
    if ed < THRESHOLD_DUM:
        return 1
    return 0

def compute_sum(response_time, avg_response_time):
    """Return 1 when the response time relative to the average, raised to
    ``ALPHA_SUM``, is strictly below ``THRESHOLD_SUM``; otherwise 0."""
    if (response_time / avg_response_time) ** ALPHA_SUM < THRESHOLD_SUM:
        return 1
    return 0

def compute_hqs(quality_factor, avg_quality_factor):
    """Return 1 when the quality factor lies within ``THRESHOLD_HQS`` (inclusive)
    of the average quality factor; otherwise 0."""
    within_tolerance = np.abs(quality_factor - avg_quality_factor) <= THRESHOLD_HQS
    if within_tolerance:
        return 1
    return 0

def compute_srs(reliability_score):
    """Return 1 when the reliability score reaches ``THRESHOLD_SRS`` (inclusive); otherwise 0."""
    if reliability_score >= THRESHOLD_SRS:
        return 1
    return 0

# Step 5: Calculate average response time and quality factor for comparison
# Both averages are taken over every row of the CSV.
avg_response_time = df['Computation_Time'].mean()
avg_quality_factor = df['Quality_Factor'].mean()

# Step 6: Apply functions and generate the binary vector
# One [DUM, SUM, HQS, SRS] row is produced per CSV row, in CSV order.
binary_vector = []

for index, row in df.iterrows():
    dum_score = compute_dum(row['Parsed_Distribution'])
    sum_score = compute_sum(row['Computation_Time'], avg_response_time)
    hqs_score = compute_hqs(row['Quality_Factor'], avg_quality_factor)
    srs_score = compute_srs(row['Reliability_Score'])

    # Combine all scores into a vector
    client_vector = [dum_score, sum_score, hqs_score, srs_score]
    binary_vector.append(client_vector)

# Convert to DataFrame for better visualization
binary_vector_df = pd.DataFrame(binary_vector, columns=['DUM', 'SUM', 'HQS', 'SRS'])
# Rows were appended in df order, so the client names line up positionally.
binary_vector_df['Client'] = df['Client'].values

# Save the results to a new CSV file
output_file = '/content/Client_Binary_Vector_Scores.csv'
binary_vector_df.to_csv(output_file, index=False)

# Display the result
print("Binary vector scores for each client:")
print(binary_vector_df)


Binary vector scores for each client:
     DUM  SUM  HQS  SRS     Client
0      1    1    1    1   client_1
1      1    1    1    1   client_2
2      1    1    1    1   client_3
3      1    1    1    1   client_4
4      1    1    1    1   client_5
..   ...  ...  ...  ...        ...
175    0    1    0    0  client_16
176    0    1    1    0  client_17
177    0    1    1    0  client_18
178    0    1    1    0  client_19
179    0    1    1    0  client_20

[180 rows x 5 columns]


**Statistics**
---

In [None]:
# Tally the DUM flag values (1 = pass, 0 = fail) in the current binary_vector_df.
binary_vector_df['DUM'].value_counts()

Unnamed: 0_level_0,count
DUM,Unnamed: 1_level_1
0,120
1,60


In [None]:
# Tally the SUM flag values (1 = pass, 0 = fail) in the current binary_vector_df.
binary_vector_df['SUM'].value_counts()

Unnamed: 0_level_0,count
SUM,Unnamed: 1_level_1
1,153
0,27


In [None]:
# Tally the HQS flag values (1 = pass, 0 = fail) in the current binary_vector_df.
binary_vector_df['HQS'].value_counts()

Unnamed: 0_level_0,count
HQS,Unnamed: 1_level_1
0,137
1,43


In [None]:
# Tally the SRS flag values (1 = pass, 0 = fail) in the current binary_vector_df.
binary_vector_df['SRS'].value_counts()

Unnamed: 0_level_0,count
SRS,Unnamed: 1_level_1
1,90
0,90
