**New MLaaS Composability Model**
---

In [373]:
import os
import ast
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive so the per-client weight files under WEIGHTS_DIR are reachable.
drive.mount('/content/drive')

# =====================================================
# INPUT FILES (ONLY THESE TWO MATTER)
# =====================================================
# COMBOS_PATH:   one client combination per row (client IDs in the "Client_*" columns).
# PROFILES_PATH: one profile per client, looked up by "Client_ID".
# OUT_PATH:      where the scored combinations are written.
COMBOS_PATH   = "/content/Completed_Combinations_10_MNIST_n15_with_GlobalMetrics.csv"
PROFILES_PATH = "/content/MNIST_Client_Profiles_For_Composability_100.csv"
OUT_PATH      = "/content/combination_2_WITH_COMPOSABILITY_SCORES.csv"

df_combos   = pd.read_csv(COMBOS_PATH)
df_profiles = pd.read_csv(PROFILES_PATH)

# Works for any n (2/3/5/7/9/10/...): every column named "Client_*" holds one client ID.
client_cols = [c for c in df_combos.columns if c.startswith("Client_")]

# =====================================================
# CONFIG
# =====================================================
# A combination is flagged composable when its MCS is strictly above this value.
THRESHOLD_MCS = 0.85

# Relative importance of each rule in the MCS weighted average. The weights do
# not need to sum to 1: the MCS divides by the sum of the weights actually used.
WEIGHTS = {
    "DHS": 0.99,
    "MUS": 0.95,
    "SHS": 0.45,
    "SES": 0.60,
    "HSQ": 0.78,
    "SRS": 0.32,
}

# Folder with the per-client weight archives ("client_<id>_local.npz") used by MUS.
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_20_MNIST"

# NOTE(review): `ddd` is an FMNIST profile table loaded inside the MNIST section
# and is not referenced anywhere else in this cell -- looks like a leftover;
# confirm and remove.
ddd=pd.read_csv("/content/FMNIST_Client_Profiles_For_Composability_30_30.csv")

# SES CONFIG (LATENCY-ONLY)
# SES_T_DEFAULT is the mean-latency threshold T: groups at or below it score 1.0,
# slower groups score (T / mean_latency) ** SES_ALPHA_DEFAULT.
SES_T_DEFAULT = 3676.331401
SES_ALPHA_DEFAULT = 1.0

# =====================================================
# HELPERS
# =====================================================
def make_combination_string(row, client_cols):
    """Build the underscore-joined key of the client IDs present in ``row``.

    Columns are visited in the order given by ``client_cols``. Cells for which
    ``pd.notna`` is false are skipped; every remaining value is cast to ``int``
    before being rendered as text (so ``3.0`` becomes ``"3"``).
    """
    present = (row[col] for col in client_cols if pd.notna(row[col]))
    return "_".join(str(int(value)) for value in present)

def _cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are converted to float64 arrays. If either Euclidean norm is
    below ``eps`` the similarity is defined as 0.0 instead of dividing by a
    (near-)zero norm.
    """
    u = np.asarray(a, dtype=np.float64)
    v = np.asarray(b, dtype=np.float64)
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    degenerate = norm_u < eps or norm_v < eps
    return 0.0 if degenerate else float(np.dot(u, v) / (norm_u * norm_v))

def _clip01(x):
    """Clamp ``x`` to the interval [0, 1]; ``None`` and non-finite values become NaN."""
    if x is not None and np.isfinite(x):
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

# =====================================================
# GROUP-LEVEL RULES
# =====================================================
def compute_dhs_group(client_profiles):
    """Compute the DHS score of a client group from its label distributions.

    For every client row, the columns whose names start with ``"Label"`` are
    read as a distribution and reduced to ``(max - min) / sum``. Rows whose sum
    is not positive are ignored. The score is one minus the mean of those
    ratios, or 0.0 when no row contributes.
    """
    label_cols = [name for name in client_profiles.columns if name.startswith("Label")]
    rows = client_profiles[label_cols].to_numpy(dtype=np.float64)
    spreads = [(r.max() - r.min()) / r.sum() for r in rows if r.sum() > 0]
    if not spreads:
        return 0.0
    return 1.0 - np.mean(spreads)

def compute_shs_group(client_profiles, alpha=0.5, beta=0.5, eps=1e-12):
    """Service Heterogeneity Score (SHS) from computation power and bandwidth.

    The ``C_p`` and ``BW`` columns are each min-max scaled within the group.
    The score is one minus the mean of the weighted absolute deviations from
    the scaled group means, clipped to [0, 1]. A higher score means a more
    homogeneous group.

    Args:
        client_profiles: Table with ``C_p`` and ``BW`` columns, one row per client.
        alpha: Weight of the ``C_p`` deviation.
        beta: Weight of the ``BW`` deviation.
        eps: A column whose range is below this is treated as constant.

    Returns:
        The score as a float; 1.0 when the group has no clients.
    """
    def _scale(values):
        # Min-max scale to [0, 1]; a (near-)constant column maps to all ones,
        # so it contributes no deviation.
        lo, hi = np.min(values), np.max(values)
        if abs(hi - lo) < eps:
            return np.ones_like(values, dtype=np.float64)
        return (values - lo) / (hi - lo)

    cp = client_profiles["C_p"].astype(float).to_numpy()
    bw = client_profiles["BW"].astype(float).to_numpy()
    if cp.size == 0 or bw.size == 0:
        return 1.0  # no clients => nothing to be heterogeneous about

    cp_scaled = _scale(cp)
    bw_scaled = _scale(bw)

    cp_dev = np.abs(cp_scaled - cp_scaled.mean())
    bw_dev = np.abs(bw_scaled - bw_scaled.mean())
    dev = alpha * cp_dev + beta * bw_dev

    return float(np.clip(1.0 - dev.mean(), 0.0, 1.0))

# =====================================================
# ✅ FIXED SES (LATENCY ONLY)
# =====================================================
def compute_ses_group(client_profiles, T, alpha=1.0):
    """Latency-only SES score of a client group.

    Args:
        client_profiles: Table with a ``Latency(ms)`` column, one row per client.
        T: Mean-latency threshold; groups at or below it get the full score.
        alpha: Exponent applied to ``T / mean_latency`` for slower groups.

    Returns:
        1.0 for an empty group or when the mean latency is at most ``T``,
        otherwise ``(T / mean_latency) ** alpha`` as a float.
    """
    lat = client_profiles["Latency(ms)"].astype(float).to_numpy()
    if lat.size == 0:
        return 1.0
    avg = lat.mean()
    return 1.0 if avg <= T else float((T / avg) ** alpha)

def _to_quality_vector(x):
    """Parse a Python-literal string such as ``"[0.1, 0.2]"`` into a float64 array.

    Returns ``None`` when ``x`` cannot be evaluated as a literal or cannot be
    converted to a float64 array. Any exception is swallowed on purpose:
    parsing is best-effort and callers skip ``None`` results.
    """
    try:
        parsed = ast.literal_eval(x)
        vec = np.asarray(parsed, dtype=np.float64)
    except Exception:
        return None
    return vec

def compute_hsq_group(client_profiles):
    """Compute the HSQ score: mean cosine similarity of quality vectors to their mean.

    Each ``Quality_Factor`` cell is parsed with ``_to_quality_vector``; cells
    that fail to parse are skipped. Vectors are truncated to the shortest
    length so they can be stacked, then compared against the element-wise mean
    vector. Returns 0.0 when no cell could be parsed.
    """
    parsed = (_to_quality_vector(q) for q in client_profiles["Quality_Factor"].values)
    vecs = [v for v in parsed if v is not None]
    if not vecs:
        return 0.0
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    similarities = [_cosine_sim(v, centroid) for v in trimmed]
    return np.mean(similarities)

def compute_srs_group(client_profiles):
    """Compute the SRS score: one minus the mean absolute deviation of reliability.

    Reads the ``Reliability_Score`` column as floats and measures how far the
    values spread around their group mean.
    """
    scores = client_profiles["Reliability_Score"].astype(float).to_numpy()
    spread = np.abs(scores - scores.mean())
    return 1.0 - np.mean(spread)

# =====================================================
# MUS
# =====================================================
# Flattened weight vectors keyed by file path, so each client's archive is read
# from disk at most once per session (missing files are cached as empty arrays).
_VEC_CACHE = {}

def _client_weight_path(cid):
    """Return the path of client ``cid``'s weight archive inside ``WEIGHTS_DIR``."""
    return os.path.join(WEIGHTS_DIR, f"client_{int(cid)}_local.npz")

def _npz_to_vec(path):
    """Load all arrays of the ``.npz`` archive at ``path`` as one flat vector.

    Arrays are flattened and concatenated in the order of the archive's
    ``files`` listing. Results are memoised in ``_VEC_CACHE``.

    Returns:
        The concatenated 1-D array, or an empty array when the file does not
        exist or the archive contains no arrays.
    """
    cached = _VEC_CACHE.get(path)
    if cached is not None:
        return cached

    if not os.path.exists(path):
        vec = np.asarray([])
    else:
        # np.load returns an NpzFile that keeps the archive open until it is
        # closed; the context manager releases the handle once the arrays
        # have been read into memory.
        with np.load(path) as data:
            arrays = [data[k].ravel() for k in data.files]
        # np.concatenate raises on an empty list, so treat an archive without
        # arrays like a missing file.
        vec = np.concatenate(arrays) if arrays else np.asarray([])

    _VEC_CACHE[path] = vec
    return vec

def compute_mus_for_client_ids(client_ids):
    """Compute the MUS score from the clients' stored model weights.

    Each client's weight archive is loaded as a flat vector; clients without
    any weights are skipped. Vectors are truncated to the shortest length,
    compared by cosine similarity to their element-wise mean, and the average
    similarity is mapped from [-1, 1] to [0, 1].

    Returns:
        The score, or NaN when no client has weights available.
    """
    loaded = (_npz_to_vec(_client_weight_path(cid)) for cid in client_ids)
    vecs = [v for v in loaded if v.size > 0]
    if not vecs:
        return np.nan
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    mean_similarity = np.mean([_cosine_sim(v, centroid) for v in trimmed])
    return (mean_similarity + 1.0) / 2.0

# =====================================================
# BUILD COMBINATIONS
# =====================================================
# Human-readable key for each combination ("1_2_7") and the number of clients in it.
df_combos["Combination"] = df_combos.apply(
    lambda r: make_combination_string(r, client_cols), axis=1
)
df_combos["Num_Clients"] = df_combos[client_cols].notna().sum(axis=1)

# =====================================================
# COMPUTE RULES + MCS
# =====================================================
records = []

for _, row in df_combos.iterrows():
    client_ids = [row[c] for c in client_cols if pd.notna(row[c])]
    prof = df_profiles[df_profiles["Client_ID"].isin(client_ids)]
    # Combinations with no matching profile at all are left unscored.
    # NOTE(review): a combination where only SOME clients have a profile is
    # still scored on the clients found -- confirm that is intended.
    if prof.empty:
        continue

    # Each rule is clipped to [0, 1]; None / non-finite results become NaN.
    DHS = _clip01(compute_dhs_group(prof))
    SHS = _clip01(compute_shs_group(prof))
    SES = _clip01(compute_ses_group(
        prof, T=SES_T_DEFAULT, alpha=SES_ALPHA_DEFAULT
    ))
    HSQ = _clip01(compute_hsq_group(prof))
    SRS = _clip01(compute_srs_group(prof))
    MUS = _clip01(compute_mus_for_client_ids(client_ids))

    # MCS = weighted average of the rules that produced a finite value; NaN
    # rules are dropped from both numerator and denominator.
    num, den = 0.0, 0.0
    for k, v in [("DHS",DHS),("SHS",SHS),("SES",SES),
                 ("HSQ",HSQ),("SRS",SRS),("MUS",MUS)]:
        if np.isfinite(v):
            num += WEIGHTS[k] * v
            den += WEIGHTS[k]

    MCS = num / den if den > 0 else np.nan

    records.append({
        "Combination": row["Combination"],
        "Num_Clients": len(client_ids),
        "DHS": DHS,
        "SHS": SHS,
        "SES": SES,
        "HSQ": HSQ,
        "SRS": SRS,
        "MUS": MUS,
        "MCS": _clip01(MCS),
        # A NaN MCS compares False here, so it is recorded as 0 (not composable).
        "Is_Composable_MCS": int(MCS > THRESHOLD_MCS),
    })
df_rules = pd.DataFrame(records)
# Left merge keeps unscored combinations (their score columns are NaN).
# NOTE(review): the merge key is the combination string, so a combination that
# appears more than once in the input would be matched many-to-many and its
# rows multiplied; if `records` is empty the key columns are missing and the
# merge raises. Verify the input has unique combinations.
df_out = df_combos.merge(df_rules, on=["Combination","Num_Clients"], how="left")
# Best-scoring combinations first.
df_out = df_out.sort_values("MCS", ascending=False).reset_index(drop=True)
df_out.to_csv(OUT_PATH, index=False)
print(f"✅ Saved: {OUT_PATH}")
df_out

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved: /content/combination_2_WITH_COMPOSABILITY_SCORES.csv


Unnamed: 0.1,Unnamed: 0,Combination_ID,Client_1,Client_2,Client_3,Client_4,Client_5,Client_6,Client_7,Client_8,...,Combination,Num_Clients,DHS,SHS,SES,HSQ,SRS,MUS,MCS,Is_Composable_MCS
0,,25,1,2,3,4,5,6,7,9,...,1_2_3_4_5_6_7_9_10_15,10,0.928928,0.781359,0.926512,0.808388,0.916,0.668970,0.827957,0
1,,32,1,2,3,4,5,6,7,9,...,1_2_3_4_5_6_7_9_12_15,10,0.917830,0.761299,0.910402,0.820682,0.920,0.667555,0.823029,0
2,,29,1,2,3,4,5,6,7,9,...,1_2_3_4_5_6_7_9_11_15,10,0.922517,0.758769,0.908712,0.814156,0.924,0.668658,0.822961,0
3,,164,1,2,3,4,5,7,9,10,...,1_2_3_4_5_7_9_10_11_15,10,0.903413,0.767647,0.935200,0.812896,0.916,0.669872,0.822615,0
4,,42,1,2,3,4,5,6,7,10,...,1_2_3_4_5_6_7_10_12_15,10,0.929893,0.764681,0.895736,0.816639,0.910,0.667167,0.822526,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2998,,1275,1,2,6,8,9,11,12,13,...,1_2_6_8_9_11_12_13_14_15,10,0.838563,0.695206,0.866922,0.738056,0.864,0.668962,0.770379,0
2999,,2001,1,7,8,9,10,11,12,13,...,1_7_8_9_10_11_12_13_14_15,10,0.819458,0.715218,0.869984,0.748770,0.856,0.669557,0.769961,0
3000,,1286,1,2,8,9,10,11,12,13,...,1_2_8_9_10_11_12_13_14_15,10,0.819458,0.706622,0.890997,0.732641,0.870,0.668425,0.769855,0
3001,,1770,1,3,6,8,9,11,12,13,...,1_3_6_8_9_11_12_13_14_15,10,0.838563,0.709102,0.858987,0.732545,0.864,0.668845,0.769666,0


In [374]:
# Summary statistics of the six rule scores and the aggregated MCS.
df_out[["DHS","SHS","SES","HSQ","SRS","MUS","MCS"]].describe()

Unnamed: 0,DHS,SHS,SES,HSQ,SRS,MUS,MCS
count,3003.0,3003.0,3003.0,3003.0,3003.0,3003.0,3003.0
mean,0.879639,0.733185,0.890116,0.760475,0.875495,0.668356,0.792937
std,0.026764,0.031998,0.023749,0.021595,0.023605,0.001254,0.009653
min,0.819458,0.659477,0.829018,0.706882,0.83,0.664474,0.766914
25%,0.860539,0.704217,0.873061,0.744596,0.856,0.667511,0.785964
50%,0.879643,0.734695,0.889128,0.758966,0.88,0.66839,0.792225
75%,0.896941,0.761652,0.905904,0.774618,0.892,0.669223,0.79927
max,0.968224,0.798528,0.975594,0.840403,0.94,0.672011,0.827957


In [375]:
# Summary statistics of MCS next to the global metrics carried over from the combinations file.
df_out[["Global_Accuracy","MCS","Global_Mean_Quality_Factor"]].describe()

Unnamed: 0,Global_Accuracy,MCS,Global_Mean_Quality_Factor
count,3003.0,3003.0,3003.0
mean,78.739333,0.792937,49.694265
std,1.663225,0.009653,2.758789
min,75.848,0.766914,42.744495
25%,77.3435,0.785964,47.62124
50%,78.306,0.792225,49.659143
75%,80.2485,0.79927,51.554543
max,82.891,0.827957,57.844033


**CIFAR10 MLaaS Composability Model**
---

In [419]:
import os
import ast
import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the per-client weight files under WEIGHTS_DIR are reachable.
drive.mount('/content/drive')

# =====================================================
# CIFAR10 INPUTS
# =====================================================
# If these files are inside a sub-folder (e.g., "/content/CIFAR10/"),
# just set BASE_DIR accordingly.
BASE_DIR = "/content"   # change if the CSV files live in a sub-folder

PROFILES_PATH = os.path.join(BASE_DIR, "CIFAR10_Client_Profiles_For_Composability_100.csv")

# One combinations file per group size; the trailing letter is the short name
# used as the key in NAMES / dfs.
# NOTE(review): the last file uses an upper-case ".CSV" extension -- this only
# resolves on a case-sensitive filesystem if the file is really named that way.
COMBOS_PATHS = [
    os.path.join(BASE_DIR, "All_Combinations_2_CIFAR_100_4050.csv"),   # A
    os.path.join(BASE_DIR, "All_Combinations_3_CIFAR_32_4960.csv"),    # B
    os.path.join(BASE_DIR, "All_Combinations_5_CIFAR_16_4368.csv"),    # C
    os.path.join(BASE_DIR, "All_Combinations_7_CIFAR_15_6435.csv"),    # D
    os.path.join(BASE_DIR, "All_Combinations_10_CIFAR_15_3003.CSV"),   # E
]

# Short names, paired positionally with COMBOS_PATHS.
NAMES = ["A", "B", "C", "D", "E"]

# Output folder (each scored file is saved as a separate CSV)
OUT_DIR = BASE_DIR
os.makedirs(OUT_DIR, exist_ok=True)

# =====================================================
# CONFIG
# =====================================================
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_20_CIFAR10"  # CIFAR10 weights folder

# Relative importance of each rule in the MCS weighted average. The weights do
# not need to sum to 1: the MCS divides by the sum of the weights actually used.
WEIGHTS = {
    "DHS": 0.99,
    "MUS": 0.95,
    "SHS": 0.45,
    "SES": 0.60,
    "HSQ": 0.78,
    "SRS": 0.32,
}

# SES CONFIG (LATENCY ONLY)
# SES_T_DEFAULT is the mean-latency threshold T: groups at or below it score 1.0,
# slower groups score (T / mean_latency) ** SES_ALPHA_DEFAULT.
# NOTE(review): same threshold as the other datasets -- confirm it suits CIFAR10.
SES_T_DEFAULT = 3676.331401
SES_ALPHA_DEFAULT = 1.0

# =====================================================
# LOAD PROFILES ONCE
# =====================================================
df_profiles = pd.read_csv(PROFILES_PATH)

# =====================================================
# HELPERS
# =====================================================
def make_combination_string(row, client_cols):
    """Build the underscore-joined key of the client IDs present in ``row``.

    Columns are visited in the order given by ``client_cols``. Cells for which
    ``pd.notna`` is false are skipped; every remaining value is cast to ``int``
    before being rendered as text (so ``3.0`` becomes ``"3"``).
    """
    present = (row[col] for col in client_cols if pd.notna(row[col]))
    return "_".join(str(int(value)) for value in present)

def _cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are converted to float64 arrays. If either Euclidean norm is
    below ``eps`` the similarity is defined as 0.0 instead of dividing by a
    (near-)zero norm.
    """
    u = np.asarray(a, dtype=np.float64)
    v = np.asarray(b, dtype=np.float64)
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    degenerate = norm_u < eps or norm_v < eps
    return 0.0 if degenerate else float(np.dot(u, v) / (norm_u * norm_v))

def _clip01(x):
    """Clamp ``x`` to the interval [0, 1]; ``None`` and non-finite values become NaN."""
    if x is not None and np.isfinite(x):
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

# =====================================================
# GROUP-LEVEL RULES
# =====================================================
def compute_dhs_group(client_profiles):
    """Compute the DHS score of a client group from its label distributions.

    For every client row, the columns whose names start with ``"Label"`` are
    read as a distribution and reduced to ``(max - min) / sum``. Rows whose sum
    is not positive are ignored. The score is one minus the mean of those
    ratios, or 0.0 when no row contributes.
    """
    label_cols = [name for name in client_profiles.columns if name.startswith("Label")]
    rows = client_profiles[label_cols].to_numpy(dtype=np.float64)
    spreads = [(r.max() - r.min()) / r.sum() for r in rows if r.sum() > 0]
    if not spreads:
        return 0.0
    return 1.0 - np.mean(spreads)

def compute_shs_group(client_profiles, alpha=0.5, beta=0.5, eps=1e-12):
    """Score how homogeneous a client group is in compute power and bandwidth.

    The ``C_p`` and ``BW`` columns are each min-max scaled within the group.
    The score is one minus the mean of the weighted absolute deviations from
    the scaled group means, clipped to [0, 1]. A higher score means a more
    homogeneous group.

    Args:
        client_profiles: Table with ``C_p`` and ``BW`` columns, one row per client.
        alpha: Weight of the ``C_p`` deviation.
        beta: Weight of the ``BW`` deviation.
        eps: A column whose range is below this is treated as constant.

    Returns:
        The score as a float; 1.0 when the group has no clients.
    """
    def _scale(values):
        # Min-max scale to [0, 1]; a (near-)constant column maps to all ones,
        # so it contributes no deviation.
        lo, hi = np.min(values), np.max(values)
        if abs(hi - lo) < eps:
            return np.ones_like(values, dtype=np.float64)
        return (values - lo) / (hi - lo)

    cp = client_profiles["C_p"].astype(float).to_numpy()
    bw = client_profiles["BW"].astype(float).to_numpy()
    if cp.size == 0 or bw.size == 0:
        return 1.0

    cp_scaled = _scale(cp)
    bw_scaled = _scale(bw)

    cp_dev = np.abs(cp_scaled - cp_scaled.mean())
    bw_dev = np.abs(bw_scaled - bw_scaled.mean())
    dev = alpha * cp_dev + beta * bw_dev

    return float(np.clip(1.0 - dev.mean(), 0.0, 1.0))

def compute_ses_group(client_profiles, T, alpha=1.0):
    """Latency-only SES score of a client group.

    Args:
        client_profiles: Table with a ``Latency(ms)`` column, one row per client.
        T: Mean-latency threshold; groups at or below it get the full score.
        alpha: Exponent applied to ``T / mean_latency`` for slower groups.

    Returns:
        1.0 for an empty group or when the mean latency is at most ``T``,
        otherwise ``(T / mean_latency) ** alpha`` as a float.
    """
    lat = client_profiles["Latency(ms)"].astype(float).to_numpy()
    if lat.size == 0:
        return 1.0
    avg = lat.mean()
    return 1.0 if avg <= T else float((T / avg) ** alpha)

def _to_quality_vector(x):
    """Parse a Python-literal string such as ``"[0.1, 0.2]"`` into a float64 array.

    Returns ``None`` when ``x`` cannot be evaluated as a literal or cannot be
    converted to a float64 array. Any exception is swallowed on purpose:
    parsing is best-effort and callers skip ``None`` results.
    """
    try:
        parsed = ast.literal_eval(x)
        vec = np.asarray(parsed, dtype=np.float64)
    except Exception:
        return None
    return vec

def compute_hsq_group(client_profiles):
    """Compute the HSQ score: mean cosine similarity of quality vectors to their mean.

    Each ``Quality_Factor`` cell is parsed with ``_to_quality_vector``; cells
    that fail to parse are skipped. Vectors are truncated to the shortest
    length so they can be stacked, then compared against the element-wise mean
    vector. Returns 0.0 when no cell could be parsed.
    """
    parsed = (_to_quality_vector(q) for q in client_profiles["Quality_Factor"].values)
    vecs = [v for v in parsed if v is not None]
    if not vecs:
        return 0.0
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    similarities = [_cosine_sim(v, centroid) for v in trimmed]
    return np.mean(similarities)

def compute_srs_group(client_profiles):
    """Compute the SRS score: one minus the mean absolute deviation of reliability.

    Reads the ``Reliability_Score`` column as floats and measures how far the
    values spread around their group mean.
    """
    scores = client_profiles["Reliability_Score"].astype(float).to_numpy()
    spread = np.abs(scores - scores.mean())
    return 1.0 - np.mean(spread)

# =====================================================
# MUS (WEIGHTS-BASED)
# =====================================================
# Flattened weight vectors keyed by file path, so each client's archive is read
# from disk at most once per session (missing files are cached as empty arrays).
_VEC_CACHE = {}

def _client_weight_path(cid):
    """Return the path of client ``cid``'s weight archive inside ``WEIGHTS_DIR``."""
    return os.path.join(WEIGHTS_DIR, f"client_{int(cid)}_local.npz")

def _npz_to_vec(path):
    """Load all arrays of the ``.npz`` archive at ``path`` as one flat vector.

    Arrays are flattened and concatenated in the order of the archive's
    ``files`` listing. Results are memoised in ``_VEC_CACHE``.

    Returns:
        The concatenated 1-D array, or an empty array when the file does not
        exist or the archive contains no arrays.
    """
    cached = _VEC_CACHE.get(path)
    if cached is not None:
        return cached

    if not os.path.exists(path):
        vec = np.asarray([])
    else:
        # np.load returns an NpzFile that keeps the archive open until it is
        # closed; the context manager releases the handle once the arrays
        # have been read into memory.
        with np.load(path) as data:
            arrays = [data[k].ravel() for k in data.files]
        # np.concatenate raises on an empty list, so treat an archive without
        # arrays like a missing file.
        vec = np.concatenate(arrays) if arrays else np.asarray([])

    _VEC_CACHE[path] = vec
    return vec

def compute_mus_for_client_ids(client_ids):
    """Compute the MUS score from the clients' stored model weights.

    Each client's weight archive is loaded as a flat vector; clients without
    any weights are skipped. Vectors are truncated to the shortest length,
    compared by cosine similarity to their element-wise mean, and the average
    similarity is mapped from [-1, 1] to [0, 1].

    Returns:
        The score, or NaN when no client has weights available.
    """
    loaded = (_npz_to_vec(_client_weight_path(cid)) for cid in client_ids)
    vecs = [v for v in loaded if v.size > 0]
    if not vecs:
        return np.nan
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    mean_similarity = np.mean([_cosine_sim(v, centroid) for v in trimmed])
    return (mean_similarity + 1.0) / 2.0

# =====================================================
# CORE PIPELINE FOR ONE FILE
# =====================================================
def compute_df_out_for_one_combo_file(combos_path, df_profiles, out_path):
    """Score every client combination in one CSV and save the ranked result.

    Args:
        combos_path: CSV with one combination per row; client IDs are read from
            the columns whose names start with ``"Client_"``.
        df_profiles: Client profile table; rows are selected by ``Client_ID``.
        out_path: Destination CSV for the scored combinations.

    Returns:
        The combinations table with ``Combination``, ``Num_Clients``, the six
        rule scores and ``MCS`` appended, sorted by ``MCS`` in descending
        order. Combinations without any matching profile keep NaN scores.
    """
    df_combos = pd.read_csv(combos_path)
    client_cols = [c for c in df_combos.columns if c.startswith("Client_")]

    df_combos["Combination"] = df_combos.apply(
        lambda r: make_combination_string(r, client_cols), axis=1
    )
    df_combos["Num_Clients"] = df_combos[client_cols].notna().sum(axis=1)

    score_cols = ["DHS", "SHS", "SES", "HSQ", "SRS", "MUS", "MCS"]
    scored_index = []  # index labels of the rows that could be scored
    records = []
    for idx, row in df_combos.iterrows():
        client_ids = [row[c] for c in client_cols if pd.notna(row[c])]
        prof = df_profiles[df_profiles["Client_ID"].isin(client_ids)]
        if prof.empty:
            continue

        # Each rule is clipped to [0, 1]; None / non-finite results become NaN.
        DHS = _clip01(compute_dhs_group(prof))
        SHS = _clip01(compute_shs_group(prof))
        SES = _clip01(compute_ses_group(prof, T=SES_T_DEFAULT, alpha=SES_ALPHA_DEFAULT))
        HSQ = _clip01(compute_hsq_group(prof))
        SRS = _clip01(compute_srs_group(prof))
        MUS = _clip01(compute_mus_for_client_ids(client_ids))

        # MCS = weighted average over the rules that produced a finite value.
        num, den = 0.0, 0.0
        for k, v in [("DHS",DHS),("SHS",SHS),("SES",SES),("HSQ",HSQ),("SRS",SRS),("MUS",MUS)]:
            if np.isfinite(v):
                num += WEIGHTS[k] * v
                den += WEIGHTS[k]

        MCS = num / den if den > 0 else np.nan

        scored_index.append(idx)
        records.append({
            "DHS": DHS,
            "SHS": SHS,
            "SES": SES,
            "HSQ": HSQ,
            "SRS": SRS,
            "MUS": MUS,
            "MCS": _clip01(MCS),
        })

    # Attach the scores by row position instead of merging on the combination
    # string: a repeated combination no longer multiplies rows, and an empty
    # `records` list yields NaN score columns instead of a KeyError.
    df_rules = pd.DataFrame(
        records, index=scored_index, columns=score_cols, dtype=np.float64
    )
    df_out = df_combos.join(df_rules.reindex(df_combos.index))
    df_out = df_out.sort_values("MCS", ascending=False).reset_index(drop=True)

    df_out.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path} | rows={len(df_out)}")
    return df_out

# =====================================================
# RUN ALL 5 CIFAR10 FILES + STORE IN DICT
# =====================================================
# Maps the short name ("A".."E") of each combinations file to its scored DataFrame.
dfs = {}

for name, path in zip(NAMES, COMBOS_PATHS):
    # Tag the outputs with this section's dataset. The previous "_HAR" suffix
    # mislabelled the CIFAR10 results and used a file name that belongs to
    # another dataset.
    out_path = os.path.join(OUT_DIR, f"combination_WITH_COMPOSABILITY_SCORES_{name}_CIFAR10.csv")
    dfs[name] = compute_df_out_for_one_combo_file(path, df_profiles, out_path)
# Example:
# dfs["A"].head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_A_HAR.csv | rows=4950
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_B_HAR.csv | rows=4960
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_C_HAR.csv | rows=4368
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_D_HAR.csv | rows=6435
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_E_HAR.csv | rows=3003


In [436]:
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# =====================================================
# THRESHOLDS (SET THEM HERE ONCE)
# =====================================================
# Per combinations file: "acc" is the Global_Accuracy cut-off that defines the
# ground-truth label, "mcs" is the MCS cut-off that defines the prediction.
THRESHOLDS = {
    "A": {"acc": 51, "mcs": 0.75},
    "B": {"acc": 50, "mcs": 0.74},
    "C": {"acc": 50, "mcs": 0.76},
    "D": {"acc": 50 ,"mcs": 0.765},
    "E": {"acc": 50, "mcs": 0.755},
}

# Map file-name -> K (number of clients per combination)
K_MAP = {"A": 2, "B": 3, "C": 5, "D": 7, "E": 10}

# =====================================================
# BUILD METRICS + LATEX LINES
# =====================================================
rows = []
latex_lines = []

for name in ["A", "B", "C", "D", "E"]:
    df_merged = dfs[name].copy()

    acc_thr = THRESHOLDS[name]["acc"]
    mcs_thr = THRESHOLDS[name]["mcs"]

    # binary labels: 1 when strictly above the threshold.
    # A NaN MCS compares False and is therefore predicted as 0.
    y_true = (df_merged["Global_Accuracy"] > acc_thr).astype(int)
    y_pred = (df_merged["MCS"] > mcs_thr).astype(int)

    # accuracy
    acc = accuracy_score(y_true, y_pred)

    # report dict (zero_division=0: metrics of a class with no predictions are 0)
    rep = classification_report(y_true, y_pred, digits=6, output_dict=True, zero_division=0)



    # Support-weighted averages over both classes. Weighted recall equals the
    # overall accuracy by construction, so those two columns always match.
    w_p = rep["weighted avg"]["precision"]
    w_r = rep["weighted avg"]["recall"]
    w_f = rep["weighted avg"]["f1-score"]

    K = K_MAP[name]

    # store full-precision numbers for the summary table
    rows.append([K, acc, w_p, w_r, w_f])

    # LaTeX table row: K, accuracy, weighted precision, weighted recall,
    # weighted F1 (macro averages are not computed here).
    latex_lines.append(
        rf"$K={K}$ & {acc:.4f} & {w_p:.4f} & {w_r:.4f} & {w_f:.4f} \\"
    )

# =====================================================
# PRINT LATEX ROWS (COPY-PASTE)
# Columns: K | Acc | W P | W R | W F1
# =====================================================
print("LATEX ROWS:")
for line in latex_lines:
    print(line)

# Optional: dataframe view (not required)
summary_df = pd.DataFrame(
    rows,
    columns=["K", "Accuracy", "Weighted_P", "Weighted_R", "Weighted_F1"]
)
summary_df


LATEX ROWS:
$K=2$ & 0.7337 & 0.7398 & 0.7337 & 0.7367 \\
$K=3$ & 0.7349 & 0.7492 & 0.7349 & 0.7415 \\
$K=5$ & 0.7239 & 0.7246 & 0.7239 & 0.7243 \\
$K=7$ & 0.7330 & 0.6858 & 0.7330 & 0.7039 \\
$K=10$ & 0.7339 & 0.8302 & 0.7339 & 0.7719 \\


Unnamed: 0,K,Accuracy,Weighted_P,Weighted_R,Weighted_F1
0,2,0.733737,0.739752,0.733737,0.736676
1,3,0.734879,0.74921,0.734879,0.741458
2,5,0.723901,0.724638,0.723901,0.724268
3,7,0.733023,0.685815,0.733023,0.703943
4,10,0.733933,0.830213,0.733933,0.771885


In [None]:
& 73.37 & 73.98 & 73.37
& 73.49 & 74.92 & 73.49
& 72.39 & 72.46 & 72.39
& 73.30 & 68.58 & 73.30
& 73.39 & 83.02 & 73.39 \\

**FMNIST Composability Model**
---

In [458]:
import os
import ast
import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the per-client weight files under WEIGHTS_DIR are reachable.
drive.mount('/content/drive')

# =====================================================
# FMNIST INPUTS
# =====================================================
# If these files are inside a sub-folder (e.g., "/content/FMNIST/"),
# just set BASE_DIR accordingly.
BASE_DIR = "/content"   # change if the CSV files live in a sub-folder

PROFILES_PATH = os.path.join(BASE_DIR, "FMNIST_Client_Profiles_For_Composability_100.csv")

# One combinations file per group size; the trailing letter is the short name
# used as the key in NAMES / dfs.
# NOTE(review): the last file uses an upper-case ".CSV" extension -- this only
# resolves on a case-sensitive filesystem if the file is really named that way.
COMBOS_PATHS = [
    os.path.join(BASE_DIR, "All_Combinations_2_FMNIST_100_4050.csv"),   # A
    os.path.join(BASE_DIR, "All_Combinations_3_FMNIST_32_4960.csv"),    # B
    os.path.join(BASE_DIR, "All_Combinations_5_FMNIST_16_4368.csv"),    # C
    os.path.join(BASE_DIR, "All_Combinations_7_FMNIST_15_6435.csv"),    # D
    os.path.join(BASE_DIR, "All_Combinations_10_FMNIST_15_3003.CSV"),   # E
]

# Short names, paired positionally with COMBOS_PATHS.
NAMES = ["A", "B", "C", "D", "E"]

# Output folder (each scored file is saved as a separate CSV)
OUT_DIR = BASE_DIR
os.makedirs(OUT_DIR, exist_ok=True)

# =====================================================
# CONFIG
# =====================================================
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_30_FMNIST"  # FMNIST weights folder

# Relative importance of each rule in the MCS weighted average. The weights do
# not need to sum to 1: the MCS divides by the sum of the weights actually used.
WEIGHTS = {
    "DHS": 0.99,
    "MUS": 0.95,
    "SHS": 0.45,
    "SES": 0.60,
    "HSQ": 0.78,
    "SRS": 0.32,
}

# SES CONFIG (LATENCY ONLY)
# SES_T_DEFAULT is the mean-latency threshold T: groups at or below it score 1.0,
# slower groups score (T / mean_latency) ** SES_ALPHA_DEFAULT.
# NOTE(review): same threshold as the other datasets -- confirm it suits FMNIST.
SES_T_DEFAULT = 3676.331401
SES_ALPHA_DEFAULT = 1.0

# =====================================================
# LOAD PROFILES ONCE
# =====================================================
df_profiles = pd.read_csv(PROFILES_PATH)

# =====================================================
# HELPERS
# =====================================================
def make_combination_string(row, client_cols):
    """Build the underscore-joined key of the client IDs present in ``row``.

    Columns are visited in the order given by ``client_cols``. Cells for which
    ``pd.notna`` is false are skipped; every remaining value is cast to ``int``
    before being rendered as text (so ``3.0`` becomes ``"3"``).
    """
    present = (row[col] for col in client_cols if pd.notna(row[col]))
    return "_".join(str(int(value)) for value in present)

def _cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are converted to float64 arrays. If either Euclidean norm is
    below ``eps`` the similarity is defined as 0.0 instead of dividing by a
    (near-)zero norm.
    """
    u = np.asarray(a, dtype=np.float64)
    v = np.asarray(b, dtype=np.float64)
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    degenerate = norm_u < eps or norm_v < eps
    return 0.0 if degenerate else float(np.dot(u, v) / (norm_u * norm_v))

def _clip01(x):
    """Clamp ``x`` to the interval [0, 1]; ``None`` and non-finite values become NaN."""
    if x is not None and np.isfinite(x):
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

# =====================================================
# GROUP-LEVEL RULES
# =====================================================
def compute_dhs_group(client_profiles):
    """Compute the DHS score of a client group from its label distributions.

    For every client row, the columns whose names start with ``"Label"`` are
    read as a distribution and reduced to ``(max - min) / sum``. Rows whose sum
    is not positive are ignored. The score is one minus the mean of those
    ratios, or 0.0 when no row contributes.
    """
    label_cols = [name for name in client_profiles.columns if name.startswith("Label")]
    rows = client_profiles[label_cols].to_numpy(dtype=np.float64)
    spreads = [(r.max() - r.min()) / r.sum() for r in rows if r.sum() > 0]
    if not spreads:
        return 0.0
    return 1.0 - np.mean(spreads)

def compute_shs_group(client_profiles, alpha=0.5, beta=0.5, eps=1e-12):
    """Score how homogeneous a client group is in compute power and bandwidth.

    The ``C_p`` and ``BW`` columns are each min-max scaled within the group.
    The score is one minus the mean of the weighted absolute deviations from
    the scaled group means, clipped to [0, 1]. A higher score means a more
    homogeneous group.

    Args:
        client_profiles: Table with ``C_p`` and ``BW`` columns, one row per client.
        alpha: Weight of the ``C_p`` deviation.
        beta: Weight of the ``BW`` deviation.
        eps: A column whose range is below this is treated as constant.

    Returns:
        The score as a float; 1.0 when the group has no clients.
    """
    def _scale(values):
        # Min-max scale to [0, 1]; a (near-)constant column maps to all ones,
        # so it contributes no deviation.
        lo, hi = np.min(values), np.max(values)
        if abs(hi - lo) < eps:
            return np.ones_like(values, dtype=np.float64)
        return (values - lo) / (hi - lo)

    cp = client_profiles["C_p"].astype(float).to_numpy()
    bw = client_profiles["BW"].astype(float).to_numpy()
    if cp.size == 0 or bw.size == 0:
        return 1.0

    cp_scaled = _scale(cp)
    bw_scaled = _scale(bw)

    cp_dev = np.abs(cp_scaled - cp_scaled.mean())
    bw_dev = np.abs(bw_scaled - bw_scaled.mean())
    dev = alpha * cp_dev + beta * bw_dev

    return float(np.clip(1.0 - dev.mean(), 0.0, 1.0))

def compute_ses_group(client_profiles, T, alpha=1.0):
    """Latency-only SES score of a client group.

    Args:
        client_profiles: Table with a ``Latency(ms)`` column, one row per client.
        T: Mean-latency threshold; groups at or below it get the full score.
        alpha: Exponent applied to ``T / mean_latency`` for slower groups.

    Returns:
        1.0 for an empty group or when the mean latency is at most ``T``,
        otherwise ``(T / mean_latency) ** alpha`` as a float.
    """
    lat = client_profiles["Latency(ms)"].astype(float).to_numpy()
    if lat.size == 0:
        return 1.0
    avg = lat.mean()
    return 1.0 if avg <= T else float((T / avg) ** alpha)

def _to_quality_vector(x):
    """Parse a Python-literal string such as ``"[0.1, 0.2]"`` into a float64 array.

    Returns ``None`` when ``x`` cannot be evaluated as a literal or cannot be
    converted to a float64 array. Any exception is swallowed on purpose:
    parsing is best-effort and callers skip ``None`` results.
    """
    try:
        parsed = ast.literal_eval(x)
        vec = np.asarray(parsed, dtype=np.float64)
    except Exception:
        return None
    return vec

def compute_hsq_group(client_profiles):
    """Compute the HSQ score: mean cosine similarity of quality vectors to their mean.

    Each ``Quality_Factor`` cell is parsed with ``_to_quality_vector``; cells
    that fail to parse are skipped. Vectors are truncated to the shortest
    length so they can be stacked, then compared against the element-wise mean
    vector. Returns 0.0 when no cell could be parsed.
    """
    parsed = (_to_quality_vector(q) for q in client_profiles["Quality_Factor"].values)
    vecs = [v for v in parsed if v is not None]
    if not vecs:
        return 0.0
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    similarities = [_cosine_sim(v, centroid) for v in trimmed]
    return np.mean(similarities)

def compute_srs_group(client_profiles):
    """Compute the SRS score: one minus the mean absolute deviation of reliability.

    Reads the ``Reliability_Score`` column as floats and measures how far the
    values spread around their group mean.
    """
    scores = client_profiles["Reliability_Score"].astype(float).to_numpy()
    spread = np.abs(scores - scores.mean())
    return 1.0 - np.mean(spread)

# =====================================================
# MUS (WEIGHTS-BASED)
# =====================================================
# Flattened weight vectors keyed by file path, so each client's archive is read
# from disk at most once per session (missing files are cached as empty arrays).
_VEC_CACHE = {}

def _client_weight_path(cid):
    """Return the path of client ``cid``'s weight archive inside ``WEIGHTS_DIR``."""
    return os.path.join(WEIGHTS_DIR, f"client_{int(cid)}_local.npz")

def _npz_to_vec(path):
    """Load all arrays of the ``.npz`` archive at ``path`` as one flat vector.

    Arrays are flattened and concatenated in the order of the archive's
    ``files`` listing. Results are memoised in ``_VEC_CACHE``.

    Returns:
        The concatenated 1-D array, or an empty array when the file does not
        exist or the archive contains no arrays.
    """
    cached = _VEC_CACHE.get(path)
    if cached is not None:
        return cached

    if not os.path.exists(path):
        vec = np.asarray([])
    else:
        # np.load returns an NpzFile that keeps the archive open until it is
        # closed; the context manager releases the handle once the arrays
        # have been read into memory.
        with np.load(path) as data:
            arrays = [data[k].ravel() for k in data.files]
        # np.concatenate raises on an empty list, so treat an archive without
        # arrays like a missing file.
        vec = np.concatenate(arrays) if arrays else np.asarray([])

    _VEC_CACHE[path] = vec
    return vec

def compute_mus_for_client_ids(client_ids):
    """Compute the MUS score from the clients' stored model weights.

    Each client's weight archive is loaded as a flat vector; clients without
    any weights are skipped. Vectors are truncated to the shortest length,
    compared by cosine similarity to their element-wise mean, and the average
    similarity is mapped from [-1, 1] to [0, 1].

    Returns:
        The score, or NaN when no client has weights available.
    """
    loaded = (_npz_to_vec(_client_weight_path(cid)) for cid in client_ids)
    vecs = [v for v in loaded if v.size > 0]
    if not vecs:
        return np.nan
    common_len = min(len(v) for v in vecs)
    trimmed = [v[:common_len] for v in vecs]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    mean_similarity = np.mean([_cosine_sim(v, centroid) for v in trimmed])
    return (mean_similarity + 1.0) / 2.0

# =====================================================
# CORE PIPELINE FOR ONE FILE
# =====================================================
def compute_df_out_for_one_combo_file(combos_path, df_profiles, out_path):
    """Score every client combination in one CSV and save the ranked result.

    Args:
        combos_path: CSV with one combination per row; client IDs are read from
            the columns whose names start with ``"Client_"``.
        df_profiles: Client profile table; rows are selected by ``Client_ID``.
        out_path: Destination CSV for the scored combinations.

    Returns:
        The combinations table with ``Combination``, ``Num_Clients``, the six
        rule scores and ``MCS`` appended, sorted by ``MCS`` in descending
        order. Combinations without any matching profile keep NaN scores.
    """
    df_combos = pd.read_csv(combos_path)
    client_cols = [c for c in df_combos.columns if c.startswith("Client_")]

    df_combos["Combination"] = df_combos.apply(
        lambda r: make_combination_string(r, client_cols), axis=1
    )
    df_combos["Num_Clients"] = df_combos[client_cols].notna().sum(axis=1)

    score_cols = ["DHS", "SHS", "SES", "HSQ", "SRS", "MUS", "MCS"]
    scored_index = []  # index labels of the rows that could be scored
    records = []
    for idx, row in df_combos.iterrows():
        client_ids = [row[c] for c in client_cols if pd.notna(row[c])]
        prof = df_profiles[df_profiles["Client_ID"].isin(client_ids)]
        if prof.empty:
            continue

        # Each rule is clipped to [0, 1]; None / non-finite results become NaN.
        DHS = _clip01(compute_dhs_group(prof))
        SHS = _clip01(compute_shs_group(prof))
        SES = _clip01(compute_ses_group(prof, T=SES_T_DEFAULT, alpha=SES_ALPHA_DEFAULT))
        HSQ = _clip01(compute_hsq_group(prof))
        SRS = _clip01(compute_srs_group(prof))
        MUS = _clip01(compute_mus_for_client_ids(client_ids))

        # MCS = weighted average over the rules that produced a finite value.
        num, den = 0.0, 0.0
        for k, v in [("DHS",DHS),("SHS",SHS),("SES",SES),("HSQ",HSQ),("SRS",SRS),("MUS",MUS)]:
            if np.isfinite(v):
                num += WEIGHTS[k] * v
                den += WEIGHTS[k]

        MCS = num / den if den > 0 else np.nan

        scored_index.append(idx)
        records.append({
            "DHS": DHS,
            "SHS": SHS,
            "SES": SES,
            "HSQ": HSQ,
            "SRS": SRS,
            "MUS": MUS,
            "MCS": _clip01(MCS),
        })

    # Attach the scores by row position instead of merging on the combination
    # string: a repeated combination no longer multiplies rows, and an empty
    # `records` list yields NaN score columns instead of a KeyError.
    df_rules = pd.DataFrame(
        records, index=scored_index, columns=score_cols, dtype=np.float64
    )
    df_out = df_combos.join(df_rules.reindex(df_combos.index))
    df_out = df_out.sort_values("MCS", ascending=False).reset_index(drop=True)

    df_out.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path} | rows={len(df_out)}")
    return df_out

# =====================================================
# RUN ALL 5 FMNIST FILES + STORE IN DICT
# =====================================================
# Maps the short name ("A".."E") of each combinations file to its scored DataFrame.
dfs = {}

for name, path in zip(NAMES, COMBOS_PATHS):
    # One output CSV per combinations file, tagged with its short name.
    out_path = os.path.join(OUT_DIR, f"combination_WITH_COMPOSABILITY_SCORES_{name}_FMNIST.csv")
    dfs[name] = compute_df_out_for_one_combo_file(path, df_profiles, out_path)

# Example:
# dfs["A"].head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_A_FMNIST.csv | rows=4950
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_B_FMNIST.csv | rows=4960
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_C_FMNIST.csv | rows=4368
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_D_FMNIST.csv | rows=6435
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_E_FMNIST.csv | rows=3003


In [468]:
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# =====================================================
# THRESHOLDS (SET THEM HERE ONCE)
# =====================================================
# Per combinations file: "acc" is the Global_Accuracy cut-off that defines the
# ground-truth label, "mcs" is the MCS cut-off that defines the prediction.

THRESHOLDS = {
    "A": {"acc": 83, "mcs": 0.88},
    "B": {"acc": 73, "mcs": 0.83},
    "C": {"acc": 73, "mcs": 0.83},
    "D": {"acc": 73 ,"mcs": 0.812},
    "E": {"acc": 72.25, "mcs": 0.81},
}


# Map file-name -> K (number of clients per combination)
K_MAP = {"A": 2, "B": 3, "C": 5, "D": 7, "E": 10}

# =====================================================
# BUILD METRICS + LATEX LINES
# =====================================================
rows = []
latex_lines = []

for name in ["A", "B", "C", "D", "E"]:
    df_merged = dfs[name].copy()

    acc_thr = THRESHOLDS[name]["acc"]
    mcs_thr = THRESHOLDS[name]["mcs"]

    # binary labels: 1 when strictly above the threshold.
    # A NaN MCS compares False and is therefore predicted as 0.
    y_true = (df_merged["Global_Accuracy"] > acc_thr).astype(int)
    y_pred = (df_merged["MCS"] > mcs_thr).astype(int)

    # accuracy
    acc = accuracy_score(y_true, y_pred)

    # report dict (zero_division=0: metrics of a class with no predictions are 0)
    rep = classification_report(y_true, y_pred, digits=6, output_dict=True, zero_division=0)



    # Support-weighted averages over both classes. Weighted recall equals the
    # overall accuracy by construction, so those two columns always match.
    w_p = rep["weighted avg"]["precision"]
    w_r = rep["weighted avg"]["recall"]
    w_f = rep["weighted avg"]["f1-score"]

    K = K_MAP[name]

    # store full-precision numbers for the summary table
    rows.append([K, acc, w_p, w_r, w_f])

    # LaTeX table row: K, accuracy, weighted precision, weighted recall,
    # weighted F1 (macro averages are not computed here).
    latex_lines.append(
        rf"$K={K}$ & {acc:.4f} & {w_p:.4f} & {w_r:.4f} & {w_f:.4f} \\"
    )

# =====================================================
# PRINT LATEX ROWS (COPY-PASTE)
# Columns: K | Acc | W P | W R | W F1
# =====================================================
print("LATEX ROWS:")
for line in latex_lines:
    print(line)

# Optional: dataframe view (not required)
summary_df = pd.DataFrame(
    rows,
    columns=["K", "Accuracy", "Weighted_P", "Weighted_R", "Weighted_F1"]
)
summary_df


LATEX ROWS:
$K=2$ & 0.7127 & 0.6952 & 0.7127 & 0.7035 \\
$K=3$ & 0.7462 & 0.8753 & 0.7462 & 0.8020 \\
$K=5$ & 0.7328 & 0.7046 & 0.7328 & 0.7182 \\
$K=7$ & 0.6856 & 0.8481 & 0.6856 & 0.7554 \\
$K=10$ & 0.6780 & 0.5970 & 0.6780 & 0.6271 \\


Unnamed: 0,K,Accuracy,Weighted_P,Weighted_R,Weighted_F1
0,2,0.712727,0.695153,0.712727,0.703505
1,3,0.746169,0.875314,0.746169,0.801962
2,5,0.73283,0.704561,0.73283,0.718224
3,7,0.685625,0.84808,0.685625,0.755428
4,10,0.677989,0.596991,0.677989,0.627103


**HAR Composability Model**
---

In [398]:
import os
import ast
import numpy as np
import pandas as pd
from google.colab import drive

drive.mount('/content/drive')

# =====================================================
# HAR INPUTS
# =====================================================
# Every input CSV is looked up under BASE_DIR. Change it if the files live in
# a sub-folder (e.g. "/content/HAR" or "/content/k").
BASE_DIR = "/content"

PROFILES_PATH = os.path.join(BASE_DIR, "HAR_Client_Profiles_For_Composability_100.csv")

# One combinations file per group; the order must line up with NAMES.
COMBOS_PATHS = [
    os.path.join(BASE_DIR, filename)
    for filename in (
        "All_Combinations_2_HAR_100_4050.csv",   # A
        "All_Combinations_3_HAR_32_4960.csv",    # B
        "All_Combinations_5_HAR_16_4368.csv",    # C
        "All_Combinations_7_HAR_15_6435.csv",    # D
        "All_Combinations_10_HAR_15_3003.CSV",   # E
    )
]

NAMES = list("ABCDE")

# Output folder: each group is saved there as a separate CSV.
OUT_DIR = BASE_DIR
os.makedirs(OUT_DIR, exist_ok=True)

# =====================================================
# CONFIG
# =====================================================
# Folder holding the per-client "client_<id>_local.npz" weight files (HAR).
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_20_HAR"

# Relative weight of each rule score in the MCS weighted mean.
WEIGHTS = dict(
    DHS=0.99,
    MUS=0.95,
    SHS=0.45,
    SES=0.60,
    HSQ=0.78,
    SRS=0.32,
)

# SES config (latency only): threshold T and exponent alpha handed to
# compute_ses_group. Kept as-is; swap in a HAR-specific threshold if wanted.
SES_T_DEFAULT = 3676.331401
SES_ALPHA_DEFAULT = 1.0

# =====================================================
# LOAD PROFILES ONCE
# =====================================================
df_profiles = pd.read_csv(PROFILES_PATH)

# =====================================================
# HELPERS
# =====================================================
def make_combination_string(row, client_cols):
    """Build the combination key for one row of a combinations table.

    Each non-missing value found under `client_cols` is cast to int, and the
    resulting IDs are joined with "_" in column order (e.g. "3_7").
    """
    present = (row[col] for col in client_cols if pd.notna(row[col]))
    return "_".join(str(int(value)) for value in present)

def _cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of `a` and `b` as a Python float.

    Both inputs are converted to float64 arrays. If either norm is below
    `eps` the similarity is defined as 0.0 instead of dividing by ~zero.
    """
    u = np.asarray(a, dtype=np.float64)
    v = np.asarray(b, dtype=np.float64)
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    degenerate = norm_u < eps or norm_v < eps
    return 0.0 if degenerate else float(np.dot(u, v) / (norm_u * norm_v))

def _clip01(x):
    """Clamp a finite score into [0, 1]; map None / NaN / +-inf to NaN."""
    if x is not None and np.isfinite(x):
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

# =====================================================
# GROUP-LEVEL RULES
# =====================================================
def compute_dhs_group(client_profiles):
    """Return the DHS score of a client group.

    For each profile row, the values of the columns whose name starts with
    "Label" give the term (max - min) / sum. Rows whose sum is not > 0 are
    ignored. The score is 1 minus the mean of those terms, or 0.0 when no row
    contributes a term.
    """
    label_columns = [col for col in client_profiles.columns if col.startswith("Label")]
    matrix = client_profiles[label_columns].to_numpy(dtype=np.float64)
    spreads = [
        (counts.max() - counts.min()) / counts.sum()
        for counts in matrix
        if counts.sum() > 0
    ]
    if not spreads:
        return 0.0
    return 1.0 - np.mean(spreads)

def compute_shs_group(client_profiles, alpha=0.5, beta=0.5, eps=1e-12):
    """Return the SHS score of a client group, in [0, 1].

    The "C_p" and "BW" columns are each min-max scaled within the group (a
    column whose range is below `eps` is scaled to all ones). The score is
    1 minus the mean of alpha * |C_p - mean(C_p)| + beta * |BW - mean(BW)|
    on the scaled values, clipped to [0, 1]. An empty group scores 1.0.
    """
    def _rescale(values):
        # Min-max scale within the group; constant columns become all ones.
        lo, hi = np.min(values), np.max(values)
        if abs(hi - lo) < eps:
            return np.ones_like(values, dtype=np.float64)
        return (values - lo) / (hi - lo)

    cp = client_profiles["C_p"].astype(float).to_numpy()
    bw = client_profiles["BW"].astype(float).to_numpy()
    if cp.size == 0 or bw.size == 0:
        return 1.0

    cp_scaled = _rescale(cp)
    bw_scaled = _rescale(bw)

    cp_dev = np.abs(cp_scaled - cp_scaled.mean())
    bw_dev = np.abs(bw_scaled - bw_scaled.mean())
    deviation = alpha * cp_dev + beta * bw_dev

    return float(np.clip(1.0 - deviation.mean(), 0.0, 1.0))

def compute_ses_group(client_profiles, T, alpha=1.0):
    """Return the SES score of a client group from its "Latency(ms)" column.

    The score is 1.0 when the group is empty or its mean latency is at most
    `T`; otherwise it is (T / mean_latency) ** alpha.
    """
    latencies = client_profiles["Latency(ms)"].astype(float).to_numpy()
    # Short-circuit keeps the mean from being taken over an empty array.
    within_budget = latencies.size == 0 or latencies.mean() <= T
    if within_budget:
        return 1.0
    ratio = T / latencies.mean()
    return float(ratio ** alpha)

def _to_quality_vector(x):
    """Parse a Python-literal string (e.g. "[0.9, 0.8]") into a float64 array.

    Best-effort: any failure while evaluating the literal or converting it to
    floats yields None rather than raising.
    """
    try:
        parsed = ast.literal_eval(x)
        vector = np.asarray(parsed, dtype=np.float64)
    except Exception:
        vector = None
    return vector

def compute_hsq_group(client_profiles):
    """Return the HSQ score of a client group.

    Each "Quality_Factor" entry is parsed into a vector; unparsable entries
    are dropped. The vectors are truncated to the shortest length, and the
    score is the mean cosine similarity between each vector and the group's
    mean vector. Returns 0.0 when no entry can be parsed.
    """
    parsed = (_to_quality_vector(q) for q in client_profiles["Quality_Factor"].values)
    vectors = [vec for vec in parsed if vec is not None]
    if not vectors:
        return 0.0

    shortest = min(map(len, vectors))
    trimmed = [vec[:shortest] for vec in vectors]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    similarities = [_cosine_sim(vec, centroid) for vec in trimmed]
    return np.mean(similarities)

def compute_srs_group(client_profiles):
    """Return the SRS score: 1 minus the mean absolute deviation of the
    group's "Reliability_Score" values from their mean."""
    reliability = client_profiles["Reliability_Score"].astype(float).to_numpy()
    spread = np.abs(reliability - reliability.mean())
    return 1.0 - np.mean(spread)

# =====================================================
# MUS (WEIGHTS-BASED)
# =====================================================
# path -> flattened weight vector (an empty array when the file is missing)
_VEC_CACHE = {}

def _client_weight_path(cid):
    """Return the path of client `cid`'s weight file inside WEIGHTS_DIR."""
    return os.path.join(WEIGHTS_DIR, f"client_{int(cid)}_local.npz")

def _npz_to_vec(path):
    """Load an .npz archive and return all its arrays flattened into one vector.

    Arrays are concatenated in the archive's own order (`data.files`). Results
    are memoised in `_VEC_CACHE` by path; a missing file is cached and
    returned as an empty array so callers can test `vec.size`.
    """
    if path in _VEC_CACHE:
        return _VEC_CACHE[path]

    if not os.path.exists(path):
        vec = np.asarray([])
    else:
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it so one handle is not leaked per client file.
        with np.load(path) as data:
            vec = np.concatenate([data[k].ravel() for k in data.files])

    _VEC_CACHE[path] = vec
    return vec

def compute_mus_for_client_ids(client_ids):
    """Return the MUS score for a set of clients from their saved weights.

    Loads each client's flattened weight vector (clients without a usable
    file are skipped), truncates all vectors to the shortest length, and
    averages the cosine similarity of each vector to the group mean. The
    average is mapped from [-1, 1] to [0, 1] via (x + 1) / 2. Returns NaN
    when no weight vector is available.
    """
    loaded = (_npz_to_vec(_client_weight_path(cid)) for cid in client_ids)
    vectors = [vec for vec in loaded if vec.size > 0]
    if not vectors:
        return np.nan

    shortest = min(len(vec) for vec in vectors)
    trimmed = [vec[:shortest] for vec in vectors]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    mean_similarity = np.mean([_cosine_sim(vec, centroid) for vec in trimmed])
    return (mean_similarity + 1.0) / 2.0

# =====================================================
# CORE PIPELINE FOR ONE FILE
# =====================================================
def compute_df_out_for_one_combo_file(combos_path, df_profiles, out_path):
    """Score every combination in one CSV, save the result, and return it.

    Reads `combos_path`, derives "Combination" and "Num_Clients" from the
    "Client_*" columns, and for each row computes the rule scores (DHS, SHS,
    SES, HSQ, SRS, MUS) from the rows of `df_profiles` whose "Client_ID" is in
    the combination. MCS is the WEIGHTS-weighted mean of the finite scores.
    Rows with no matching profile keep NaN in every score column.

    The scores are attached by row index rather than by the "Combination"
    key, so a combination listed more than once does not multiply rows.

    Returns the table sorted by MCS (descending), which is also written to
    `out_path` as CSV.
    """
    score_cols = ["DHS", "SHS", "SES", "HSQ", "SRS", "MUS", "MCS"]

    df_combos = pd.read_csv(combos_path)
    client_cols = [c for c in df_combos.columns if c.startswith("Client_")]

    df_combos["Combination"] = df_combos.apply(
        lambda r: make_combination_string(r, client_cols), axis=1
    )
    df_combos["Num_Clients"] = df_combos[client_cols].notna().sum(axis=1)

    scored_index = []
    records = []
    for idx, row in df_combos.iterrows():
        client_ids = [row[c] for c in client_cols if pd.notna(row[c])]
        prof = df_profiles[df_profiles["Client_ID"].isin(client_ids)]
        if prof.empty:
            continue

        scores = {
            "DHS": _clip01(compute_dhs_group(prof)),
            "SHS": _clip01(compute_shs_group(prof)),
            "SES": _clip01(compute_ses_group(prof, T=SES_T_DEFAULT, alpha=SES_ALPHA_DEFAULT)),
            "HSQ": _clip01(compute_hsq_group(prof)),
            "SRS": _clip01(compute_srs_group(prof)),
            "MUS": _clip01(compute_mus_for_client_ids(client_ids)),
        }

        # Weighted mean over the rules that produced a finite score, so a
        # missing rule (e.g. no weight files -> MUS is NaN) is left out.
        num, den = 0.0, 0.0
        for k, v in scores.items():
            if np.isfinite(v):
                num += WEIGHTS[k] * v
                den += WEIGHTS[k]
        scores["MCS"] = _clip01(num / den if den > 0 else np.nan)

        scored_index.append(idx)
        records.append(scores)

    # Explicit columns keep the frame well-formed even when no row was scored.
    df_rules = pd.DataFrame(
        records, index=scored_index, columns=score_cols, dtype="float64"
    )
    df_out = df_combos.join(df_rules)
    df_out = df_out.sort_values("MCS", ascending=False).reset_index(drop=True)

    df_out.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path} | rows={len(df_out)}")
    return df_out

# =====================================================
# RUN ALL 5 HAR FILES + STORE IN DICT
# =====================================================
# Scored tables keyed by group label: dfs["A"], dfs["B"], ...
dfs = {}

for label, combos_csv in zip(NAMES, COMBOS_PATHS):
    scored_csv = os.path.join(
        OUT_DIR, f"combination_WITH_COMPOSABILITY_SCORES_{label}_HAR.csv"
    )
    dfs[label] = compute_df_out_for_one_combo_file(combos_csv, df_profiles, scored_csv)

# Example:
# dfs["A"].head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_A_HAR.csv | rows=4950
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_B_HAR.csv | rows=4960
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_C_HAR.csv | rows=4368
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_D_HAR.csv | rows=6435
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_E_HAR.csv | rows=3003


In [403]:
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# =====================================================
# THRESHOLDS (SET THEM HERE ONCE)
# =====================================================
# Per group: "acc" is the cut-off applied to Global_Accuracy (ground truth),
# "mcs" the cut-off applied to MCS (prediction).
THRESHOLDS = {
    "A": {"acc": 84, "mcs": 0.83},
    "B": {"acc": 88.35, "mcs": 0.79},
    "C": {"acc": 88.50, "mcs": 0.79},
    "D": {"acc": 88.20, "mcs": 0.78},
    "E": {"acc": 88.30, "mcs": 0.77},
}

# Map group name -> K (the value printed in the table)
K_MAP = {"A": 2, "B": 3, "C": 5, "D": 7, "E": 10}

# =====================================================
# BUILD METRICS + LATEX LINES
# =====================================================
rows = []
latex_lines = []

for name in ["A", "B", "C", "D", "E"]:
    df_all = dfs[name]

    acc_thr = THRESHOLDS[name]["acc"]
    mcs_thr = THRESHOLDS[name]["mcs"]

    # `NaN > threshold` evaluates to False, so a row missing either value would
    # silently be labelled 0. Keep only rows where both values are present.
    usable = df_all["Global_Accuracy"].notna() & df_all["MCS"].notna()
    n_skipped = int((~usable).sum())
    if n_skipped:
        print(f"[{name}] skipped {n_skipped} rows with missing Global_Accuracy or MCS")
    df_merged = df_all[usable]

    # Binary labels: 1 when the value is strictly above its threshold.
    y_true = (df_merged["Global_Accuracy"] > acc_thr).astype(int)
    y_pred = (df_merged["MCS"] > mcs_thr).astype(int)

    if df_merged.empty:
        # Nothing to evaluate for this group; report NaN instead of failing.
        acc = w_p = w_r = w_f = float("nan")
    else:
        acc = accuracy_score(y_true, y_pred)

        # Support-weighted averages over both classes.
        rep = classification_report(
            y_true, y_pred, digits=6, output_dict=True, zero_division=0
        )
        w_p = rep["weighted avg"]["precision"]
        w_r = rep["weighted avg"]["recall"]
        w_f = rep["weighted avg"]["f1-score"]

    K = K_MAP[name]

    # Full-precision numbers for the summary table.
    rows.append([K, acc, w_p, w_r, w_f])

    # One LaTeX table row: K, accuracy, weighted precision/recall/F1.
    latex_lines.append(
        rf"$K={K}$ & {acc:.4f} & {w_p:.4f} & {w_r:.4f} & {w_f:.4f} \\"
    )

# =====================================================
# PRINT LATEX ROWS (COPY-PASTE)
# Columns: K | Accuracy | Weighted P | Weighted R | Weighted F1
# =====================================================
print("LATEX ROWS:")
for line in latex_lines:
    print(line)

# Optional: dataframe view (not required)
summary_df = pd.DataFrame(
    rows,
    columns=["K", "Accuracy", "Weighted_P", "Weighted_R", "Weighted_F1"]
)
summary_df


LATEX ROWS:
$K=2$ & 0.7069 & 0.6877 & 0.7069 & 0.6764 \\
$K=3$ & 0.7089 & 0.6957 & 0.7089 & 0.7015 \\
$K=5$ & 0.7012 & 0.6844 & 0.7012 & 0.6918 \\
$K=7$ & 0.6816 & 0.6691 & 0.6816 & 0.6712 \\
$K=10$ & 0.6670 & 0.7477 & 0.6670 & 0.6982 \\


Unnamed: 0,K,Accuracy,Weighted_P,Weighted_R,Weighted_F1
0,2,0.706869,0.687701,0.706869,0.67644
1,3,0.708871,0.695713,0.708871,0.701454
2,5,0.701236,0.684388,0.701236,0.691806
3,7,0.681585,0.669056,0.681585,0.671175
4,10,0.667,0.747667,0.667,0.698189


In [None]:
& 70.69 & 68.77 & 70.69
& 70.89 & 69.57 & 70.89
& 70.12 & 68.44 & 70.12
& 68.16 & 66.91 & 68.16
& 66.70 & 74.77 & 66.70\\

**MNIST Composability Model**
---

In [380]:
import os
import ast
import numpy as np
import pandas as pd
from google.colab import drive

drive.mount('/content/drive')

# =====================================================
# INPUTS
# =====================================================
PROFILES_PATH = "/content/MNIST_Client_Profiles_For_Composability_100.csv"

# One combinations file per group; the order must line up with NAMES.
COMBOS_PATHS = [
    "/content/All_Combinations_2_MNIST_100_with_GlobalMetrics.csv",              # A
    "/content/All_Combinations_3_MNIST_32_with_GlobalMetrics.csv",               # B
    "/content/All_Combinations_5_MNIST_first16clients_with_GlobalMetrics.csv",   # C
    "/content/Completed_Combinations_7_MNIST_n15_with_GlobalMetrics.csv",        # D
    "/content/Completed_Combinations_10_MNIST_n15_with_GlobalMetrics.csv",       # E
]

NAMES = list("ABCDE")

# Output folder: each group is saved there as a separate CSV.
OUT_DIR = "/content"
os.makedirs(OUT_DIR, exist_ok=True)

# =====================================================
# CONFIG
# =====================================================
# Folder holding the per-client "client_<id>_local.npz" weight files (MNIST).
WEIGHTS_DIR = "/content/drive/MyDrive/MLaaS_Weights_20_MNIST"

# Relative weight of each rule score in the MCS weighted mean.
WEIGHTS = dict(
    DHS=0.99,
    MUS=0.95,
    SHS=0.45,
    SES=0.60,
    HSQ=0.78,
    SRS=0.32,
)

# SES config (latency only): threshold T and exponent alpha handed to
# compute_ses_group.
SES_T_DEFAULT = 3676.331401
SES_ALPHA_DEFAULT = 1.0

# =====================================================
# LOAD PROFILES ONCE
# =====================================================
df_profiles = pd.read_csv(PROFILES_PATH)

# =====================================================
# HELPERS
# =====================================================
def make_combination_string(row, client_cols):
    """Build the combination key for one row of a combinations table.

    Each non-missing value found under `client_cols` is cast to int, and the
    resulting IDs are joined with "_" in column order (e.g. "3_7").
    """
    present = (row[col] for col in client_cols if pd.notna(row[col]))
    return "_".join(str(int(value)) for value in present)

def _cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of `a` and `b` as a Python float.

    Both inputs are converted to float64 arrays. If either norm is below
    `eps` the similarity is defined as 0.0 instead of dividing by ~zero.
    """
    u = np.asarray(a, dtype=np.float64)
    v = np.asarray(b, dtype=np.float64)
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    degenerate = norm_u < eps or norm_v < eps
    return 0.0 if degenerate else float(np.dot(u, v) / (norm_u * norm_v))

def _clip01(x):
    """Clamp a finite score into [0, 1]; map None / NaN / +-inf to NaN."""
    if x is not None and np.isfinite(x):
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

# =====================================================
# GROUP-LEVEL RULES
# =====================================================
def compute_dhs_group(client_profiles):
    """Return the DHS score of a client group.

    For each profile row, the values of the columns whose name starts with
    "Label" give the term (max - min) / sum. Rows whose sum is not > 0 are
    ignored. The score is 1 minus the mean of those terms, or 0.0 when no row
    contributes a term.
    """
    label_columns = [col for col in client_profiles.columns if col.startswith("Label")]
    matrix = client_profiles[label_columns].to_numpy(dtype=np.float64)
    spreads = [
        (counts.max() - counts.min()) / counts.sum()
        for counts in matrix
        if counts.sum() > 0
    ]
    if not spreads:
        return 0.0
    return 1.0 - np.mean(spreads)

def compute_shs_group(client_profiles, alpha=0.5, beta=0.5, eps=1e-12):
    """Return the SHS score of a client group, in [0, 1].

    The "C_p" and "BW" columns are each min-max scaled within the group (a
    column whose range is below `eps` is scaled to all ones). The score is
    1 minus the mean of alpha * |C_p - mean(C_p)| + beta * |BW - mean(BW)|
    on the scaled values, clipped to [0, 1]. An empty group scores 1.0.
    """
    def _rescale(values):
        # Min-max scale within the group; constant columns become all ones.
        lo, hi = np.min(values), np.max(values)
        if abs(hi - lo) < eps:
            return np.ones_like(values, dtype=np.float64)
        return (values - lo) / (hi - lo)

    cp = client_profiles["C_p"].astype(float).to_numpy()
    bw = client_profiles["BW"].astype(float).to_numpy()
    if cp.size == 0 or bw.size == 0:
        return 1.0

    cp_scaled = _rescale(cp)
    bw_scaled = _rescale(bw)

    cp_dev = np.abs(cp_scaled - cp_scaled.mean())
    bw_dev = np.abs(bw_scaled - bw_scaled.mean())
    deviation = alpha * cp_dev + beta * bw_dev

    return float(np.clip(1.0 - deviation.mean(), 0.0, 1.0))

def compute_ses_group(client_profiles, T, alpha=1.0):
    """Return the SES score of a client group from its "Latency(ms)" column.

    The score is 1.0 when the group is empty or its mean latency is at most
    `T`; otherwise it is (T / mean_latency) ** alpha.
    """
    latencies = client_profiles["Latency(ms)"].astype(float).to_numpy()
    # Short-circuit keeps the mean from being taken over an empty array.
    within_budget = latencies.size == 0 or latencies.mean() <= T
    if within_budget:
        return 1.0
    ratio = T / latencies.mean()
    return float(ratio ** alpha)

def _to_quality_vector(x):
    """Parse a Python-literal string (e.g. "[0.9, 0.8]") into a float64 array.

    Best-effort: any failure while evaluating the literal or converting it to
    floats yields None rather than raising.
    """
    try:
        parsed = ast.literal_eval(x)
        vector = np.asarray(parsed, dtype=np.float64)
    except Exception:
        vector = None
    return vector

def compute_hsq_group(client_profiles):
    """Return the HSQ score of a client group.

    Each "Quality_Factor" entry is parsed into a vector; unparsable entries
    are dropped. The vectors are truncated to the shortest length, and the
    score is the mean cosine similarity between each vector and the group's
    mean vector. Returns 0.0 when no entry can be parsed.
    """
    parsed = (_to_quality_vector(q) for q in client_profiles["Quality_Factor"].values)
    vectors = [vec for vec in parsed if vec is not None]
    if not vectors:
        return 0.0

    shortest = min(map(len, vectors))
    trimmed = [vec[:shortest] for vec in vectors]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    similarities = [_cosine_sim(vec, centroid) for vec in trimmed]
    return np.mean(similarities)

def compute_srs_group(client_profiles):
    """Return the SRS score: 1 minus the mean absolute deviation of the
    group's "Reliability_Score" values from their mean."""
    reliability = client_profiles["Reliability_Score"].astype(float).to_numpy()
    spread = np.abs(reliability - reliability.mean())
    return 1.0 - np.mean(spread)

# =====================================================
# MUS (WEIGHTS-BASED)
# =====================================================
# path -> flattened weight vector (an empty array when the file is missing)
_VEC_CACHE = {}

def _client_weight_path(cid):
    """Return the path of client `cid`'s weight file inside WEIGHTS_DIR."""
    return os.path.join(WEIGHTS_DIR, f"client_{int(cid)}_local.npz")

def _npz_to_vec(path):
    """Load an .npz archive and return all its arrays flattened into one vector.

    Arrays are concatenated in the archive's own order (`data.files`). Results
    are memoised in `_VEC_CACHE` by path; a missing file is cached and
    returned as an empty array so callers can test `vec.size`.
    """
    if path in _VEC_CACHE:
        return _VEC_CACHE[path]

    if not os.path.exists(path):
        vec = np.asarray([])
    else:
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it so one handle is not leaked per client file.
        with np.load(path) as data:
            vec = np.concatenate([data[k].ravel() for k in data.files])

    _VEC_CACHE[path] = vec
    return vec

def compute_mus_for_client_ids(client_ids):
    """Return the MUS score for a set of clients from their saved weights.

    Loads each client's flattened weight vector (clients without a usable
    file are skipped), truncates all vectors to the shortest length, and
    averages the cosine similarity of each vector to the group mean. The
    average is mapped from [-1, 1] to [0, 1] via (x + 1) / 2. Returns NaN
    when no weight vector is available.
    """
    loaded = (_npz_to_vec(_client_weight_path(cid)) for cid in client_ids)
    vectors = [vec for vec in loaded if vec.size > 0]
    if not vectors:
        return np.nan

    shortest = min(len(vec) for vec in vectors)
    trimmed = [vec[:shortest] for vec in vectors]
    centroid = np.mean(np.vstack(trimmed), axis=0)
    mean_similarity = np.mean([_cosine_sim(vec, centroid) for vec in trimmed])
    return (mean_similarity + 1.0) / 2.0

# =====================================================
# CORE PIPELINE FOR ONE FILE
# =====================================================
def compute_df_out_for_one_combo_file(combos_path, df_profiles, out_path):
    """Score every combination in one CSV, save the result, and return it.

    Reads `combos_path`, derives "Combination" and "Num_Clients" from the
    "Client_*" columns, and for each row computes the rule scores (DHS, SHS,
    SES, HSQ, SRS, MUS) from the rows of `df_profiles` whose "Client_ID" is in
    the combination. MCS is the WEIGHTS-weighted mean of the finite scores.
    Rows with no matching profile keep NaN in every score column.

    The scores are attached by row index rather than by the "Combination"
    key, so a combination listed more than once does not multiply rows.

    Returns the table sorted by MCS (descending), which is also written to
    `out_path` as CSV.
    """
    score_cols = ["DHS", "SHS", "SES", "HSQ", "SRS", "MUS", "MCS"]

    df_combos = pd.read_csv(combos_path)
    client_cols = [c for c in df_combos.columns if c.startswith("Client_")]

    df_combos["Combination"] = df_combos.apply(
        lambda r: make_combination_string(r, client_cols), axis=1
    )
    df_combos["Num_Clients"] = df_combos[client_cols].notna().sum(axis=1)

    scored_index = []
    records = []
    for idx, row in df_combos.iterrows():
        client_ids = [row[c] for c in client_cols if pd.notna(row[c])]
        prof = df_profiles[df_profiles["Client_ID"].isin(client_ids)]
        if prof.empty:
            continue

        scores = {
            "DHS": _clip01(compute_dhs_group(prof)),
            "SHS": _clip01(compute_shs_group(prof)),
            "SES": _clip01(compute_ses_group(prof, T=SES_T_DEFAULT, alpha=SES_ALPHA_DEFAULT)),
            "HSQ": _clip01(compute_hsq_group(prof)),
            "SRS": _clip01(compute_srs_group(prof)),
            "MUS": _clip01(compute_mus_for_client_ids(client_ids)),
        }

        # Weighted mean over the rules that produced a finite score, so a
        # missing rule (e.g. no weight files -> MUS is NaN) is left out.
        num, den = 0.0, 0.0
        for k, v in scores.items():
            if np.isfinite(v):
                num += WEIGHTS[k] * v
                den += WEIGHTS[k]
        scores["MCS"] = _clip01(num / den if den > 0 else np.nan)

        scored_index.append(idx)
        records.append(scores)

    # Explicit columns keep the frame well-formed even when no row was scored.
    df_rules = pd.DataFrame(
        records, index=scored_index, columns=score_cols, dtype="float64"
    )
    df_out = df_combos.join(df_rules)
    df_out = df_out.sort_values("MCS", ascending=False).reset_index(drop=True)

    df_out.to_csv(out_path, index=False)
    print(f"✅ Saved: {out_path} | rows={len(df_out)}")
    return df_out

# =====================================================
# RUN ALL 5 FILES + STORE IN DICT
# =====================================================
# Scored tables keyed by group label: dfs["A"], dfs["B"], ...
dfs = {}

for label, combos_csv in zip(NAMES, COMBOS_PATHS):
    scored_csv = os.path.join(
        OUT_DIR, f"combination_WITH_COMPOSABILITY_SCORES_{label}.csv"
    )
    dfs[label] = compute_df_out_for_one_combo_file(combos_csv, df_profiles, scored_csv)

# Example access:
# dfs["A"].head()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_A.csv | rows=4950
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_B.csv | rows=4960
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_C.csv | rows=4368
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_D.csv | rows=6435
✅ Saved: /content/combination_WITH_COMPOSABILITY_SCORES_E.csv | rows=3003


In [395]:
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# =====================================================
# THRESHOLDS (SET THEM HERE ONCE)
# =====================================================
# Per group: "acc" is the cut-off applied to Global_Accuracy (ground truth),
# "mcs" the cut-off applied to MCS (prediction).
THRESHOLDS = {
    "A": {"acc": 74, "mcs": 0.74},  # K=2
    "B": {"acc": 82, "mcs": 0.85},  # K=3
    "C": {"acc": 81, "mcs": 0.82},  # K=5
    "D": {"acc": 81, "mcs": 0.82},  # K=7
    "E": {"acc": 81, "mcs": 0.80},  # K=10
}

# Map group name -> K (the value printed in the table)
K_MAP = {"A": 2, "B": 3, "C": 5, "D": 7, "E": 10}

# =====================================================
# BUILD METRICS + LATEX LINES
# =====================================================
rows = []
latex_lines = []

for name in ["A", "B", "C", "D", "E"]:
    df_all = dfs[name]

    acc_thr = THRESHOLDS[name]["acc"]
    mcs_thr = THRESHOLDS[name]["mcs"]

    # `NaN > threshold` evaluates to False, so a row missing either value would
    # silently be labelled 0. Keep only rows where both values are present.
    usable = df_all["Global_Accuracy"].notna() & df_all["MCS"].notna()
    n_skipped = int((~usable).sum())
    if n_skipped:
        print(f"[{name}] skipped {n_skipped} rows with missing Global_Accuracy or MCS")
    df_merged = df_all[usable]

    # Binary labels: 1 when the value is strictly above its threshold.
    y_true = (df_merged["Global_Accuracy"] > acc_thr).astype(int)
    y_pred = (df_merged["MCS"] > mcs_thr).astype(int)

    if df_merged.empty:
        # Nothing to evaluate for this group; report NaN instead of failing.
        acc = w_p = w_r = w_f = float("nan")
    else:
        acc = accuracy_score(y_true, y_pred)

        # Support-weighted averages over both classes.
        rep = classification_report(
            y_true, y_pred, digits=6, output_dict=True, zero_division=0
        )
        w_p = rep["weighted avg"]["precision"]
        w_r = rep["weighted avg"]["recall"]
        w_f = rep["weighted avg"]["f1-score"]

    K = K_MAP[name]

    # Full-precision numbers for the summary table.
    rows.append([K, acc, w_p, w_r, w_f])

    # One LaTeX table row: K, accuracy, weighted precision/recall/F1.
    latex_lines.append(
        rf"$K={K}$ & {acc:.4f} & {w_p:.4f} & {w_r:.4f} & {w_f:.4f} \\"
    )

# =====================================================
# PRINT LATEX ROWS (COPY-PASTE)
# Columns: K | Accuracy | Weighted P | Weighted R | Weighted F1
# =====================================================
print("LATEX ROWS:")
for line in latex_lines:
    print(line)

# Optional: dataframe view (not required)
summary_df = pd.DataFrame(
    rows,
    columns=["K", "Accuracy", "Weighted_P", "Weighted_R", "Weighted_F1"]
)
summary_df


LATEX ROWS:
$K=2$ & 0.7434 & 0.5830 & 0.7434 & 0.6356 \\
$K=3$ & 0.7200 & 0.7227 & 0.7200 & 0.7213 \\
$K=5$ & 0.7296 & 0.7242 & 0.7296 & 0.7268 \\
$K=7$ & 0.7622 & 0.7183 & 0.7622 & 0.7118 \\
$K=10$ & 0.7469 & 0.8198 & 0.7469 & 0.7768 \\


Unnamed: 0,K,Accuracy,Weighted_P,Weighted_R,Weighted_F1
0,2,0.743434,0.582954,0.743434,0.635612
1,3,0.71996,0.72265,0.71996,0.721278
2,5,0.729625,0.724224,0.729625,0.726802
3,7,0.762238,0.718275,0.762238,0.711787
4,10,0.74692,0.819771,0.74692,0.776821


In [None]:
 & 0.7434 & 0.5830 & 0.7434 \\
 & 0.7200 & 0.7227 & 0.7200 \\
 & 0.7296 & 0.7242 & 0.7296  \\
 & 0.7622 & 0.7183 & 0.7622 \\
 & 0.7469 & 0.8198 & 0.7469 \\