**MLaaS Training Model**
---

In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Reload datasets
df_cifar = pd.read_csv("/content/CIFAR10_Federated_Combinations_Realistic.csv").assign(Dataset="CIFAR")
df_fmnist = pd.read_csv("/content/Fashion_MNIST_Federated_Combinations_Realistic.csv").assign(Dataset="F-MNIST")
df_mnist = pd.read_csv("/content/MNIST_Federated_Combinations_Realistic.csv").assign(Dataset="MNIST")
df_all = pd.concat([df_cifar, df_fmnist, df_mnist], ignore_index=True)
df_all

Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,C1_Label4,...,C5_Label5,C5_Label6,C5_Label7,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset
0,C1-C2,2400,3072,43,2556,240,240,240,240,240,...,0,0,0,0,0,11.270000,4796.0,5069.718838,2556.416035,CIFAR
1,C1-C3,2400,3072,43,2556,240,240,240,240,240,...,0,0,0,0,0,13.860001,4800.0,5134.560823,2578.144789,CIFAR
2,C1-C4,2400,3072,43,2556,240,240,240,240,240,...,0,0,0,0,0,15.780000,4794.0,5305.062532,2748.646498,CIFAR
3,C1-C5,2400,3072,43,2556,240,240,240,240,240,...,0,0,0,0,0,17.380001,4800.0,5015.784264,2556.416035,CIFAR
4,C1-C6,2400,3072,43,2556,240,240,240,240,240,...,0,0,0,0,0,12.840000,4796.0,5106.859207,2556.416035,CIFAR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1876,C5-C6-C7-C8-C10,2800,784,80,1806,261,281,285,283,274,...,498,141,158,388,14,44.769999,13985.0,8569.262505,1806.708097,MNIST
1877,C5-C6-C7-C9-C10,2800,784,80,1806,261,281,285,283,274,...,498,141,158,388,14,57.309997,13990.0,8561.102867,1806.708097,MNIST
1878,C5-C6-C8-C9-C10,2800,784,80,1806,261,281,285,283,274,...,498,141,158,388,14,46.640000,13985.0,8571.797371,1806.708097,MNIST
1879,C5-C7-C8-C9-C10,2800,784,80,1806,261,281,285,283,274,...,498,141,158,388,14,48.800001,13990.0,8570.687532,1806.708097,MNIST


In [3]:
df_all.columns

Index(['Combination', 'C1_DataVolume(MB)', 'C1_FeatureCount', 'C1_Accuracy(%)',
       'C1_Latency(ms)', 'C1_Label0', 'C1_Label1', 'C1_Label2', 'C1_Label3',
       'C1_Label4', 'C1_Label5', 'C1_Label6', 'C1_Label7', 'C1_Label8',
       'C1_Label9', 'C2_DataVolume(MB)', 'C2_FeatureCount', 'C2_Accuracy(%)',
       'C2_Latency(ms)', 'C2_Label0', 'C2_Label1', 'C2_Label2', 'C2_Label3',
       'C2_Label4', 'C2_Label5', 'C2_Label6', 'C2_Label7', 'C2_Label8',
       'C2_Label9', 'C3_DataVolume(MB)', 'C3_FeatureCount', 'C3_Accuracy(%)',
       'C3_Latency(ms)', 'C3_Label0', 'C3_Label1', 'C3_Label2', 'C3_Label3',
       'C3_Label4', 'C3_Label5', 'C3_Label6', 'C3_Label7', 'C3_Label8',
       'C3_Label9', 'C4_DataVolume(MB)', 'C4_FeatureCount', 'C4_Accuracy(%)',
       'C4_Latency(ms)', 'C4_Label0', 'C4_Label1', 'C4_Label2', 'C4_Label3',
       'C4_Label4', 'C4_Label5', 'C4_Label6', 'C4_Label7', 'C4_Label8',
       'C4_Label9', 'C5_DataVolume(MB)', 'C5_FeatureCount', 'C5_Accuracy(%)',
       'C5_L

In [4]:
df_all.to_csv("all_datasets.csv", index=False)

**Zero-shot Composability rules(Combination-2)**
---

In [5]:
import pandas as pd
import numpy as np


# Load dataset
df = pd.read_csv("all_datasets.csv")

# Filter: only length-2 combinations
df2 = df[df["Combination"].str.count("-") == 1].copy()

# --- Helper: normalized label vectors ---
def norm_labels(row, prefix):
    v = row[[f"{prefix}_Label{i}" for i in range(10)]].values.astype(float)
    s = v.sum()
    return v / s if s != 0 else v

# --- Compute all three scores ---
def compute_scores(row):
    # 1. Data Composability Score (Euclidean distance between normalized label distributions)
    v1 = norm_labels(row, "C1")
    v2 = norm_labels(row, "C2")
    data_score = float(np.sqrt(np.sum((v1 - v2) ** 2)))

    # 2. Scalability Score (Latency ratio — overhead factor)
    lat1 = float(row["C1_Latency(ms)"])
    lat2 = float(row["C2_Latency(ms)"])
    scalability_score = (lat1 + lat2) / lat1 if lat1 != 0 else np.nan

    # 3. Accuracy Similarity Score (between two individual accuracies)
    acc1 = float(row["C1_Accuracy(%)"])
    acc2 = float(row["C2_Accuracy(%)"])
    acc_sim = 1 - (abs(acc1 - acc2) / max(acc1, acc2)) if max(acc1, acc2) != 0 else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scalability_score,
        "Accuracy_Similarity_Score": acc_sim
    })

# Apply to df2
scores = df2.apply(compute_scores, axis=1)
df2 = pd.concat([df2, scores], axis=1)
df2

Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,C1_Label4,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,C1-C2,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.270000,4796.0,5069.718838,2556.416035,CIFAR,0.319530,1.983177,0.255814
1,C1-C3,2400,3072,43,2556,240,240,240,240,240,...,0,0,13.860001,4800.0,5134.560823,2578.144789,CIFAR,0.000000,2.008607,0.906977
2,C1-C4,2400,3072,43,2556,240,240,240,240,240,...,0,0,15.780000,4794.0,5305.062532,2748.646498,CIFAR,0.416989,2.075117,0.697674
3,C1-C5,2400,3072,43,2556,240,240,240,240,240,...,0,0,17.380001,4800.0,5015.784264,2556.416035,CIFAR,0.013957,1.962050,0.255814
4,C1-C6,2400,3072,43,2556,240,240,240,240,240,...,0,0,12.840000,4796.0,5106.859207,2556.416035,CIFAR,0.195418,1.997653,0.325581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1294,C7-C9,2800,784,79,1676,280,280,280,280,280,...,0,0,32.990000,5600.0,3355.759144,1679.147005,MNIST,0.009833,2.001790,0.868132
1295,C7-C10,2800,784,79,1676,280,280,280,280,280,...,0,0,77.960002,5595.0,3397.525787,1720.913649,MNIST,0.215068,2.026253,0.918605
1296,C8-C9,2795,784,90,1687,143,201,100,136,488,...,0,0,67.780000,5595.0,3366.453648,1687.306643,MNIST,0.182592,1.995258,0.989011
1297,C8-C10,2795,784,90,1687,143,201,100,136,488,...,0,0,59.509999,5590.0,3408.220291,1720.913649,MNIST,0.269963,2.019561,0.955556


In [6]:
df2.to_csv("DF2.csv")

**Zero-shot Composability rules(Combination-3)**
---

In [7]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("all_datasets.csv")

# Filter rows with combination length = 3
df3 = df[df["Combination"].str.count("-") == 2].copy()

# Load df2 (2-combination data) for lookup of base values
df2 = df[df["Combination"].str.count("-") == 1].copy()

# Create a dataset-aware lookup for base combinations (MultiIndex: Combination + Dataset)
base_lookup = df2.set_index(["Combination", "Dataset"])[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]]

# Helper function to get normalized label vectors
def norm_labels(row, prefix_list):
    summed = np.zeros(10)
    for prefix in prefix_list:
        cols = [f"{prefix}_Label{i}" for i in range(10)]
        existing = [c for c in cols if c in row.index]
        if existing:
            summed += row[existing].values.astype(float)
    total = summed.sum()
    return summed / total if total != 0 else summed

# Compute composability metrics for 3-combination rows
def compute_d3(row):
    clients = row["Combination"].split("-")
    base_combo = "-".join(clients[:2])
    added_client = clients[2]
    dataset_name = row["Dataset"]

    # --- Data composability ---
    h_base = norm_labels(row, clients[:2])
    h_add = norm_labels(row, [added_client])
    data_score = float(np.sqrt(np.sum((h_base - h_add) ** 2)))

    # --- Scalability & Accuracy similarity ---
    try:
        base_metrics = base_lookup.loc[(base_combo, dataset_name)]
        lat_base = float(base_metrics["Global_Latency_Sum(ms)"])
        acc_base = float(base_metrics["Global_Accuracy(%)"])
    except KeyError:
        lat_base, acc_base = np.nan, np.nan

    lat_now = float(row["Global_Latency_Sum(ms)"])
    acc_now = float(row["Global_Accuracy(%)"])

    scalability_score = lat_now / lat_base if lat_base not in [0, np.nan] else np.nan
    acc_sim = 1 - (abs(acc_base - acc_now) / max(acc_base, acc_now)) if max(acc_base, acc_now) != 0 else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scalability_score,
        "Accuracy_Similarity_Score": acc_sim
    })

# Apply calculations
df3_scores = df3.apply(compute_d3, axis=1)
df3 = pd.concat([df3, df3_scores], axis=1)
df3

Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,C1_Label4,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
45,C1-C2-C3,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.830000,7196.0,7647.863626,2578.144789,CIFAR,0.159632,1.508538,0.952663
46,C1-C2-C4,2400,3072,43,2556,240,240,240,240,240,...,0,0,18.770000,7190.0,7818.365335,2748.646498,CIFAR,0.354235,1.542169,0.600426
47,C1-C2-C5,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.260000,7196.0,7529.087067,2556.416035,CIFAR,0.354235,1.485109,0.999113
48,C1-C2-C6,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.280000,7192.0,7620.162010,2556.416035,CIFAR,0.354235,1.503074,0.999113
49,C1-C2-C7,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.300000,7196.0,7653.143167,2583.424330,CIFAR,0.354235,1.509579,0.997345
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1414,C6-C9-C10,2795,784,61,1677,67,315,90,531,288,...,0,0,54.570001,8390.0,5077.782631,1720.913649,MNIST,0.000000,1.512654,0.944771
1415,C7-C8-C9,2800,784,79,1676,280,280,280,280,280,...,0,0,30.970001,8395.0,5043.065786,1687.306643,MNIST,0.000000,1.499164,0.586998
1416,C7-C8-C10,2800,784,79,1676,280,280,280,280,280,...,0,0,54.780000,8390.0,5084.832430,1720.913649,MNIST,0.000000,1.511580,0.963125
1417,C7-C9-C10,2800,784,79,1676,280,280,280,280,280,...,0,0,51.859999,8395.0,5076.672792,1720.913649,MNIST,0.000000,1.512824,0.636136


In [8]:
df3.to_csv("DF3.csv", index=False)

**Zero-shot Composability rules(Combination-4)**
---

In [9]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("all_datasets.csv")

# Filter rows with combination length = 4
df4 = df[df["Combination"].str.count("-") == 3].copy()

# Load df3 (3-combination data) for lookup of base values
df3 = df[df["Combination"].str.count("-") == 2].copy()

# Create dataset-aware lookup for base combinations (MultiIndex: Combination + Dataset)
base_lookup = df3.set_index(["Combination", "Dataset"])[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]]

# Helper function for normalized label vectors
def norm_labels(row, prefix_list):
    summed = np.zeros(10)
    for prefix in prefix_list:
        cols = [f"{prefix}_Label{i}" for i in range(10)]
        existing = [c for c in cols if c in row.index]
        if existing:
            summed += row[existing].values.astype(float)
    total = summed.sum()
    return summed / total if total != 0 else summed

# Compute composability metrics for 4-combination rows
def compute_d4(row):
    clients = row["Combination"].split("-")
    base_combo = "-".join(clients[:3])      # base = first three clients
    added_client = clients[3]               # added = last one
    dataset_name = row["Dataset"]

    # --- Data composability ---
    h_base = norm_labels(row, clients[:3])
    h_add = norm_labels(row, [added_client])
    data_score = float(np.sqrt(np.sum((h_base - h_add) ** 2)))

    # --- Scalability & Accuracy similarity ---
    try:
        base_metrics = base_lookup.loc[(base_combo, dataset_name)]
        lat_base = float(base_metrics["Global_Latency_Sum(ms)"])
        acc_base = float(base_metrics["Global_Accuracy(%)"])
    except KeyError:
        lat_base, acc_base = np.nan, np.nan

    lat_now = float(row["Global_Latency_Sum(ms)"])
    acc_now = float(row["Global_Accuracy(%)"])

    scalability_score = lat_now / lat_base if lat_base not in [0, np.nan] else np.nan
    acc_sim = 1 - (abs(acc_base - acc_now) / max(acc_base, acc_now)) if max(acc_base, acc_now) != 0 else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scalability_score,
        "Accuracy_Similarity_Score": acc_sim
    })

# Apply calculations
df4_scores = df4.apply(compute_d4, axis=1)
df4 = pd.concat([df4, df4_scores], axis=1)

# View results
print(df4[["Combination", "Dataset", "Data_Composability_Score",
            "Scalability_Score", "Accuracy_Similarity_Score"]].head(10))


      Combination Dataset  Data_Composability_Score  Scalability_Score  \
165   C1-C2-C3-C4   CIFAR                  0.449576           1.359401   
166   C1-C2-C3-C5   CIFAR                  0.333645           1.321576   
167   C1-C2-C3-C6   CIFAR                  0.333645           1.333484   
168   C1-C2-C3-C7   CIFAR                  0.333645           1.337797   
169   C1-C2-C3-C8   CIFAR                  0.333645           1.346093   
170   C1-C2-C3-C9   CIFAR                  0.333645           1.329642   
171  C1-C2-C3-C10   CIFAR                  0.333645           1.371903   
172   C1-C2-C4-C5   CIFAR                  0.334766           1.314563   
173   C1-C2-C4-C6   CIFAR                  0.352015           1.326212   
174   C1-C2-C4-C7   CIFAR                  0.333645           1.330430   

     Accuracy_Similarity_Score  
165                   0.923077  
166                   0.940828  
167                   0.958580  
168                   0.922232  
169                 

In [10]:
df4

Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,C1_Label4,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
165,C1-C2-C3-C4,2400,3072,43,2556,240,240,240,240,240,...,0,0,10.920000,9590.0,10396.510124,2748.646498,CIFAR,0.449576,1.359401,0.923077
166,C1-C2-C3-C5,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.130000,9596.0,10107.231855,2578.144789,CIFAR,0.333645,1.321576,0.940828
167,C1-C2-C3-C6,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.340000,9592.0,10198.306799,2578.144789,CIFAR,0.333645,1.333484,0.958580
168,C1-C2-C3-C7,2400,3072,43,2556,240,240,240,240,240,...,0,0,10.910000,9596.0,10231.287956,2583.424330,CIFAR,0.333645,1.337797,0.922232
169,C1-C2-C3-C8,2400,3072,43,2556,240,240,240,240,240,...,0,0,11.980000,9592.0,10294.736385,2646.872759,CIFAR,0.333645,1.346093,0.987479
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1624,C6-C7-C8-C9,2795,784,61,1677,67,315,90,531,288,...,0,0,22.420000,11190.0,6720.787764,1687.306643,MNIST,0.000000,1.333056,0.549914
1625,C6-C7-C8-C10,2795,784,61,1677,67,315,90,531,288,...,0,0,49.900001,11185.0,6762.554407,1720.913649,MNIST,0.000000,1.341340,0.817034
1626,C6-C7-C9-C10,2795,784,61,1677,67,315,90,531,288,...,0,0,60.079998,11190.0,6754.394770,1720.913649,MNIST,0.000000,1.341893,0.756991
1627,C6-C8-C9-C10,2795,784,61,1677,67,315,90,531,288,...,0,0,49.129999,11185.0,6765.089273,1720.913649,MNIST,0.000000,1.341168,0.957513


In [11]:
df4.to_csv("DF4.csv", index=False)

**Zero-shot Composability rules(Combination-5)**
---

In [12]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("all_datasets.csv")

# ---------- Helper functions ----------
def norm_labels_from_prefixes(row, prefixes):
    """Sum label histograms for given client prefixes and return normalized distribution."""
    summed = np.zeros(10)
    for p in prefixes:
        cols = [f"{p}_Label{i}" for i in range(10)]
        existing = [c for c in cols if c in row.index]
        if existing:
            summed += row[existing].values.astype(float)
    total = summed.sum()
    return summed / total if total != 0 else summed

def safe_get(row, col):
    return float(row[col]) if col in row.index and pd.notna(row[col]) else np.nan

def sim_from_values(a, b):
    if pd.isna(a) or pd.isna(b):
        return np.nan
    m = max(a, b)
    return 1 - (abs(a - b) / m) if m != 0 else np.nan

# ---------- Build dataset-aware lookup for base 3-combo global metrics ----------
df_len3 = df[df["Combination"].str.count("-") == 2].copy()
lookup3 = df_len3.set_index(["Combination", "Dataset"])[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]]

# ---------- Filter to length-5 combinations ----------
df5 = df[df["Combination"].str.count("-") == 4].copy()

def compute_len5_scores_sum(row):
    clients = row["Combination"].split("-")
    base3 = "-".join(clients[:3])
    add1, add2 = clients[3], clients[4]
    dataset_name = row["Dataset"]

    # 1) Data Composability Score (SUM, not mean)
    h_base = norm_labels_from_prefixes(row, clients[:3])
    h_add1 = norm_labels_from_prefixes(row, [add1])
    h_add2 = norm_labels_from_prefixes(row, [add2])
    d1 = float(np.sqrt(np.sum((h_base - h_add1) ** 2)))
    d2 = float(np.sqrt(np.sum((h_base - h_add2) ** 2)))
    data_comp_sum = np.nansum([d1, d2])  # sum, ignores NaNs

    # 2) Scalability Score (SUM of per-candidate ratios)
    try:
        base_metrics = lookup3.loc[(base3, dataset_name)]
        lat_base = float(base_metrics["Global_Latency_Sum(ms)"])
        acc_base = float(base_metrics["Global_Accuracy(%)"])
    except KeyError:
        lat_base = np.nan
        acc_base = np.nan

    lat1 = safe_get(row, f"{add1}_Latency(ms)")
    lat2 = safe_get(row, f"{add2}_Latency(ms)")
    s1 = (lat_base + lat1) / lat_base if pd.notna(lat_base) and lat_base != 0 and pd.notna(lat1) else np.nan
    s2 = (lat_base + lat2) / lat_base if pd.notna(lat_base) and lat_base != 0 and pd.notna(lat2) else np.nan
    scalability_sum = np.nansum([s1, s2])  # sum, ignores NaNs

    # 3) Accuracy Similarity Score (SUM of similarities)
    acc1 = safe_get(row, f"{add1}_Accuracy(%)")
    acc2 = safe_get(row, f"{add2}_Accuracy(%)")
    a1 = sim_from_values(acc_base, acc1)
    a2 = sim_from_values(acc_base, acc2)
    acc_similarity_sum = np.nansum([a1, a2])  # sum, ignores NaNs

    return pd.Series({
        "Data_Composability_Score": data_comp_sum,
        "Scalability_Score": scalability_sum,
        "Accuracy_Similarity_Score": acc_similarity_sum
    })

# Apply to df5
scores5_sum = df5.apply(compute_len5_scores_sum, axis=1)
df5 = pd.concat([df5, scores5_sum], axis=1)
df5

Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,C1_Label4,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
375,C1-C2-C3-C4-C5,2400,3072,43,2556,240,240,240,240,240,...,232,239,11.050000,11990.0,12855.878353,2748.646498,CIFAR,0.546187,2.680844,1.324173
376,C1-C2-C3-C4-C6,2400,3072,43,2556,240,240,240,240,240,...,164,250,10.930000,11986.0,12946.953297,2748.646498,CIFAR,0.783222,1.359316,0.394333
377,C1-C2-C3-C4-C7,2400,3072,43,2556,240,240,240,240,240,...,240,240,10.950000,11990.0,12979.934454,2748.646498,CIFAR,0.783222,1.359316,0.394333
378,C1-C2-C3-C4-C8,2400,3072,43,2556,240,240,240,240,240,...,231,660,10.850000,11986.0,13043.382883,2748.646498,CIFAR,0.783222,1.359316,0.394333
379,C1-C2-C3-C4-C9,2400,3072,43,2556,240,240,240,240,240,...,228,247,11.110000,11990.0,12917.570829,2748.646498,CIFAR,0.783222,1.359316,0.394333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1876,C5-C6-C7-C8-C10,2800,784,80,1806,261,281,285,283,274,...,388,14,44.769999,13985.0,8569.262505,1806.708097,MNIST,0.764864,0.000000,0.000000
1877,C5-C6-C7-C9-C10,2800,784,80,1806,261,281,285,283,274,...,388,14,57.309997,13990.0,8561.102867,1806.708097,MNIST,0.764864,0.000000,0.000000
1878,C5-C6-C8-C9-C10,2800,784,80,1806,261,281,285,283,274,...,388,14,46.640000,13985.0,8571.797371,1806.708097,MNIST,0.764864,0.000000,0.000000
1879,C5-C7-C8-C9-C10,2800,784,80,1806,261,281,285,283,274,...,388,14,48.800001,13990.0,8570.687532,1806.708097,MNIST,0.764864,0.000000,0.000000


In [13]:
df5.to_csv("DF5.csv", index=False)

In [14]:
# Reload datasets
df2 = pd.read_csv("/content/DF2.csv")
df3 = pd.read_csv("/content/DF3.csv")
df4 = pd.read_csv("/content/DF4.csv")
df5 = pd.read_csv("/content/DF5.csv")
df_all = pd.concat([df2,df3,df5])

In [15]:
df_all

Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,C1_Label3,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,0.0,C1-C2,2400,3072,43,2556,240,240,240,240,...,0,0,11.270000,4796.0,5069.718838,2556.416035,CIFAR,0.319530,1.983177,0.255814
1,1.0,C1-C3,2400,3072,43,2556,240,240,240,240,...,0,0,13.860001,4800.0,5134.560823,2578.144789,CIFAR,0.000000,2.008607,0.906977
2,2.0,C1-C4,2400,3072,43,2556,240,240,240,240,...,0,0,15.780000,4794.0,5305.062532,2748.646498,CIFAR,0.416989,2.075117,0.697674
3,3.0,C1-C5,2400,3072,43,2556,240,240,240,240,...,0,0,17.380001,4800.0,5015.784264,2556.416035,CIFAR,0.013957,1.962050,0.255814
4,4.0,C1-C6,2400,3072,43,2556,240,240,240,240,...,0,0,12.840000,4796.0,5106.859207,2556.416035,CIFAR,0.195418,1.997653,0.325581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,,C5-C6-C7-C8-C10,2800,784,80,1806,261,281,285,283,...,388,14,44.769999,13985.0,8569.262505,1806.708097,MNIST,0.764864,0.000000,0.000000
752,,C5-C6-C7-C9-C10,2800,784,80,1806,261,281,285,283,...,388,14,57.309997,13990.0,8561.102867,1806.708097,MNIST,0.764864,0.000000,0.000000
753,,C5-C6-C8-C9-C10,2800,784,80,1806,261,281,285,283,...,388,14,46.640000,13985.0,8571.797371,1806.708097,MNIST,0.764864,0.000000,0.000000
754,,C5-C7-C8-C9-C10,2800,784,80,1806,261,281,285,283,...,388,14,48.800001,13990.0,8570.687532,1806.708097,MNIST,0.764864,0.000000,0.000000


In [16]:
df_all.isna().sum()

Unnamed: 0,0
Unnamed: 0,1116
Combination,0
C1_DataVolume(MB),0
C1_FeatureCount,0
C1_Accuracy(%),0
...,...
Global_RoundTime_Max(ms),0
Dataset,0
Data_Composability_Score,0
Scalability_Score,0


In [17]:
#df_all = df_all.drop(columns=["Unnamed: 0"])
df_all.to_csv("Zero_shot_Dataset.csv")

In [18]:
df_all.columns

Index(['Unnamed: 0', 'Combination', 'C1_DataVolume(MB)', 'C1_FeatureCount',
       'C1_Accuracy(%)', 'C1_Latency(ms)', 'C1_Label0', 'C1_Label1',
       'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5', 'C1_Label6',
       'C1_Label7', 'C1_Label8', 'C1_Label9', 'C2_DataVolume(MB)',
       'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)', 'C2_Label0',
       'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
       'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9', 'C3_DataVolume(MB)',
       'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)', 'C3_Label0',
       'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
       'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9', 'C4_DataVolume(MB)',
       'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)', 'C4_Label0',
       'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
       'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9', 'C5_DataVolume(MB)',
       'C5_FeatureCount', 'C5_Accur

In [19]:
df_all=pd.read_csv("Zero_shot_Dataset.csv")
df_all

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,0,0.0,C1-C2,2400,3072,43,2556,240,240,240,...,0,0,11.270000,4796.0,5069.718838,2556.416035,CIFAR,0.319530,1.983177,0.255814
1,1,1.0,C1-C3,2400,3072,43,2556,240,240,240,...,0,0,13.860001,4800.0,5134.560823,2578.144789,CIFAR,0.000000,2.008607,0.906977
2,2,2.0,C1-C4,2400,3072,43,2556,240,240,240,...,0,0,15.780000,4794.0,5305.062532,2748.646498,CIFAR,0.416989,2.075117,0.697674
3,3,3.0,C1-C5,2400,3072,43,2556,240,240,240,...,0,0,17.380001,4800.0,5015.784264,2556.416035,CIFAR,0.013957,1.962050,0.255814
4,4,4.0,C1-C6,2400,3072,43,2556,240,240,240,...,0,0,12.840000,4796.0,5106.859207,2556.416035,CIFAR,0.195418,1.997653,0.325581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1246,751,,C5-C6-C7-C8-C10,2800,784,80,1806,261,281,285,...,388,14,44.769999,13985.0,8569.262505,1806.708097,MNIST,0.764864,0.000000,0.000000
1247,752,,C5-C6-C7-C9-C10,2800,784,80,1806,261,281,285,...,388,14,57.309997,13990.0,8561.102867,1806.708097,MNIST,0.764864,0.000000,0.000000
1248,753,,C5-C6-C8-C9-C10,2800,784,80,1806,261,281,285,...,388,14,46.640000,13985.0,8571.797371,1806.708097,MNIST,0.764864,0.000000,0.000000
1249,754,,C5-C7-C8-C9-C10,2800,784,80,1806,261,281,285,...,388,14,48.800001,13990.0,8570.687532,1806.708097,MNIST,0.764864,0.000000,0.000000


In [20]:
import pandas as pd

# Assuming your dataframe is named df_all or df_att
# Example: df_filtered = df_all.copy()

# Filter rows where the 'Combination' has exactly 4 dashes (i.e., 5 clients)
df_5combos = df_all[df_all['Combination'].str.count("-") == 4].copy()

# Optional: Reset index if needed
df_5combos.reset_index(drop=True, inplace=True)

# Display result
print(f"Filtered dataframe shape: {df_5combos.shape}")
print(df_5combos[['Combination']].head())
df_5combos

Filtered dataframe shape: (756, 81)
      Combination
0  C1-C2-C3-C4-C5
1  C1-C2-C3-C4-C6
2  C1-C2-C3-C4-C7
3  C1-C2-C3-C4-C8
4  C1-C2-C3-C4-C9


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,C5_Label8,C5_Label9,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Dataset,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,0,,C1-C2-C3-C4-C5,2400,3072,43,2556,240,240,240,...,232,239,11.050000,11990.0,12855.878353,2748.646498,CIFAR,0.546187,2.680844,1.324173
1,1,,C1-C2-C3-C4-C6,2400,3072,43,2556,240,240,240,...,164,250,10.930000,11986.0,12946.953297,2748.646498,CIFAR,0.783222,1.359316,0.394333
2,2,,C1-C2-C3-C4-C7,2400,3072,43,2556,240,240,240,...,240,240,10.950000,11990.0,12979.934454,2748.646498,CIFAR,0.783222,1.359316,0.394333
3,3,,C1-C2-C3-C4-C8,2400,3072,43,2556,240,240,240,...,231,660,10.850000,11986.0,13043.382883,2748.646498,CIFAR,0.783222,1.359316,0.394333
4,4,,C1-C2-C3-C4-C9,2400,3072,43,2556,240,240,240,...,228,247,11.110000,11990.0,12917.570829,2748.646498,CIFAR,0.783222,1.359316,0.394333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,751,,C5-C6-C7-C8-C10,2800,784,80,1806,261,281,285,...,388,14,44.769999,13985.0,8569.262505,1806.708097,MNIST,0.764864,0.000000,0.000000
752,752,,C5-C6-C7-C9-C10,2800,784,80,1806,261,281,285,...,388,14,57.309997,13990.0,8561.102867,1806.708097,MNIST,0.764864,0.000000,0.000000
753,753,,C5-C6-C8-C9-C10,2800,784,80,1806,261,281,285,...,388,14,46.640000,13985.0,8571.797371,1806.708097,MNIST,0.764864,0.000000,0.000000
754,754,,C5-C7-C8-C9-C10,2800,784,80,1806,261,281,285,...,388,14,48.800001,13990.0,8570.687532,1806.708097,MNIST,0.764864,0.000000,0.000000


In [21]:
df_all.to_csv("Zero_shot_Dataset_5.csv")

**Zero-shot-Composition Model**
---

In [22]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error

# ================================
# 1. Load Dataset
# ================================
csv_path = "Zero_shot_Dataset_5.csv"
assert os.path.exists(csv_path), f"File not found: {csv_path}"
df = pd.read_csv(csv_path)

# ================================
# Target / Exclusions
# ================================
target_cols = [
    "Global_Accuracy(%)",
    "Global_Latency_Sum(ms)"
]

# Columns to drop from features
explicit_drop_cols = {
    "Combination",
    "Dataset",
    'Unnamed: 0',
    "Global_RoundTime_Max(ms)",
    "Global_DataVolume"
}

# Validate targets exist
missing_targets = [c for c in target_cols if c not in df.columns]
if missing_targets:
    raise ValueError(f"Missing target columns in CSV: {missing_targets}")

# ================================
# 2. Prepare Data
# ================================
def coerce_num(series):
    return pd.to_numeric(series, errors="coerce")

df_num = df.copy()
for c in df_num.columns:
    df_num[c] = coerce_num(df_num[c])

drop_cols = set(target_cols) | explicit_drop_cols
feature_cols = [
    c for c in df_num.columns
    if c not in drop_cols and pd.api.types.is_numeric_dtype(df_num[c])
]

X = df_num[feature_cols].copy().fillna(df_num[feature_cols].median(numeric_only=True))
Y = df_num[target_cols].copy().fillna(df_num[target_cols].median(numeric_only=True))
combos = df["Combination"] if "Combination" in df.columns else pd.Series([None]*len(df))
datasets = df["Dataset"] if "Dataset" in df.columns else pd.Series(["Unknown"]*len(df))

# ================================
# 3. Train-Test Split
# ================================
X_train, X_test, y_train, y_test, combos_train, combos_test, dataset_train, dataset_test = train_test_split(
    X.values, Y.values, combos.values, datasets.values, test_size=0.2, random_state=42
)

# ================================
# 4. Min-Max Scaling
# ================================
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_s = scaler_X.fit_transform(X_train)
X_test_s  = scaler_X.transform(X_test)
y_train_s = scaler_y.fit_transform(y_train)
y_test_s  = scaler_y.transform(y_test)

# ================================
# 5. Define Model
# ================================
class TinyNet(nn.Module):
    def __init__(self, inp, outp):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(inp, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, outp)
        )
    def forward(self, x):
        return self.net(x)

model = TinyNet(X_train_s.shape[1], y_train_s.shape[1])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

# ================================
# 6. Training
# ================================
Xtr = torch.tensor(X_train_s, dtype=torch.float32)
ytr = torch.tensor(y_train_s, dtype=torch.float32)
Xte = torch.tensor(X_test_s,  dtype=torch.float32)
yte = torch.tensor(y_test_s,  dtype=torch.float32)

EPOCHS = 120
for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    pred = model(Xtr)
    loss = criterion(pred, ytr)
    loss.backward()
    optimizer.step()

# ================================
# 7. Evaluation
# ================================
model.eval()
with torch.no_grad():
    pred_test_s = model(Xte).cpu().numpy()

pred_test = scaler_y.inverse_transform(pred_test_s)
y_test_inv = scaler_y.inverse_transform(yte.cpu().numpy())

# ================================
# 8. Overall Metrics
# ================================
mae  = mean_absolute_error(y_test_inv, pred_test, multioutput="raw_values")
mape = mean_absolute_percentage_error(y_test_inv, pred_test, multioutput="raw_values") * 100
r2   = [r2_score(y_test_inv[:, i], pred_test[:, i]) for i in range(y_test_inv.shape[1])]

metrics_df = pd.DataFrame({
    "Target": target_cols,
    "MAE": mae,
    "MAPE": mape,
    "R2": r2
}).round(4)

# ================================
# 8B. Dataset-wise Aggregated Metrics
# ================================
dataset_metrics = []
unique_datasets = np.unique(dataset_test)

for ds in unique_datasets:
    mask = dataset_test == ds
    y_true_ds = y_test_inv[mask]
    y_pred_ds = pred_test[mask]
    if len(y_true_ds) == 0:
        continue

    # Aggregate across all target dimensions
    mae_ds  = mean_absolute_error(y_true_ds, y_pred_ds)
    mape_ds = mean_absolute_percentage_error(y_true_ds, y_pred_ds) * 100
    r2_ds   = r2_score(y_true_ds.flatten(), y_pred_ds.flatten())

    dataset_metrics.append({
        "Dataset": ds,
        "MAE": mae_ds,
        "MAPE": mape_ds,
        "R2": r2_ds
    })

dataset_metrics_df = pd.DataFrame(dataset_metrics).round(4)

# ================================
# 9. Comparison DataFrame (with dataset info)
# ================================
comparison_df = pd.DataFrame({
    "Dataset": dataset_test,
    "Combination": combos_test,
    "Actual_Accuracy(%)":        y_test_inv[:, 0],
    "Predicted_Accuracy(%)":     pred_test[:, 0],
    "Actual_Latency_Sum(ms)":    y_test_inv[:, 1],
    "Predicted_Latency_Sum(ms)": pred_test[:, 1],
})

# ================================
# 10. Save + Display
# ================================
metrics_path = "enhanced_tinynet_metrics.csv"
preds_path   = "enhanced_tinynet_test_predictions.csv"
dataset_metrics_path = "datasetwise_tinynet_metrics.csv"

metrics_df.to_csv(metrics_path, index=False)
dataset_metrics_df.to_csv(dataset_metrics_path, index=False)
comparison_df.to_csv(preds_path, index=False)

print("✅ Trained on:", csv_path)
print(f"Features used (X): {len(feature_cols)}")
print(f"Targets (Y): {target_cols}\n")
print("Overall Metrics (MAE, MAPE, R²):\n", metrics_df)
print("\nDataset-wise Aggregated Metrics:\n", dataset_metrics_df)
print("\nFull test predictions saved to:", preds_path)


✅ Trained on: Zero_shot_Dataset_5.csv
Features used (X): 75
Targets (Y): ['Global_Accuracy(%)', 'Global_Latency_Sum(ms)']

Overall Metrics (MAE, MAPE, R²):
                    Target         MAE       MAPE      R2
0      Global_Accuracy(%)    5.586200  18.402901  0.7695
1  Global_Latency_Sum(ms)  140.645096   1.761300  0.9963

Dataset-wise Aggregated Metrics:
    Dataset       MAE     MAPE      R2
0    CIFAR  100.0566   7.1349  0.9991
1  F-MNIST   59.8201  13.1938  0.9992
2    MNIST   57.3062  10.1340  0.9994

Full test predictions saved to: enhanced_tinynet_test_predictions.csv


In [23]:
X.columns

Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'C1_DataVolume(MB)', 'C1_FeatureCount',
       'C1_Accuracy(%)', 'C1_Latency(ms)', 'C1_Label0', 'C1_Label1',
       'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5', 'C1_Label6',
       'C1_Label7', 'C1_Label8', 'C1_Label9', 'C2_DataVolume(MB)',
       'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)', 'C2_Label0',
       'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
       'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9', 'C3_DataVolume(MB)',
       'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)', 'C3_Label0',
       'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
       'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9', 'C4_DataVolume(MB)',
       'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)', 'C4_Label0',
       'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
       'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9', 'C5_DataVolume(MB)',
       'C5_FeatureCount', 'C5_Ac

In [24]:
comparison_df

Unnamed: 0,Dataset,Combination,Actual_Accuracy(%),Predicted_Accuracy(%),Actual_Latency_Sum(ms),Predicted_Latency_Sum(ms)
0,CIFAR,C3-C6-C7-C8-C9,11.000000,11.525822,12879.946289,12687.423828
1,MNIST,C4-C6-C8-C9-C10,47.080002,40.440971,8433.819336,8374.277344
2,MNIST,C1-C3-C4-C8-C9,40.770000,36.626892,8420.310547,8535.503906
3,F-MNIST,C2-C3-C6-C8-C9,20.690001,30.823721,8457.083008,8354.966797
4,MNIST,C1-C4-C7-C8-C10,21.390001,41.693260,8463.889648,8345.593750
...,...,...,...,...,...,...
246,CIFAR,C2-C3-C4-C6-C9,12.480000,10.933663,12911.597656,12642.280273
247,CIFAR,C5-C6-C7-C9-C10,10.890000,11.629728,12958.559570,13047.930664
248,CIFAR,C5-C8-C10,14.380001,10.113939,7950.503906,7699.818848
249,CIFAR,C2-C4-C7-C8-C9,11.010000,11.478515,13013.306641,13043.982422


**Zero shot Predictions for new Combinations MINIST**
---

In [25]:
import pandas as pd
import numpy as np

# Load the new dataset
path = "few_text_dataset_1MINIST.csv"
df = pd.read_csv(path).copy()
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations
0,4575,12000,C3-C8-C10,2800.0,784.0,88.45,1674.80,280.0,280.0,280.0,...,13986.0,8471.30,1720.91,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617
1,1488,8913,C1-C6-C8,2800.0,784.0,93.90,1710.33,280.0,280.0,280.0,...,13990.0,8432.96,1710.33,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519
2,4970,12395,C4-C6-C7,2795.0,784.0,87.77,1668.73,389.0,149.0,384.0,...,13990.0,8400.59,1694.39,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719
3,8009,15434,C2-C5-C8-C10,2794.0,784.0,67.49,1679.67,38.0,240.0,12.0,...,13980.0,8588.49,1806.71,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016
4,3027,10452,C2-C6-C8,2794.0,784.0,67.49,1679.67,38.0,240.0,12.0,...,13979.0,8399.89,1687.31,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2770,10195,C2-C5-C7,2794.0,784.0,67.49,1679.67,38.0,240.0,12.0,...,13994.0,8540.52,1806.71,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719
496,8676,16101,C4-C6-C8-C9,2795.0,784.0,87.77,1668.73,389.0,149.0,384.0,...,13985.0,8401.64,1688.73,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923
497,1339,8764,C1-C5-C9,2800.0,784.0,93.90,1710.33,280.0,280.0,280.0,...,13995.0,8571.31,1806.71,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718
498,3948,11373,C3-C5-C7,2800.0,784.0,88.45,1674.80,280.0,280.0,280.0,...,13988.0,8720.34,1897.35,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024


In [26]:
import pandas as pd
import numpy as np

# --- Helper: Parse combination strings into client IDs ---
def parse_combination(combo_str):
    if pd.isna(combo_str):
        return []
    return combo_str.split("-")

# --- Helper: Create prefix mapping based on order ---
def get_prefix_map(clients):
    return {client: f"C{i+1}" for i, client in enumerate(clients)}

# --- Helper: Get normalized label histogram for a prefix ---
def get_normalized_labels(row, prefix):
    try:
        labels = [row[f"{prefix}_Label{i}"] for i in range(10)]
        labels = np.array(labels, dtype=float)
        total = labels.sum()
        return labels / total if total > 0 else labels
    except KeyError:
        return np.zeros(10)

# --- Compute the three scores per row ---
def compute_scores(row):
    old_clients = parse_combination(row["Old_Combination"])
    new_clients = parse_combination(row["New_Combination"])

    # Determine newly added clients
    added_clients = [c for c in new_clients if c not in old_clients]

    # Prefix maps (based on New_Combination order)
    prefix_map = get_prefix_map(new_clients)
    old_prefixes = [prefix_map[c] for c in old_clients if c in prefix_map]
    new_prefixes = [prefix_map[c] for c in added_clients if c in prefix_map]

    # --- 1. Data Composability Score (Mean of distances) ---
    base_hist = np.zeros(10)
    for p in old_prefixes:
        base_hist += get_normalized_labels(row, p)
    base_hist_sum = base_hist.sum()
    base_hist = base_hist / base_hist_sum if base_hist_sum > 0 else base_hist

    distances = []
    for p in new_prefixes:
        added_hist = get_normalized_labels(row, p)
        added_sum = added_hist.sum()
        added_hist = added_hist / added_sum if added_sum > 0 else added_hist
        dist = np.linalg.norm(base_hist - added_hist)
        distances.append(dist)
    data_score = float(np.mean(distances)) if distances else np.nan

    # --- 2. Base Accuracy and Latency (mean) ---
    base_accs = []
    base_lats = []
    for p in old_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(acc): base_accs.append(float(acc))
        if pd.notna(lat): base_lats.append(float(lat))
    base_acc = np.mean(base_accs) if base_accs else np.nan
    base_lat = np.mean(base_lats) if base_lats else np.nan

    # --- 3. Accuracy Similarity Score (mean) ---
    acc_sims = []
    for p in new_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        if pd.notna(acc) and pd.notna(base_acc) and base_acc != 0:
            sim = 1 - abs(base_acc - acc) / max(base_acc, acc)
            acc_sims.append(sim)
    acc_sim_score = float(np.mean(acc_sims)) if acc_sims else np.nan

    # --- 4. Scalability Score (mean) ---
    scal_scores = []
    for p in new_prefixes:
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(lat) and pd.notna(base_lat) and base_lat != 0:
            ratio = (base_lat + lat) / base_lat
            scal_scores.append(ratio)
    scal_score = float(np.mean(scal_scores)) if scal_scores else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scal_score,
        "Accuracy_Similarity_Score": acc_sim_score
    })

# --- Apply the scoring function ---
scored_df = df.copy()
scored_metrics = scored_df.apply(compute_scores, axis=1)
scored_df = pd.concat([scored_df, scored_metrics], axis=1)

# --- Save result ---
scored_df.to_csv("few_text_dataset_1MINIST_with_mean_scores.csv", index=False)
print("✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.")

✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.


In [27]:
scored_df.to_csv("few_text_dataset_minist.csv")

In [28]:
scored_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'Combination', 'C1_DataVolume(MB)',
       'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)', 'C1_Label0',
       'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
       'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9', 'C2_DataVolume(MB)',
       'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)', 'C2_Label0',
       'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
       'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9', 'C3_DataVolume(MB)',
       'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)', 'C3_Label0',
       'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
       'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9', 'C4_DataVolume(MB)',
       'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)', 'C4_Label0',
       'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
       'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9', 'C5_DataVolume(MB)',
       'C5_FeatureC

In [29]:
import os
import time
import pandas as pd
import torch
# Path to the new dataset
new_csv_path = "few_text_dataset_minist.csv"
assert os.path.exists(new_csv_path), f"File not found: {new_csv_path}"
# Load new dataset
df_new = pd.read_csv(new_csv_path)
# ================================
# Select only the relevant feature columns
# ================================
feature_cols = ['Unnamed: 0.1','Unnamed: 0.2',
    'C1_DataVolume(MB)', 'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)',
    'C1_Label0', 'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
    'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9',
    'C2_DataVolume(MB)', 'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)',
    'C2_Label0', 'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
    'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9',
    'C3_DataVolume(MB)', 'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)',
    'C3_Label0', 'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
    'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9',
    'C4_DataVolume(MB)', 'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)',
    'C4_Label0', 'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
    'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9',
    'C5_DataVolume(MB)', 'C5_FeatureCount', 'C5_Accuracy(%)', 'C5_Latency(ms)',
    'C5_Label0', 'C5_Label1', 'C5_Label2', 'C5_Label3', 'C5_Label4', 'C5_Label5',
    'C5_Label6', 'C5_Label7', 'C5_Label8', 'C5_Label9',
    'Data_Composability_Score', 'Scalability_Score', 'Accuracy_Similarity_Score'
]
# Check that all required columns exist
missing_cols = [c for c in feature_cols if c not in df_new.columns]
if missing_cols:
    raise ValueError(f"Missing columns in new dataset: {missing_cols}")
# ================================
# Preprocess and Predict
# ================================
# Coerce numeric and fill missing
df_new_num = df_new[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)

# Apply same scaler used during training
X_new_s = scaler_X.transform(df_new_num)

# Convert to tensor
X_new_tensor = torch.tensor(X_new_s, dtype=torch.float32)

# Predict row by row and time it
model.eval()
preds_list = []
times_list = []

with torch.no_grad():
    for i in range(len(X_new_tensor)):
        input_tensor = X_new_tensor[i].unsqueeze(0)
        start_time = time.time()
        pred = model(input_tensor).cpu().numpy()
        end_time = time.time()
        preds_list.append(pred[0])
        times_list.append(end_time - start_time)

# Convert predictions and times to DataFrames
preds_new = scaler_y.inverse_transform(preds_list)
target_cols = ["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]
new_preds_df = pd.DataFrame(preds_new, columns=target_cols)
new_preds_df["Prediction_Time(s)"] = times_list  # Add timing info

# Always include 'Combination' column
if "Combination" in df_new.columns:
    new_preds_df.insert(0, "Combination", df_new["Combination"])
else:
    new_preds_df.insert(0, "Combination", [f"Row_{i}" for i in range(len(df_new))])

# Add 'New_Combination' column if it exists
if "New_Combination" in df_new.columns:
    new_preds_df["New_Combination"] = df_new["New_Combination"]

# Save predictions
output_path = "new_dataset_predictions.csv"
new_preds_df.to_csv(output_path, index=False)
print(f"✅ Predictions completed for: {new_csv_path}")
print(f"Predictions saved to: {output_path}")
print("\nSample predictions:\n", new_preds_df.head(10))


✅ Predictions completed for: few_text_dataset_minist.csv
Predictions saved to: new_dataset_predictions.csv

Sample predictions:
     Combination  Global_Accuracy(%)  Global_Latency_Sum(ms)  \
0     C3-C8-C10           44.825423             9677.191093   
1      C1-C6-C8           37.425673             8549.189541   
2      C4-C6-C7           40.204545             9596.720079   
3  C2-C5-C8-C10           54.570752            10275.642712   
4      C2-C6-C8           43.531163             8639.224939   
5      C4-C5-C8           42.806577             9802.889890   
6   C1-C3-C4-C7           46.348089            10426.742841   
7  C1-C2-C7-C10           44.623685            10095.645041   
8  C1-C4-C8-C10           49.947952            10178.856442   
9     C3-C9-C10           40.904860            10270.543146   

   Prediction_Time(s)    New_Combination  
0            0.000512  C3-C8-C10-C16-C17  
1            0.000107   C1-C6-C8-C15-C19  
2            0.000082   C4-C6-C7-C17-C19  
3    



In [30]:
new_preds_df

Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),Prediction_Time(s),New_Combination
0,C3-C8-C10,44.825423,9677.191093,0.000512,C3-C8-C10-C16-C17
1,C1-C6-C8,37.425673,8549.189541,0.000107,C1-C6-C8-C15-C19
2,C4-C6-C7,40.204545,9596.720079,0.000082,C4-C6-C7-C17-C19
3,C2-C5-C8-C10,54.570752,10275.642712,0.000154,C2-C5-C8-C10-C16
4,C2-C6-C8,43.531163,8639.224939,0.000085,C2-C6-C8-C15-C18
...,...,...,...,...,...
495,C2-C5-C7,47.013486,8768.436132,0.000104,C2-C5-C7-C17-C19
496,C4-C6-C8-C9,49.942340,10929.095685,0.000076,C4-C6-C8-C9-C23
497,C1-C5-C9,43.155025,8542.972721,0.000074,C1-C5-C9-C17-C18
498,C3-C5-C7,41.208966,10018.953748,0.000075,C3-C5-C7-C20-C24


In [31]:
new_preds_df.to_csv("zero-shot_1.csv")

In [32]:
import pandas as pd
import re

# Load both CSVs
df_sim = pd.read_csv("/content/Simulated_Results_From_New_Combinations_ms.csv")
df_zero = pd.read_csv("zero-shot_1.csv")
df_zero

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),Prediction_Time(s),New_Combination
0,0,C3-C8-C10,44.825423,9677.191093,0.000512,C3-C8-C10-C16-C17
1,1,C1-C6-C8,37.425673,8549.189541,0.000107,C1-C6-C8-C15-C19
2,2,C4-C6-C7,40.204545,9596.720079,0.000082,C4-C6-C7-C17-C19
3,3,C2-C5-C8-C10,54.570752,10275.642712,0.000154,C2-C5-C8-C10-C16
4,4,C2-C6-C8,43.531163,8639.224939,0.000085,C2-C6-C8-C15-C18
...,...,...,...,...,...,...
495,495,C2-C5-C7,47.013486,8768.436132,0.000104,C2-C5-C7-C17-C19
496,496,C4-C6-C8-C9,49.942340,10929.095685,0.000076,C4-C6-C8-C9-C23
497,497,C1-C5-C9,43.155025,8542.972721,0.000074,C1-C5-C9-C17-C18
498,498,C3-C5-C7,41.208966,10018.953748,0.000075,C3-C5-C7-C20-C24


In [33]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combination_Computation_Time(s)
0,C3-C8-C10-C16-C17,27.440000,13986,8664.549828,1790.100336,12.076
1,C1-C6-C8-C15-C19,56.819999,13990,8373.711586,1692.873478,12.099
2,C4-C6-C7-C17-C19,31.240001,13990,8549.075603,1783.015966,12.005
3,C2-C5-C8-C10-C16,41.190001,13980,8367.740393,1687.734842,11.765
4,C2-C6-C8-C15-C18,73.909998,13979,8869.716644,2103.403330,12.267
...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,45.210001,13994,9151.850224,1844.880104,12.753
496,C4-C6-C8-C9-C23,62.390000,13985,9199.522018,1855.883837,12.778
497,C1-C5-C9-C17-C18,36.430001,13995,9231.224060,1865.628004,12.732
498,C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805


In [34]:
import pandas as pd
import re
# Ensure required column exists
if "New_Combination" not in df.columns:
    raise ValueError("The CSV must have a column named 'New_Combination'.")

# Convert like "C7-C10-C16" → "7-10-16"
def to_numerical_combo(combo):
    if pd.isna(combo):
        return None
    numbers = re.findall(r'C(\d+)', str(combo))
    return "-".join(numbers)
# Apply conversion
df_sim["Numerical_Combination"] = df_sim["New_Combination"].apply(to_numerical_combo)
# Show preview
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combination_Computation_Time(s),Numerical_Combination
0,C3-C8-C10-C16-C17,27.440000,13986,8664.549828,1790.100336,12.076,3-8-10-16-17
1,C1-C6-C8-C15-C19,56.819999,13990,8373.711586,1692.873478,12.099,1-6-8-15-19
2,C4-C6-C7-C17-C19,31.240001,13990,8549.075603,1783.015966,12.005,4-6-7-17-19
3,C2-C5-C8-C10-C16,41.190001,13980,8367.740393,1687.734842,11.765,2-5-8-10-16
4,C2-C6-C8-C15-C18,73.909998,13979,8869.716644,2103.403330,12.267,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,45.210001,13994,9151.850224,1844.880104,12.753,2-5-7-17-19
496,C4-C6-C8-C9-C23,62.390000,13985,9199.522018,1855.883837,12.778,4-6-8-9-23
497,C1-C5-C9-C17-C18,36.430001,13995,9231.224060,1865.628004,12.732,1-5-9-17-18
498,C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805,3-5-7-20-24


In [35]:
df_sim.to_csv("/content/Simulated_Results_From_New_Combinations_MINIST.csv")

In [161]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combination_Computation_Time(s),Numerical_Combination
0,C3-C8-C10-C16-C17,27.440000,13986,8664.549828,1790.100336,12.076,3-8-10-16-17
1,C1-C6-C8-C15-C19,56.819999,13990,8373.711586,1692.873478,12.099,1-6-8-15-19
2,C4-C6-C7-C17-C19,31.240001,13990,8549.075603,1783.015966,12.005,4-6-7-17-19
3,C2-C5-C8-C10-C16,41.190001,13980,8367.740393,1687.734842,11.765,2-5-8-10-16
4,C2-C6-C8-C15-C18,73.909998,13979,8869.716644,2103.403330,12.267,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,45.210001,13994,9151.850224,1844.880104,12.753,2-5-7-17-19
496,C4-C6-C8-C9-C23,62.390000,13985,9199.522018,1855.883837,12.778,4-6-8-9-23
497,C1-C5-C9-C17-C18,36.430001,13995,9231.224060,1865.628004,12.732,1-5-9-17-18
498,C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805,3-5-7-20-24


In [36]:
# The issue likely arises because both datasets have duplicate combinations.
# We’ll ensure unique matches by dropping duplicates before merging.

# Convert list to tuple for matching
df_zero["Numeric_Tuple"] = df_zero["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))
df_sim["Numeric_Tuple"] = df_sim["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))

# Drop duplicates to ensure one-to-one matching
df_zero_unique = df_zero.drop_duplicates(subset=["Numeric_Tuple"])
df_sim_unique = df_sim.drop_duplicates(subset=["Numeric_Tuple"])

# Find intersection (only unique tuples that appear in both)
matched_tuples = set(df_zero_unique["Numeric_Tuple"]).intersection(set(df_sim_unique["Numeric_Tuple"]))

# Filter only those rows
df_zero_matched = df_zero_unique[df_zero_unique["Numeric_Tuple"].isin(matched_tuples)]
df_sim_matched = df_sim_unique[df_sim_unique["Numeric_Tuple"].isin(matched_tuples)]

# Merge one-to-one based on the unique tuple
merged_df = pd.merge(df_zero_matched, df_sim_matched, on="Numeric_Tuple", suffixes=("_zero", "_sim"))

# Save final merged result
output_path = "Matched_Combinations_Results_Unique.csv"
merged_df.to_csv(output_path, index=False)

output_path, merged_df.shape

('Matched_Combinations_Results_Unique.csv', (500, 14))

In [37]:
merged_df.columns

Index(['Unnamed: 0', 'Combination', 'Global_Accuracy(%)_zero',
       'Global_Latency_Sum(ms)_zero', 'Prediction_Time(s)',
       'New_Combination_zero', 'Numeric_Tuple', 'New_Combination_sim',
       'Global_Accuracy(%)_sim', 'Global_DataVolume',
       'Global_Latency_Sum(ms)_sim', 'Global_RoundTime_Max(ms)',
       'Combination_Computation_Time(s)', 'Numerical_Combination'],
      dtype='object')

In [38]:
merged_df

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,Prediction_Time(s),New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Combination_Computation_Time(s),Numerical_Combination
0,0,C3-C8-C10,44.825423,9677.191093,0.000512,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,27.440000,13986,8664.549828,1790.100336,12.076,3-8-10-16-17
1,1,C1-C6-C8,37.425673,8549.189541,0.000107,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,56.819999,13990,8373.711586,1692.873478,12.099,1-6-8-15-19
2,2,C4-C6-C7,40.204545,9596.720079,0.000082,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,31.240001,13990,8549.075603,1783.015966,12.005,4-6-7-17-19
3,3,C2-C5-C8-C10,54.570752,10275.642712,0.000154,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,41.190001,13980,8367.740393,1687.734842,11.765,2-5-8-10-16
4,4,C2-C6-C8,43.531163,8639.224939,0.000085,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,73.909998,13979,8869.716644,2103.403330,12.267,2-6-8-15-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,47.013486,8768.436132,0.000104,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,45.210001,13994,9151.850224,1844.880104,12.753,2-5-7-17-19
496,496,C4-C6-C8-C9,49.942340,10929.095685,0.000076,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,62.390000,13985,9199.522018,1855.883837,12.778,4-6-8-9-23
497,497,C1-C5-C9,43.155025,8542.972721,0.000074,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,36.430001,13995,9231.224060,1865.628004,12.732,1-5-9-17-18
498,498,C3-C5-C7,41.208966,10018.953748,0.000075,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805,3-5-7-20-24


In [39]:
merged_df_Copu=merged_df

In [170]:
merged_df= merged_df_Copu.sample(n=350, random_state=42)


In [104]:
scored_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,4575,12000,C3-C8-C10,2800.0,784.0,90.49,2970.94,280.0,280.0,280.0,...,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617,0.144384,1.994213,0.989618
1,1488,8913,C1-C6-C8,2800.0,784.0,94.40,2971.99,280.0,280.0,280.0,...,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519,0.161254,1.996309,0.890049
2,4970,12395,C4-C6-C7,2794.0,784.0,71.11,2930.81,175.0,439.0,189.0,...,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719,0.250975,2.001354,0.778440
3,8009,15434,C2-C5-C8-C10,2795.0,784.0,74.63,2947.26,102.0,528.0,83.0,...,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016,0.159501,1.986479,0.946556
4,3027,10452,C2-C6-C8,2795.0,784.0,74.63,2947.26,102.0,528.0,83.0,...,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518,0.307317,1.997636,0.824943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,7156,14581,C1-C4-C5-C7,2800.0,784.0,94.40,2971.99,280.0,280.0,280.0,...,0.0,C1-C4-C5-C7,C1-C4-C5-C7-C21,4,"frozenset({1, 4, 5, 7, 21})","(1, 4, 5, 7, 21)",145721,0.095780,1.994866,0.884173
1996,7686,15111,C2-C3-C6-C10,2795.0,784.0,74.63,2947.26,102.0,528.0,83.0,...,0.0,C2-C3-C6-C10,C2-C3-C6-C10-C23,4,"frozenset({2, 3, 6, 10, 23})","(2, 3, 6, 10, 23)",2361023,0.166378,2.002215,0.851168
1997,8232,15657,C3-C4-C6-C10,2800.0,784.0,90.49,2970.94,280.0,280.0,280.0,...,0.0,C3-C4-C6-C10,C3-C4-C6-C10-C19,4,"frozenset({3, 4, 6, 10, 19})","(3, 4, 6, 10, 19)",3461019,0.219554,2.000360,0.838881
1998,3842,11267,C3-C4-C10,2800.0,784.0,90.49,2970.94,280.0,280.0,280.0,...,0.0,C3-C4-C10,C3-C4-C10-C21-C24,3,"frozenset({3, 4, 10, 21, 24})","(3, 4, 10, 21, 24)",34102124,0.318430,2.001426,0.898979


In [171]:
merged_df

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,Prediction_Time(s),New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Combination_Computation_Time(s),Numerical_Combination
361,361,C2-C6-C7,31.702621,14305.592230,0.000104,C2-C6-C7-C17-C25,"(2, 6, 7, 17, 25)",C2-C6-C7-C17-C25,42.199999,13989,9222.837925,1860.805035,12.826,2-6-7-17-25
73,73,C3-C8-C10,41.991614,13745.348697,0.000092,C3-C8-C10-C17-C20,"(3, 8, 10, 17, 20)",C3-C8-C10-C17-C20,61.820000,13983,8777.624607,1765.037298,12.152,3-8-10-17-20
374,374,C5-C6-C9,35.389349,14514.913131,0.000097,C5-C6-C9-C18-C20,"(5, 6, 9, 18, 20)",C5-C6-C9-C18-C20,61.890000,13983,9020.165443,1825.619221,12.549,5-6-9-18-20
155,155,C3-C4-C7-C10,32.973329,15497.923057,0.000103,C3-C4-C7-C10-C15,"(3, 4, 7, 10, 15)",C3-C4-C7-C10-C15,50.309998,13990,8778.256416,1781.341076,12.230,3-4-7-10-15
104,104,C1-C5-C6,38.505236,13621.562600,0.000098,C1-C5-C6-C17-C19,"(1, 5, 6, 17, 19)",C1-C5-C6-C17-C19,38.100001,13995,8938.493967,1810.331345,12.412,1-5-6-17-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41,41,C2-C3-C8,31.835762,13491.738898,0.000122,C2-C3-C8-C23-C24,"(2, 3, 8, 23, 24)",C2-C3-C8-C23-C24,20.990001,13984,9874.238014,3083.808899,13.278,2-3-8-23-24
498,498,C3-C5-C7,35.931529,14069.406891,0.000118,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805,3-5-7-20-24
421,421,C2-C3-C7,33.234397,13839.451287,0.000130,C2-C3-C7-C19-C24,"(2, 3, 7, 19, 24)",C2-C3-C7-C19-C24,35.370001,13989,9224.954605,1872.318506,12.794,2-3-7-19-24
206,206,C2-C3-C7,39.456805,13120.149081,0.000105,C2-C3-C7-C15-C20,"(2, 3, 7, 15, 20)",C2-C3-C7-C15-C20,44.639999,13987,8845.147848,1785.322428,12.332,2-3-7-15-20


In [40]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score

# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# === 🔹 Per-target metrics ===
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === 🔹 Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["Accuracy", "Latency", "Overall"],
    "MAE": [mae_acc, mae_lat, mae_overall],
    "MAPE": [mape_acc, mape_lat, mape_overall],
}).round(4)
metrics_df

Unnamed: 0,Target,MAE,MAPE
0,Accuracy,9.7835,0.2178
1,Latency,970.4149,0.0963
2,Overall,490.0992,0.1571


In [None]:
	0.2178 & 9.7835 &	0.0963 & 970.4149 & 970.4149

In [41]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score

# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# Calculate per-target metrics
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
# Stack the two sets of predictions into single arrays
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["MINIST"],
    "MAE": [mae_overall],
    "MAPE": [mape_overall],
    "R2": [r2_overall]
}).round(4)

metrics_df

Unnamed: 0,Target,MAE,MAPE,R2
0,MINIST,490.0992,0.1571,0.9563


In [None]:
merged_df.to_csv("dd1.csv")

In [None]:
scored_df.to_csv("dd2.csv")

In [42]:
import pandas as pd

# Load the provided CSVs again
dd1 = merged_df  # merged_df equivalent
dd2 = scored_df  # source df_scored_fixed equivalent

print(f"dd1: {len(dd1)} rows, dd2: {len(dd2)} rows")

# --- Step 1: Verify both have the same row count ---
if len(dd1) != len(dd2):
    raise ValueError("Row counts differ! Check alignment before concatenating.")

# --- Step 2: Extract only the columns we need from dd2 ---
historical_df = dd2[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]].copy()

# Rename them as historical versions
historical_df = historical_df.rename(columns={
    "Global_Accuracy(%)": "Global_Accuracy(%)_history",
    "Global_Latency_Sum(ms)": "Global_Latency_Sum(ms)_history"
})

# --- Step 3: Concatenate side-by-side (row-aligned) ---
merged_df_updated = pd.concat([dd1.reset_index(drop=True),
                               historical_df.reset_index(drop=True)], axis=1)

# --- Step 4: Confirm shape and preview ---
print(f"✅ Final merged_df_updated shape: {merged_df_updated.shape}")
print("✅ Columns added:", [c for c in merged_df_updated.columns if "_history" in c])
print("\nPreview:")
print(merged_df_updated.head(10))


dd1: 500 rows, dd2: 500 rows
✅ Final merged_df_updated shape: (500, 16)
✅ Columns added: ['Global_Accuracy(%)_history', 'Global_Latency_Sum(ms)_history']

Preview:
   Unnamed: 0   Combination  Global_Accuracy(%)_zero  \
0           0     C3-C8-C10                44.825423   
1           1      C1-C6-C8                37.425673   
2           2      C4-C6-C7                40.204545   
3           3  C2-C5-C8-C10                54.570752   
4           4      C2-C6-C8                43.531163   
5           5      C4-C5-C8                42.806577   
6           6   C1-C3-C4-C7                46.348089   
7           7  C1-C2-C7-C10                44.623685   
8           8  C1-C4-C8-C10                49.947952   
9           9     C3-C9-C10                40.904860   

   Global_Latency_Sum(ms)_zero  Prediction_Time(s) New_Combination_zero  \
0                  9677.191093            0.000512    C3-C8-C10-C16-C17   
1                  8549.189541            0.000107     C1-C6-C8-C15-C

In [43]:
merged_df_updated

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,Prediction_Time(s),New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Combination_Computation_Time(s),Numerical_Combination,Global_Accuracy(%)_history,Global_Latency_Sum(ms)_history
0,0,C3-C8-C10,44.825423,9677.191093,0.000512,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,27.440000,13986,8664.549828,1790.100336,12.076,3-8-10-16-17,90.098,8471.30
1,1,C1-C6-C8,37.425673,8549.189541,0.000107,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,56.819999,13990,8373.711586,1692.873478,12.099,1-6-8-15-19,86.338,8432.96
2,2,C4-C6-C7,40.204545,9596.720079,0.000082,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,31.240001,13990,8549.075603,1783.015966,12.005,4-6-7-17-19,82.902,8400.59
3,3,C2-C5-C8-C10,54.570752,10275.642712,0.000154,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,41.190001,13980,8367.740393,1687.734842,11.765,2-5-8-10-16,83.346,8588.49
4,4,C2-C6-C8,43.531163,8639.224939,0.000085,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,73.909998,13979,8869.716644,2103.403330,12.267,2-6-8-15-18,80.034,8399.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,47.013486,8768.436132,0.000104,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,45.210001,13994,9151.850224,1844.880104,12.753,2-5-7-17-19,82.628,8540.52
496,496,C4-C6-C8-C9,49.942340,10929.095685,0.000076,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,62.390000,13985,9199.522018,1855.883837,12.778,4-6-8-9-23,84.978,8401.64
497,497,C1-C5-C9,43.155025,8542.972721,0.000074,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,36.430001,13995,9231.224060,1865.628004,12.732,1-5-9-17-18,89.284,8571.31
498,498,C3-C5-C7,41.208966,10018.953748,0.000075,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,31.320000,13988,9234.909773,1854.274988,12.805,3-5-7-20-24,83.432,8720.34


In [44]:
merged_df_updated.to_csv("Final_Results_MINIST_ms.csv")

In [45]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
merged_df=merged_df_updated
# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# Calculate per-target metrics
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
# Stack the two sets of predictions into single arrays
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["MINIST"],
    "MAE": [mae_overall],
    "MAPE": [mape_overall],
    "R2": [r2_overall]
}).round(4)

metrics_df


Unnamed: 0,Target,MAE,MAPE,R2
0,MINIST,490.0992,0.1571,0.9563


**Zero shot Predictions for new Combinations FMINIST**
---

In [46]:
import pandas as pd
import numpy as np

# Load the new dataset
path = "few_text_dataset_1FMINIST.csv"
df = pd.read_csv(path).copy()
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations
0,4575,12000,C3-C8-C10,2800.0,784.0,77.52,1695.37,280.0,280.0,280.0,...,13987.0,8414.89,1695.37,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617
1,1488,8913,C1-C6-C8,2800.0,784.0,84.10,1747.73,280.0,280.0,280.0,...,13992.0,8500.00,1747.73,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519
2,4970,12395,C4-C6-C7,2795.0,784.0,71.28,1696.53,585.0,420.0,100.0,...,13991.0,8464.84,1706.75,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719
3,8009,15434,C2-C5-C8-C10,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,13982.0,8544.23,1805.61,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016
4,3027,10452,C2-C6-C8,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,13982.0,8454.29,1706.75,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2770,10195,C2-C5-C7,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,13995.0,8569.08,1805.61,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719
496,8676,16101,C4-C6-C8-C9,2795.0,784.0,71.28,1696.53,585.0,420.0,100.0,...,13987.0,8459.76,1706.75,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923
497,1339,8764,C1-C5-C9,2800.0,784.0,84.10,1747.73,280.0,280.0,280.0,...,13995.0,8593.50,1805.61,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718
498,3948,11373,C3-C5-C7,2800.0,784.0,77.52,1695.37,280.0,280.0,280.0,...,13989.0,8788.77,1914.42,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024


In [47]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("few_text_dataset_1FMINIST.csv")  # Adjust path if needed

# --- Helper: Parse combination strings into client IDs ---
def parse_combination(combo_str):
    if pd.isna(combo_str):
        return []
    return combo_str.split("-")

# --- Helper: Create prefix mapping based on order ---
def get_prefix_map(clients):
    return {client: f"C{i+1}" for i, client in enumerate(clients)}

# --- Helper: Get normalized label histogram for a prefix ---
def get_normalized_labels(row, prefix):
    try:
        labels = [row[f"{prefix}_Label{i}"] for i in range(10)]
        labels = np.array(labels, dtype=float)
        total = labels.sum()
        return labels / total if total > 0 else labels
    except KeyError:
        return np.zeros(10)

# --- Compute the three scores per row ---
def compute_scores(row):
    old_clients = parse_combination(row["Old_Combination"])
    new_clients = parse_combination(row["New_Combination"])

    # Determine newly added clients
    added_clients = [c for c in new_clients if c not in old_clients]

    # Prefix maps (based on New_Combination order)
    prefix_map = get_prefix_map(new_clients)
    old_prefixes = [prefix_map[c] for c in old_clients if c in prefix_map]
    new_prefixes = [prefix_map[c] for c in added_clients if c in prefix_map]

    # --- 1. Data Composability Score (Mean of distances) ---
    base_hist = np.zeros(10)
    for p in old_prefixes:
        base_hist += get_normalized_labels(row, p)
    base_hist_sum = base_hist.sum()
    base_hist = base_hist / base_hist_sum if base_hist_sum > 0 else base_hist

    distances = []
    for p in new_prefixes:
        added_hist = get_normalized_labels(row, p)
        added_sum = added_hist.sum()
        added_hist = added_hist / added_sum if added_sum > 0 else added_hist
        dist = np.linalg.norm(base_hist - added_hist)
        distances.append(dist)
    data_score = float(np.mean(distances)) if distances else np.nan

    # --- 2. Base Accuracy and Latency (mean) ---
    base_accs = []
    base_lats = []
    for p in old_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(acc): base_accs.append(float(acc))
        if pd.notna(lat): base_lats.append(float(lat))
    base_acc = np.mean(base_accs) if base_accs else np.nan
    base_lat = np.mean(base_lats) if base_lats else np.nan

    # --- 3. Accuracy Similarity Score (mean) ---
    acc_sims = []
    for p in new_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        if pd.notna(acc) and pd.notna(base_acc) and base_acc != 0:
            sim = 1 - abs(base_acc - acc) / max(base_acc, acc)
            acc_sims.append(sim)
    acc_sim_score = float(np.mean(acc_sims)) if acc_sims else np.nan

    # --- 4. Scalability Score (mean) ---
    scal_scores = []
    for p in new_prefixes:
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(lat) and pd.notna(base_lat) and base_lat != 0:
            ratio = (base_lat + lat) / base_lat
            scal_scores.append(ratio)
    scal_score = float(np.mean(scal_scores)) if scal_scores else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scal_score,
        "Accuracy_Similarity_Score": acc_sim_score
    })

# --- Apply the scoring function ---
scored_df = df.copy()
scored_metrics = scored_df.apply(compute_scores, axis=1)
scored_df = pd.concat([scored_df, scored_metrics], axis=1)

# --- Save result ---
scored_df.to_csv("few_text_dataset_1MINIST_with_mean_scores.csv", index=False)
print("✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.")


✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.


In [48]:
scored_df.to_csv("few_text_dataset_fminist.csv")

In [49]:
scored_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'Combination', 'C1_DataVolume(MB)',
       'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)', 'C1_Label0',
       'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
       'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9', 'C2_DataVolume(MB)',
       'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)', 'C2_Label0',
       'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
       'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9', 'C3_DataVolume(MB)',
       'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)', 'C3_Label0',
       'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
       'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9', 'C4_DataVolume(MB)',
       'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)', 'C4_Label0',
       'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
       'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9', 'C5_DataVolume(MB)',
       'C5_FeatureC

In [50]:
df_new

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,...,Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,0,4575,12000,C3-C8-C10,2800.0,784.0,88.45,1674.80,280.0,280.0,...,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617,0.116710,1.999882,0.959339
1,1,1488,8913,C1-C6-C8,2800.0,784.0,93.90,1710.33,280.0,280.0,...,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519,0.087809,1.992324,0.884198
2,2,4970,12395,C4-C6-C7,2795.0,784.0,87.77,1668.73,389.0,149.0,...,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719,0.064758,2.008607,0.818155
3,3,8009,15434,C2-C5-C8-C10,2794.0,784.0,67.49,1679.67,38.0,240.0,...,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016,0.159339,1.982734,0.887736
4,4,3027,10452,C2-C6-C8,2794.0,784.0,67.49,1679.67,38.0,240.0,...,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518,0.224497,1.997638,0.812227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,2770,10195,C2-C5-C7,2794.0,784.0,67.49,1679.67,38.0,240.0,...,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719,0.115166,1.981272,0.813247
496,496,8676,16101,C4-C6-C8-C9,2795.0,784.0,87.77,1668.73,389.0,149.0,...,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923,0.065048,2.006258,0.883403
497,497,1339,8764,C1-C5-C9,2800.0,784.0,93.90,1710.33,280.0,280.0,...,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718,0.097299,1.974306,0.971482
498,498,3948,11373,C3-C5-C7,2800.0,784.0,88.45,1674.80,280.0,280.0,...,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024,0.299089,2.035906,0.977567


In [51]:
# ================================
# 11. Predict on New Unseen Dataset (with Combination column)
# ================================
import os
import pandas as pd
import torch

# Path to the new dataset
new_csv_path = "few_text_dataset_fminist.csv"
assert os.path.exists(new_csv_path), f"File not found: {new_csv_path}"

# Load new dataset
df_new = pd.read_csv(new_csv_path)

# ================================
# Select only the relevant feature columns
# ================================
feature_cols = ['Unnamed: 0.1','Unnamed: 0.2',
    'C1_DataVolume(MB)', 'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)',
    'C1_Label0', 'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
    'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9',
    'C2_DataVolume(MB)', 'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)',
    'C2_Label0', 'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
    'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9',
    'C3_DataVolume(MB)', 'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)',
    'C3_Label0', 'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
    'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9',
    'C4_DataVolume(MB)', 'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)',
    'C4_Label0', 'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
    'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9',
    'C5_DataVolume(MB)', 'C5_FeatureCount', 'C5_Accuracy(%)', 'C5_Latency(ms)',
    'C5_Label0', 'C5_Label1', 'C5_Label2', 'C5_Label3', 'C5_Label4', 'C5_Label5',
    'C5_Label6', 'C5_Label7', 'C5_Label8', 'C5_Label9',
    'Data_Composability_Score', 'Scalability_Score', 'Accuracy_Similarity_Score'
]

# Check that all required columns exist
missing_cols = [c for c in feature_cols if c not in df_new.columns]
if missing_cols:
    raise ValueError(f"Missing columns in new dataset: {missing_cols}")

# ================================
# Preprocess and Predict
# ================================
# Coerce numeric and fill missing
df_new_num = df_new[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)

# Apply same scaler used during training
X_new_s = scaler_X.transform(df_new_num)

# Convert to tensor
X_new_tensor = torch.tensor(X_new_s, dtype=torch.float32)

# Predict using trained model
model.eval()
with torch.no_grad():
    preds_new_s = model(X_new_tensor).cpu().numpy()

# Inverse transform predictions to original scale
preds_new = scaler_y.inverse_transform(preds_new_s)

# ================================
# Build and Save Prediction DataFrame
# ================================
target_cols = ["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]
new_preds_df = pd.DataFrame(preds_new, columns=target_cols)

# Always include 'Combination' column
if "Combination" in df_new.columns:
    new_preds_df.insert(0, "Combination", df_new["Combination"])
else:
    new_preds_df.insert(0, "Combination", [f"Row_{i}" for i in range(len(df_new))])

# Add 'New_Combination' column if it exists
if "New_Combination" in df_new.columns:
    new_preds_df["New_Combination"] = df_new["New_Combination"]

# Save predictions
output_path = "new_dataset_predictions.csv"
new_preds_df.to_csv(output_path, index=False)

print(f"✅ Predictions completed for: {new_csv_path}")
print(f"Predictions saved to: {output_path}")
print("\nSample predictions:\n", new_preds_df.head(10))


✅ Predictions completed for: few_text_dataset_fminist.csv
Predictions saved to: new_dataset_predictions.csv

Sample predictions:
     Combination  Global_Accuracy(%)  Global_Latency_Sum(ms)    New_Combination
0     C3-C8-C10           37.415310             9552.720703  C3-C8-C10-C16-C17
1      C1-C6-C8           33.883301             8386.843750   C1-C6-C8-C15-C19
2      C4-C6-C7           39.867229             9713.376953   C4-C6-C7-C17-C19
3  C2-C5-C8-C10           42.186096            10426.425781   C2-C5-C8-C10-C16
4      C2-C6-C8           36.021805             8430.762695   C2-C6-C8-C15-C18
5      C4-C5-C8           35.655354             9414.589844   C4-C5-C8-C20-C21
6   C1-C3-C4-C7           44.119907            10278.164062    C1-C3-C4-C7-C17
7  C1-C2-C7-C10           45.295658            10475.053711   C1-C2-C7-C10-C17
8  C1-C4-C8-C10           41.779358            10211.625977   C1-C4-C8-C10-C21
9     C3-C9-C10           40.173424             9762.850586  C3-C9-C10-C20-C24




In [52]:
new_preds_df

Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),New_Combination
0,C3-C8-C10,37.415310,9552.720703,C3-C8-C10-C16-C17
1,C1-C6-C8,33.883301,8386.843750,C1-C6-C8-C15-C19
2,C4-C6-C7,39.867229,9713.376953,C4-C6-C7-C17-C19
3,C2-C5-C8-C10,42.186096,10426.425781,C2-C5-C8-C10-C16
4,C2-C6-C8,36.021805,8430.762695,C2-C6-C8-C15-C18
...,...,...,...,...
495,C2-C5-C7,41.313187,9012.732422,C2-C5-C7-C17-C19
496,C4-C6-C8-C9,46.674164,10765.430664,C4-C6-C8-C9-C23
497,C1-C5-C9,42.014683,8312.841797,C1-C5-C9-C17-C18
498,C3-C5-C7,41.572689,9418.961914,C3-C5-C7-C20-C24


In [53]:
new_preds_df.to_csv("zero-shot_2.csv")

In [54]:
import pandas as pd
import re

# Load both CSVs
df_sim = pd.read_csv("/content/Simulated_Results_From_New_Combinations_FMINIST.csv")
df_zero = pd.read_csv("zero-shot_2.csv")
df_zero

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),New_Combination
0,0,C3-C8-C10,37.415310,9552.721,C3-C8-C10-C16-C17
1,1,C1-C6-C8,33.883300,8386.844,C1-C6-C8-C15-C19
2,2,C4-C6-C7,39.867230,9713.377,C4-C6-C7-C17-C19
3,3,C2-C5-C8-C10,42.186096,10426.426,C2-C5-C8-C10-C16
4,4,C2-C6-C8,36.021805,8430.763,C2-C6-C8-C15-C18
...,...,...,...,...,...
495,495,C2-C5-C7,41.313187,9012.732,C2-C5-C7-C17-C19
496,496,C4-C6-C8-C9,46.674164,10765.431,C4-C6-C8-C9-C23
497,497,C1-C5-C9,42.014683,8312.842,C1-C5-C9-C17-C18
498,498,C3-C5-C7,41.572690,9418.962,C3-C5-C7-C20-C24


In [55]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms)
0,C3-C8-C10-C16-C17,25.900000,13987,9531.718493,1993.179798,13487.469673
1,C1-C6-C8-C15-C19,40.020001,13992,9280.577898,1882.534504,13543.587208
2,C4-C6-C7-C17-C19,28.240001,13991,9349.328041,1924.076796,13384.964943
3,C2-C5-C8-C10-C16,32.200000,13982,9202.599764,1865.555525,13121.194363
4,C2-C6-C8-C15-C18,35.929999,13982,9780.948400,2299.623966,13667.884111
...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,41.010001,13995,10588.795662,2302.173376,15190.996170
496,C4-C6-C8-C9-C23,39.170000,13987,11256.857395,2446.941614,15630.279779
497,C1-C5-C9-C17-C18,11.930000,13995,11274.204731,2447.407007,15649.510384
498,C3-C5-C7-C20-C24,24.959999,13989,10941.862345,2372.498512,15599.461317


In [56]:
import pandas as pd
import re
# Ensure required column exists
if "New_Combination" not in df.columns:
    raise ValueError("The CSV must have a column named 'New_Combination'.")

# Convert like "C7-C10-C16" → "7-10-16"
def to_numerical_combo(combo):
    if pd.isna(combo):
        return None
    numbers = re.findall(r'C(\d+)', str(combo))
    return "-".join(numbers)
# Apply conversion
df_sim["Numerical_Combination"] = df_sim["New_Combination"].apply(to_numerical_combo)
# Show preview
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,C3-C8-C10-C16-C17,25.900000,13987,9531.718493,1993.179798,13487.469673,3-8-10-16-17
1,C1-C6-C8-C15-C19,40.020001,13992,9280.577898,1882.534504,13543.587208,1-6-8-15-19
2,C4-C6-C7-C17-C19,28.240001,13991,9349.328041,1924.076796,13384.964943,4-6-7-17-19
3,C2-C5-C8-C10-C16,32.200000,13982,9202.599764,1865.555525,13121.194363,2-5-8-10-16
4,C2-C6-C8-C15-C18,35.929999,13982,9780.948400,2299.623966,13667.884111,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,41.010001,13995,10588.795662,2302.173376,15190.996170,2-5-7-17-19
496,C4-C6-C8-C9-C23,39.170000,13987,11256.857395,2446.941614,15630.279779,4-6-8-9-23
497,C1-C5-C9-C17-C18,11.930000,13995,11274.204731,2447.407007,15649.510384,1-5-9-17-18
498,C3-C5-C7-C20-C24,24.959999,13989,10941.862345,2372.498512,15599.461317,3-5-7-20-24


In [57]:
df_sim.to_csv("/content/Simulated_Results_From_New_Combinations_FMINIST.csv")

In [58]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,C3-C8-C10-C16-C17,25.900000,13987,9531.718493,1993.179798,13487.469673,3-8-10-16-17
1,C1-C6-C8-C15-C19,40.020001,13992,9280.577898,1882.534504,13543.587208,1-6-8-15-19
2,C4-C6-C7-C17-C19,28.240001,13991,9349.328041,1924.076796,13384.964943,4-6-7-17-19
3,C2-C5-C8-C10-C16,32.200000,13982,9202.599764,1865.555525,13121.194363,2-5-8-10-16
4,C2-C6-C8-C15-C18,35.929999,13982,9780.948400,2299.623966,13667.884111,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,41.010001,13995,10588.795662,2302.173376,15190.996170,2-5-7-17-19
496,C4-C6-C8-C9-C23,39.170000,13987,11256.857395,2446.941614,15630.279779,4-6-8-9-23
497,C1-C5-C9-C17-C18,11.930000,13995,11274.204731,2447.407007,15649.510384,1-5-9-17-18
498,C3-C5-C7-C20-C24,24.959999,13989,10941.862345,2372.498512,15599.461317,3-5-7-20-24


In [59]:
# The issue likely arises because both datasets have duplicate combinations.
# We’ll ensure unique matches by dropping duplicates before merging.

# Convert list to tuple for matching
df_zero["Numeric_Tuple"] = df_zero["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))
df_sim["Numeric_Tuple"] = df_sim["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))

# Drop duplicates to ensure one-to-one matching
df_zero_unique = df_zero.drop_duplicates(subset=["Numeric_Tuple"])
df_sim_unique = df_sim.drop_duplicates(subset=["Numeric_Tuple"])

# Find intersection (only unique tuples that appear in both)
matched_tuples = set(df_zero_unique["Numeric_Tuple"]).intersection(set(df_sim_unique["Numeric_Tuple"]))

# Filter only those rows
df_zero_matched = df_zero_unique[df_zero_unique["Numeric_Tuple"].isin(matched_tuples)]
df_sim_matched = df_sim_unique[df_sim_unique["Numeric_Tuple"].isin(matched_tuples)]

# Merge one-to-one based on the unique tuple
merged_df = pd.merge(df_zero_matched, df_sim_matched, on="Numeric_Tuple", suffixes=("_zero", "_sim"))

# Save final merged result
output_path = "Matched_Combinations_Results_Unique.csv"
merged_df.to_csv(output_path, index=False)

output_path, merged_df.shape

('Matched_Combinations_Results_Unique.csv', (500, 13))

In [60]:
merged_df.columns

Index(['Unnamed: 0', 'Combination', 'Global_Accuracy(%)_zero',
       'Global_Latency_Sum(ms)_zero', 'New_Combination_zero', 'Numeric_Tuple',
       'New_Combination_sim', 'Global_Accuracy(%)_sim', 'Global_DataVolume',
       'Global_Latency_Sum(ms)_sim', 'Global_RoundTime_Max(ms)',
       'Computation_Time(ms)', 'Numerical_Combination'],
      dtype='object')

In [61]:
merged_df

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,0,C3-C8-C10,37.415310,9552.721,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,25.900000,13987,9531.718493,1993.179798,13487.469673,3-8-10-16-17
1,1,C1-C6-C8,33.883300,8386.844,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,40.020001,13992,9280.577898,1882.534504,13543.587208,1-6-8-15-19
2,2,C4-C6-C7,39.867230,9713.377,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,28.240001,13991,9349.328041,1924.076796,13384.964943,4-6-7-17-19
3,3,C2-C5-C8-C10,42.186096,10426.426,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,32.200000,13982,9202.599764,1865.555525,13121.194363,2-5-8-10-16
4,4,C2-C6-C8,36.021805,8430.763,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,35.929999,13982,9780.948400,2299.623966,13667.884111,2-6-8-15-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,41.313187,9012.732,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,41.010001,13995,10588.795662,2302.173376,15190.996170,2-5-7-17-19
496,496,C4-C6-C8-C9,46.674164,10765.431,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,39.170000,13987,11256.857395,2446.941614,15630.279779,4-6-8-9-23
497,497,C1-C5-C9,42.014683,8312.842,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,11.930000,13995,11274.204731,2447.407007,15649.510384,1-5-9-17-18
498,498,C3-C5-C7,41.572690,9418.962,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,24.959999,13989,10941.862345,2372.498512,15599.461317,3-5-7-20-24


In [62]:
scored_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,4575,12000,C3-C8-C10,2800.0,784.0,77.52,1695.37,280.0,280.0,280.0,...,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617,0.278773,1.998310,0.924899
1,1488,8913,C1-C6-C8,2800.0,784.0,84.10,1747.73,280.0,280.0,280.0,...,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519,0.225962,1.982902,0.870858
2,4970,12395,C4-C6-C7,2795.0,784.0,71.28,1696.53,585.0,420.0,100.0,...,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719,0.085278,1.991026,0.872189
3,8009,15434,C2-C5-C8-C10,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016,0.358680,1.978792,0.967088
4,3027,10452,C2-C6-C8,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518,0.316226,1.991784,0.883710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2770,10195,C2-C5-C7,2795.0,784.0,77.96,1701.91,68.0,431.0,274.0,...,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719,0.048177,1.971166,0.935435
496,8676,16101,C4-C6-C8-C9,2795.0,784.0,71.28,1696.53,585.0,420.0,100.0,...,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923,0.179160,2.008494,0.837665
497,1339,8764,C1-C5-C9,2800.0,784.0,84.10,1747.73,280.0,280.0,280.0,...,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718,0.242527,1.966679,0.879539
498,3948,11373,C3-C5-C7,2800.0,784.0,77.52,1695.37,280.0,280.0,280.0,...,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024,0.174151,2.037711,0.975674


In [63]:
import pandas as pd

# Load the provided CSVs again
dd1 = merged_df  # merged_df equivalent
dd2 = scored_df  # source df_scored_fixed equivalent

print(f"dd1: {len(dd1)} rows, dd2: {len(dd2)} rows")

# --- Step 1: Verify both have the same row count ---
if len(dd1) != len(dd2):
    raise ValueError("Row counts differ! Check alignment before concatenating.")

# --- Step 2: Extract only the columns we need from dd2 ---
historical_df = dd2[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]].copy()

# Rename them as historical versions
historical_df = historical_df.rename(columns={
    "Global_Accuracy(%)": "Global_Accuracy(%)_history",
    "Global_Latency_Sum(ms)": "Global_Latency_Sum(ms)_history"
})

# --- Step 3: Concatenate side-by-side (row-aligned) ---
merged_df_updated = pd.concat([dd1.reset_index(drop=True),
                               historical_df.reset_index(drop=True)], axis=1)

# --- Step 4: Confirm shape and preview ---
print(f"✅ Final merged_df_updated shape: {merged_df_updated.shape}")
print("✅ Columns added:", [c for c in merged_df_updated.columns if "_history" in c])
print("\nPreview:")
print(merged_df_updated.head(10))


dd1: 500 rows, dd2: 500 rows
✅ Final merged_df_updated shape: (500, 15)
✅ Columns added: ['Global_Accuracy(%)_history', 'Global_Latency_Sum(ms)_history']

Preview:
   Unnamed: 0   Combination  Global_Accuracy(%)_zero  \
0           0     C3-C8-C10                37.415310   
1           1      C1-C6-C8                33.883300   
2           2      C4-C6-C7                39.867230   
3           3  C2-C5-C8-C10                42.186096   
4           4      C2-C6-C8                36.021805   
5           5      C4-C5-C8                35.655354   
6           6   C1-C3-C4-C7                44.119907   
7           7  C1-C2-C7-C10                45.295660   
8           8  C1-C4-C8-C10                41.779358   
9           9     C3-C9-C10                40.173424   

   Global_Latency_Sum(ms)_zero New_Combination_zero       Numeric_Tuple  \
0                     9552.721    C3-C8-C10-C16-C17  (3, 8, 10, 16, 17)   
1                     8386.844     C1-C6-C8-C15-C19   (1, 6, 8, 15, 1

In [64]:
merged_df_updated

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination,Global_Accuracy(%)_history,Global_Latency_Sum(ms)_history
0,0,C3-C8-C10,37.415310,9552.721,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,25.900000,13987,9531.718493,1993.179798,13487.469673,3-8-10-16-17,74.708,8414.89
1,1,C1-C6-C8,33.883300,8386.844,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,40.020001,13992,9280.577898,1882.534504,13543.587208,1-6-8-15-19,76.110,8500.00
2,2,C4-C6-C7,39.867230,9713.377,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,28.240001,13991,9349.328041,1924.076796,13384.964943,4-6-7-17-19,76.838,8464.84
3,3,C2-C5-C8-C10,42.186096,10426.426,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,32.200000,13982,9202.599764,1865.555525,13121.194363,2-5-8-10-16,73.046,8544.23
4,4,C2-C6-C8,36.021805,8430.763,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,35.929999,13982,9780.948400,2299.623966,13667.884111,2-6-8-15-18,71.046,8454.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,41.313187,9012.732,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,41.010001,13995,10588.795662,2302.173376,15190.996170,2-5-7-17-19,79.996,8569.08
496,496,C4-C6-C8-C9,46.674164,10765.431,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,39.170000,13987,11256.857395,2446.941614,15630.279779,4-6-8-9-23,73.996,8459.76
497,497,C1-C5-C9,42.014683,8312.842,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,11.930000,13995,11274.204731,2447.407007,15649.510384,1-5-9-17-18,77.638,8593.50
498,498,C3-C5-C7,41.572690,9418.962,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,24.959999,13989,10941.862345,2372.498512,15599.461317,3-5-7-20-24,78.478,8788.77


In [67]:
merged_df_updated.to_csv("Final_results_of_FMINIST_ms.csv")

In [65]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score

# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# === 🔹 Per-target metrics ===
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === 🔹 Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["Accuracy", "Latency", "Overall"],
    "MAE": [mae_acc, mae_lat, mae_overall],
    "MAPE": [mape_acc, mape_lat, mape_overall],
}).round(4)
metrics_df

Unnamed: 0,Target,MAE,MAPE
0,Accuracy,11.4967,0.2715
1,Latency,1360.4197,0.1468
2,Overall,685.9582,0.2092


In [None]:
0.2715 &11.4967 &0.1468 &1360.4197 &

In [66]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
merged_df=merged_df_updated
# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# Calculate per-target metrics
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
# Stack the two sets of predictions into single arrays
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["FMINIST"],
    "MAE": [mae_overall],
    "MAPE": [mape_overall],
    "R2": [r2_overall]
}).round(4)

metrics_df

Unnamed: 0,Target,MAE,MAPE,R2
0,FMINIST,685.9582,0.2092,0.9062


In [None]:
merged_df_updated.isna().sum()

Unnamed: 0,0
Unnamed: 0,0
Combination,0
Global_Accuracy(%)_zero,0
Global_Latency_Sum(ms)_zero,0
New_Combination_zero,0
Numeric_Tuple,0
New_Combination_sim,0
Global_Accuracy(%)_sim,0
Global_DataVolume,0
Global_Latency_Sum(ms)_sim,0


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

# ===============================
# Load and prepare data
# ===============================
df = pd.read_csv("merged_with_historical.csv").dropna(
    subset=["ZeroShot_Accuracy(%)", "Simulated_Accuracy(%)"]
).reset_index(drop=True)

df["Combination_Index"] = range(len(df))

# ===============================
# Plot configuration
# ===============================
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_context("talk", font_scale=1.5)

plt.figure(figsize=(14, 7))

# Zero-shot predicted accuracy
sns.lineplot(
    x="Combination_Index", y="ZeroShot_Accuracy(%)", data=df,
    label="Zero-Shot Predicted", color="royalblue", linewidth=3
)

# Simulated (final predicted) accuracy
sns.lineplot(
    x="Combination_Index", y="Simulated_Accuracy(%)", data=df,
    label="Simulated Accuracy", color="darkorange", linewidth=3
)

# ===============================
# Formatting
# ===============================
plt.xlabel("Combination Index", fontsize=22, labelpad=15)
plt.ylabel("Accuracy (%)", fontsize=22, labelpad=15)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid(True, which='major', linestyle='--', linewidth=0.6, alpha=0.6)
plt.legend(fontsize=20, loc='best', frameon=False)
plt.tight_layout()

# ===============================
# Optional high-res save
# ===============================
# plt.savefig("zero_vs_simulated_accuracy_acm.pdf", dpi=600, bbox_inches='tight')

plt.show()

# ===============================
# Compute and print trend consistency
# ===============================
df["ZS_Trend"] = np.sign(df["ZeroShot_Accuracy(%)"].diff())
df["Sim_Trend"] = np.sign(df["Simulated_Accuracy(%)"].diff())
trend_agreement = (df["ZS_Trend"] == df["Sim_Trend"]).mean() * 100
print(f"Trend Consistency between Zero-Shot and Simulated Accuracies: {trend_agreement:.2f}%")


**Zero shot Predictions for new Combinations CIFAR10**
---

In [68]:
import pandas as pd
import numpy as np

# Load the new dataset
path = "/content/few_text_dataset_1CIFAR10.csv"
df = pd.read_csv(path).copy()
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations
0,4575,12000,C3-C8-C10,2400.0,3072.0,39.87,2578.14,240.0,240.0,240.0,...,11988.0,13567.85,2904.11,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617
1,1488,8913,C1-C6-C8,2400.0,3072.0,43.66,2556.42,240.0,240.0,240.0,...,11992.0,12841.88,2646.87,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519
2,4970,12395,C4-C6-C7,2394.0,3072.0,30.79,2748.65,5.0,1115.0,245.0,...,11990.0,13315.73,2904.11,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719
3,8009,15434,C2-C5-C8-C10,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,11984.0,13058.27,2844.26,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016
4,3027,10452,C2-C6-C8,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,11983.0,12947.70,2678.05,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2770,10195,C2-C5-C7,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,11996.0,12989.31,2904.11,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719
496,8676,16101,C4-C6-C8-C9,2394.0,3072.0,30.79,2748.65,5.0,1115.0,245.0,...,11986.0,13062.51,2748.65,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923
497,1339,8764,C1-C5-C9,2400.0,3072.0,43.66,2556.42,240.0,240.0,240.0,...,11995.0,13119.01,2904.11,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718
498,3948,11373,C3-C5-C7,2400.0,3072.0,39.87,2578.14,240.0,240.0,240.0,...,11992.0,12822.28,2618.15,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024


In [69]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("few_text_dataset_1CIFAR10.csv")  # Adjust path if needed

# --- Helper: Parse combination strings into client IDs ---
def parse_combination(combo_str):
    if pd.isna(combo_str):
        return []
    return combo_str.split("-")

# --- Helper: Create prefix mapping based on order ---
def get_prefix_map(clients):
    return {client: f"C{i+1}" for i, client in enumerate(clients)}

# --- Helper: Get normalized label histogram for a prefix ---
def get_normalized_labels(row, prefix):
    try:
        labels = [row[f"{prefix}_Label{i}"] for i in range(10)]
        labels = np.array(labels, dtype=float)
        total = labels.sum()
        return labels / total if total > 0 else labels
    except KeyError:
        return np.zeros(10)

# --- Compute the three scores per row ---
def compute_scores(row):
    old_clients = parse_combination(row["Old_Combination"])
    new_clients = parse_combination(row["New_Combination"])

    # Determine newly added clients
    added_clients = [c for c in new_clients if c not in old_clients]

    # Prefix maps (based on New_Combination order)
    prefix_map = get_prefix_map(new_clients)
    old_prefixes = [prefix_map[c] for c in old_clients if c in prefix_map]
    new_prefixes = [prefix_map[c] for c in added_clients if c in prefix_map]

    # --- 1. Data Composability Score (Mean of distances) ---
    base_hist = np.zeros(10)
    for p in old_prefixes:
        base_hist += get_normalized_labels(row, p)
    base_hist_sum = base_hist.sum()
    base_hist = base_hist / base_hist_sum if base_hist_sum > 0 else base_hist

    distances = []
    for p in new_prefixes:
        added_hist = get_normalized_labels(row, p)
        added_sum = added_hist.sum()
        added_hist = added_hist / added_sum if added_sum > 0 else added_hist
        dist = np.linalg.norm(base_hist - added_hist)
        distances.append(dist)
    data_score = float(np.mean(distances)) if distances else np.nan

    # --- 2. Base Accuracy and Latency (mean) ---
    base_accs = []
    base_lats = []
    for p in old_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(acc): base_accs.append(float(acc))
        if pd.notna(lat): base_lats.append(float(lat))
    base_acc = np.mean(base_accs) if base_accs else np.nan
    base_lat = np.mean(base_lats) if base_lats else np.nan

    # --- 3. Accuracy Similarity Score (mean) ---
    acc_sims = []
    for p in new_prefixes:
        acc = row.get(f"{p}_Accuracy(%)", np.nan)
        if pd.notna(acc) and pd.notna(base_acc) and base_acc != 0:
            sim = 1 - abs(base_acc - acc) / max(base_acc, acc)
            acc_sims.append(sim)
    acc_sim_score = float(np.mean(acc_sims)) if acc_sims else np.nan

    # --- 4. Scalability Score (mean) ---
    scal_scores = []
    for p in new_prefixes:
        lat = row.get(f"{p}_Latency(ms)", np.nan)
        if pd.notna(lat) and pd.notna(base_lat) and base_lat != 0:
            ratio = (base_lat + lat) / base_lat
            scal_scores.append(ratio)
    scal_score = float(np.mean(scal_scores)) if scal_scores else np.nan

    return pd.Series({
        "Data_Composability_Score": data_score,
        "Scalability_Score": scal_score,
        "Accuracy_Similarity_Score": acc_sim_score
    })

# --- Apply the scoring function ---
scored_df = df.copy()
scored_metrics = scored_df.apply(compute_scores, axis=1)
scored_df = pd.concat([scored_df, scored_metrics], axis=1)

# --- Save result ---
scored_df.to_csv("few_text_dataset_1MINIST_with_mean_scores.csv", index=False)
print("✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.")


✅ Scoring complete with means. File saved as 'few_text_dataset_1MINIST_with_mean_scores.csv'.


In [70]:
scored_df.to_csv("few_text_dataset_CIFAR10.csv")

In [71]:
scored_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'Combination', 'C1_DataVolume(MB)',
       'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)', 'C1_Label0',
       'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
       'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9', 'C2_DataVolume(MB)',
       'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)', 'C2_Label0',
       'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
       'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9', 'C3_DataVolume(MB)',
       'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)', 'C3_Label0',
       'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
       'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9', 'C4_DataVolume(MB)',
       'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)', 'C4_Label0',
       'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
       'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9', 'C5_DataVolume(MB)',
       'C5_FeatureC

In [72]:
df_new

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,...,Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,0,4575,12000,C3-C8-C10,2800.0,784.0,77.52,1695.37,280.0,280.0,...,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617,0.278773,1.998310,0.924899
1,1,1488,8913,C1-C6-C8,2800.0,784.0,84.10,1747.73,280.0,280.0,...,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519,0.225962,1.982902,0.870858
2,2,4970,12395,C4-C6-C7,2795.0,784.0,71.28,1696.53,585.0,420.0,...,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719,0.085278,1.991026,0.872189
3,3,8009,15434,C2-C5-C8-C10,2795.0,784.0,77.96,1701.91,68.0,431.0,...,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016,0.358680,1.978792,0.967088
4,4,3027,10452,C2-C6-C8,2795.0,784.0,77.96,1701.91,68.0,431.0,...,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518,0.316226,1.991784,0.883710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,2770,10195,C2-C5-C7,2795.0,784.0,77.96,1701.91,68.0,431.0,...,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719,0.048177,1.971166,0.935435
496,496,8676,16101,C4-C6-C8-C9,2795.0,784.0,71.28,1696.53,585.0,420.0,...,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923,0.179160,2.008494,0.837665
497,497,1339,8764,C1-C5-C9,2800.0,784.0,84.10,1747.73,280.0,280.0,...,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718,0.242527,1.966679,0.879539
498,498,3948,11373,C3-C5-C7,2800.0,784.0,77.52,1695.37,280.0,280.0,...,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024,0.174151,2.037711,0.975674


In [73]:
# ================================
# 11. Predict on New Unseen Dataset (with Combination column)
# ================================
import os
import pandas as pd
import torch

# Path to the new dataset
new_csv_path = "few_text_dataset_CIFAR10.csv"
assert os.path.exists(new_csv_path), f"File not found: {new_csv_path}"

# Load new dataset
df_new = pd.read_csv(new_csv_path)

# ================================
# Select only the relevant feature columns
# ================================
feature_cols = ['Unnamed: 0.1','Unnamed: 0.2',
    'C1_DataVolume(MB)', 'C1_FeatureCount', 'C1_Accuracy(%)', 'C1_Latency(ms)',
    'C1_Label0', 'C1_Label1', 'C1_Label2', 'C1_Label3', 'C1_Label4', 'C1_Label5',
    'C1_Label6', 'C1_Label7', 'C1_Label8', 'C1_Label9',
    'C2_DataVolume(MB)', 'C2_FeatureCount', 'C2_Accuracy(%)', 'C2_Latency(ms)',
    'C2_Label0', 'C2_Label1', 'C2_Label2', 'C2_Label3', 'C2_Label4', 'C2_Label5',
    'C2_Label6', 'C2_Label7', 'C2_Label8', 'C2_Label9',
    'C3_DataVolume(MB)', 'C3_FeatureCount', 'C3_Accuracy(%)', 'C3_Latency(ms)',
    'C3_Label0', 'C3_Label1', 'C3_Label2', 'C3_Label3', 'C3_Label4', 'C3_Label5',
    'C3_Label6', 'C3_Label7', 'C3_Label8', 'C3_Label9',
    'C4_DataVolume(MB)', 'C4_FeatureCount', 'C4_Accuracy(%)', 'C4_Latency(ms)',
    'C4_Label0', 'C4_Label1', 'C4_Label2', 'C4_Label3', 'C4_Label4', 'C4_Label5',
    'C4_Label6', 'C4_Label7', 'C4_Label8', 'C4_Label9',
    'C5_DataVolume(MB)', 'C5_FeatureCount', 'C5_Accuracy(%)', 'C5_Latency(ms)',
    'C5_Label0', 'C5_Label1', 'C5_Label2', 'C5_Label3', 'C5_Label4', 'C5_Label5',
    'C5_Label6', 'C5_Label7', 'C5_Label8', 'C5_Label9',
    'Data_Composability_Score', 'Scalability_Score', 'Accuracy_Similarity_Score'
]

# Check that all required columns exist
missing_cols = [c for c in feature_cols if c not in df_new.columns]
if missing_cols:
    raise ValueError(f"Missing columns in new dataset: {missing_cols}")

# ================================
# Preprocess and Predict
# ================================
# Coerce numeric and fill missing
df_new_num = df_new[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)

# Apply same scaler used during training
X_new_s = scaler_X.transform(df_new_num)

# Convert to tensor
X_new_tensor = torch.tensor(X_new_s, dtype=torch.float32)

# Predict using trained model
model.eval()
with torch.no_grad():
    preds_new_s = model(X_new_tensor).cpu().numpy()

# Inverse transform predictions to original scale
preds_new = scaler_y.inverse_transform(preds_new_s)

# ================================
# Build and Save Prediction DataFrame
# ================================
target_cols = ["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]
new_preds_df = pd.DataFrame(preds_new, columns=target_cols)

# Always include 'Combination' column
if "Combination" in df_new.columns:
    new_preds_df.insert(0, "Combination", df_new["Combination"])
else:
    new_preds_df.insert(0, "Combination", [f"Row_{i}" for i in range(len(df_new))])

# Add 'New_Combination' column if it exists
if "New_Combination" in df_new.columns:
    new_preds_df["New_Combination"] = df_new["New_Combination"]

# Save predictions
output_path = "new_dataset_predictions.csv"
new_preds_df.to_csv(output_path, index=False)

print(f"✅ Predictions completed for: {new_csv_path}")
print(f"Predictions saved to: {output_path}")
print("\nSample predictions:\n", new_preds_df.head(10))


✅ Predictions completed for: few_text_dataset_CIFAR10.csv
Predictions saved to: new_dataset_predictions.csv

Sample predictions:
     Combination  Global_Accuracy(%)  Global_Latency_Sum(ms)    New_Combination
0     C3-C8-C10           16.143148            13065.418945  C3-C8-C10-C16-C17
1      C1-C6-C8           12.594878            12389.416992   C1-C6-C8-C15-C19
2      C4-C6-C7           17.683899            13475.478516   C4-C6-C7-C17-C19
3  C2-C5-C8-C10           20.153534            14977.681641   C2-C5-C8-C10-C16
4      C2-C6-C8           13.028686            12419.189453   C2-C6-C8-C15-C18
5      C4-C5-C8           15.462841            13123.493164   C4-C5-C8-C20-C21
6   C1-C3-C4-C7           21.961355            13818.135742    C1-C3-C4-C7-C17
7  C1-C2-C7-C10           22.489510            14343.911133   C1-C2-C7-C10-C17
8  C1-C4-C8-C10           21.204212            14453.254883   C1-C4-C8-C10-C21
9     C3-C9-C10           17.014433            13358.298828  C3-C9-C10-C20-C24




In [74]:
new_preds_df

Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),New_Combination
0,C3-C8-C10,16.143148,13065.418945,C3-C8-C10-C16-C17
1,C1-C6-C8,12.594878,12389.416992,C1-C6-C8-C15-C19
2,C4-C6-C7,17.683899,13475.478516,C4-C6-C7-C17-C19
3,C2-C5-C8-C10,20.153534,14977.681641,C2-C5-C8-C10-C16
4,C2-C6-C8,13.028686,12419.189453,C2-C6-C8-C15-C18
...,...,...,...,...
495,C2-C5-C7,14.441880,12588.509766,C2-C5-C7-C17-C19
496,C4-C6-C8-C9,24.286484,14555.583008,C4-C6-C8-C9-C23
497,C1-C5-C9,15.473924,11979.324219,C1-C5-C9-C17-C18
498,C3-C5-C7,15.797707,13136.602539,C3-C5-C7-C20-C24


In [75]:
new_preds_df.to_csv("zero-shot_3.csv")

In [76]:
import pandas as pd
import re

# Load both CSVs
df_sim = pd.read_csv("/content/Simulated_Results_From_New_Combinations_CIFAR10.csv")
df_zero = pd.read_csv("zero-shot_3.csv")
df_zero

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%),Global_Latency_Sum(ms),New_Combination
0,0,C3-C8-C10,16.143148,13065.419,C3-C8-C10-C16-C17
1,1,C1-C6-C8,12.594878,12389.417,C1-C6-C8-C15-C19
2,2,C4-C6-C7,17.683899,13475.479,C4-C6-C7-C17-C19
3,3,C2-C5-C8-C10,20.153534,14977.682,C2-C5-C8-C10-C16
4,4,C2-C6-C8,13.028686,12419.189,C2-C6-C8-C15-C18
...,...,...,...,...,...
495,495,C2-C5-C7,14.441880,12588.510,C2-C5-C7-C17-C19
496,496,C4-C6-C8-C9,24.286484,14555.583,C4-C6-C8-C9-C23
497,497,C1-C5-C9,15.473924,11979.324,C1-C5-C9-C17-C18
498,498,C3-C5-C7,15.797707,13136.603,C3-C5-C7-C20-C24


In [77]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms)
0,C3-C8-C10-C16-C17,11.180000,11988,12959.092379,2641.251564,19349.068642
1,C1-C6-C8-C15-C19,11.030000,11992,13730.721712,3151.134968,20025.508881
2,C4-C6-C7-C17-C19,14.330000,11990,12937.634468,2622.507334,19156.556845
3,C2-C5-C8-C10-C16,13.429999,11984,13882.881165,3233.234406,20090.104342
4,C2-C6-C8-C15-C18,13.079999,11983,13099.066734,2651.860714,19215.249300
...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,12.909999,11996,12635.647535,2541.817427,18731.168985
496,C4-C6-C8-C9-C23,11.220000,11986,12725.947857,2559.678316,18928.311825
497,C1-C5-C9-C17-C18,11.180000,11995,12675.330162,2559.041977,18821.218491
498,C3-C5-C7-C20-C24,12.220000,11992,12564.683199,2536.518812,18778.584242


In [78]:
import pandas as pd
import re
# Ensure required column exists
if "New_Combination" not in df.columns:
    raise ValueError("The CSV must have a column named 'New_Combination'.")

# Convert like "C7-C10-C16" → "7-10-16"
def to_numerical_combo(combo):
    if pd.isna(combo):
        return None
    numbers = re.findall(r'C(\d+)', str(combo))
    return "-".join(numbers)
# Apply conversion
df_sim["Numerical_Combination"] = df_sim["New_Combination"].apply(to_numerical_combo)
# Show preview
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,C3-C8-C10-C16-C17,11.180000,11988,12959.092379,2641.251564,19349.068642,3-8-10-16-17
1,C1-C6-C8-C15-C19,11.030000,11992,13730.721712,3151.134968,20025.508881,1-6-8-15-19
2,C4-C6-C7-C17-C19,14.330000,11990,12937.634468,2622.507334,19156.556845,4-6-7-17-19
3,C2-C5-C8-C10-C16,13.429999,11984,13882.881165,3233.234406,20090.104342,2-5-8-10-16
4,C2-C6-C8-C15-C18,13.079999,11983,13099.066734,2651.860714,19215.249300,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,12.909999,11996,12635.647535,2541.817427,18731.168985,2-5-7-17-19
496,C4-C6-C8-C9-C23,11.220000,11986,12725.947857,2559.678316,18928.311825,4-6-8-9-23
497,C1-C5-C9-C17-C18,11.180000,11995,12675.330162,2559.041977,18821.218491,1-5-9-17-18
498,C3-C5-C7-C20-C24,12.220000,11992,12564.683199,2536.518812,18778.584242,3-5-7-20-24


In [79]:
df_sim.to_csv("/content/Simulated_Results_From_New_Combinations_CIFAR10.csv")

In [80]:
df_sim

Unnamed: 0,New_Combination,Global_Accuracy(%),Global_DataVolume,Global_Latency_Sum(ms),Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,C3-C8-C10-C16-C17,11.180000,11988,12959.092379,2641.251564,19349.068642,3-8-10-16-17
1,C1-C6-C8-C15-C19,11.030000,11992,13730.721712,3151.134968,20025.508881,1-6-8-15-19
2,C4-C6-C7-C17-C19,14.330000,11990,12937.634468,2622.507334,19156.556845,4-6-7-17-19
3,C2-C5-C8-C10-C16,13.429999,11984,13882.881165,3233.234406,20090.104342,2-5-8-10-16
4,C2-C6-C8-C15-C18,13.079999,11983,13099.066734,2651.860714,19215.249300,2-6-8-15-18
...,...,...,...,...,...,...,...
495,C2-C5-C7-C17-C19,12.909999,11996,12635.647535,2541.817427,18731.168985,2-5-7-17-19
496,C4-C6-C8-C9-C23,11.220000,11986,12725.947857,2559.678316,18928.311825,4-6-8-9-23
497,C1-C5-C9-C17-C18,11.180000,11995,12675.330162,2559.041977,18821.218491,1-5-9-17-18
498,C3-C5-C7-C20-C24,12.220000,11992,12564.683199,2536.518812,18778.584242,3-5-7-20-24


In [81]:
# The issue likely arises because both datasets have duplicate combinations.
# We’ll ensure unique matches by dropping duplicates before merging.

# Convert list to tuple for matching
df_zero["Numeric_Tuple"] = df_zero["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))
df_sim["Numeric_Tuple"] = df_sim["New_Combination"].apply(lambda x: tuple(sorted([int(i.replace("C", "")) for i in x.split("-")])))

# Drop duplicates to ensure one-to-one matching
df_zero_unique = df_zero.drop_duplicates(subset=["Numeric_Tuple"])
df_sim_unique = df_sim.drop_duplicates(subset=["Numeric_Tuple"])

# Find intersection (only unique tuples that appear in both)
matched_tuples = set(df_zero_unique["Numeric_Tuple"]).intersection(set(df_sim_unique["Numeric_Tuple"]))

# Filter only those rows
df_zero_matched = df_zero_unique[df_zero_unique["Numeric_Tuple"].isin(matched_tuples)]
df_sim_matched = df_sim_unique[df_sim_unique["Numeric_Tuple"].isin(matched_tuples)]

# Merge one-to-one based on the unique tuple
merged_df = pd.merge(df_zero_matched, df_sim_matched, on="Numeric_Tuple", suffixes=("_zero", "_sim"))

# Save final merged result
output_path = "Matched_Combinations_Results_Unique.csv"
merged_df.to_csv(output_path, index=False)

output_path, merged_df.shape

('Matched_Combinations_Results_Unique.csv', (500, 13))

In [82]:
merged_df.columns

Index(['Unnamed: 0', 'Combination', 'Global_Accuracy(%)_zero',
       'Global_Latency_Sum(ms)_zero', 'New_Combination_zero', 'Numeric_Tuple',
       'New_Combination_sim', 'Global_Accuracy(%)_sim', 'Global_DataVolume',
       'Global_Latency_Sum(ms)_sim', 'Global_RoundTime_Max(ms)',
       'Computation_Time(ms)', 'Numerical_Combination'],
      dtype='object')

In [83]:
merged_df

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination
0,0,C3-C8-C10,16.143148,13065.419,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,11.180000,11988,12959.092379,2641.251564,19349.068642,3-8-10-16-17
1,1,C1-C6-C8,12.594878,12389.417,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,11.030000,11992,13730.721712,3151.134968,20025.508881,1-6-8-15-19
2,2,C4-C6-C7,17.683899,13475.479,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,14.330000,11990,12937.634468,2622.507334,19156.556845,4-6-7-17-19
3,3,C2-C5-C8-C10,20.153534,14977.682,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,13.429999,11984,13882.881165,3233.234406,20090.104342,2-5-8-10-16
4,4,C2-C6-C8,13.028686,12419.189,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,13.079999,11983,13099.066734,2651.860714,19215.249300,2-6-8-15-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,14.441880,12588.510,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,12.909999,11996,12635.647535,2541.817427,18731.168985,2-5-7-17-19
496,496,C4-C6-C8-C9,24.286484,14555.583,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,11.220000,11986,12725.947857,2559.678316,18928.311825,4-6-8-9-23
497,497,C1-C5-C9,15.473924,11979.324,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,11.180000,11995,12675.330162,2559.041977,18821.218491,1-5-9-17-18
498,498,C3-C5-C7,15.797707,13136.603,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,12.220000,11992,12564.683199,2536.518812,18778.584242,3-5-7-20-24


In [84]:
scored_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Combination,C1_DataVolume(MB),C1_FeatureCount,C1_Accuracy(%),C1_Latency(ms),C1_Label0,C1_Label1,C1_Label2,...,Combo_List,Old_Combination,New_Combination,Old_Length,_set,_key,normalized_combinations,Data_Composability_Score,Scalability_Score,Accuracy_Similarity_Score
0,4575,12000,C3-C8-C10,2400.0,3072.0,39.87,2578.14,240.0,240.0,240.0,...,0.0,C3-C8-C10,C3-C8-C10-C16-C17,3,"frozenset({3, 8, 10, 16, 17})","(3, 8, 10, 16, 17)",38101617,0.367290,2.022133,0.729658
1,1488,8913,C1-C6-C8,2400.0,3072.0,43.66,2556.42,240.0,240.0,240.0,...,0.0,C1-C6-C8,C1-C6-C8-C15-C19,3,"frozenset({1, 6, 8, 15, 19})","(1, 6, 8, 15, 19)",1681519,0.162397,1.984329,0.681862
2,4970,12395,C4-C6-C7,2394.0,3072.0,30.79,2748.65,5.0,1115.0,245.0,...,0.0,C4-C6-C7,C4-C6-C7-C17-C19,3,"frozenset({4, 6, 7, 17, 19})","(4, 6, 7, 17, 19)",4671719,0.177034,2.033913,0.424695
3,8009,15434,C2-C5-C8-C10,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,0.0,C2-C5-C8-C10,C2-C5-C8-C10-C16,4,"frozenset({2, 5, 8, 10, 16})","(2, 5, 8, 10, 16)",2581016,0.596250,1.991789,0.870748
4,3027,10452,C2-C6-C8,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,0.0,C2-C6-C8,C2-C6-C8-C15-C18,3,"frozenset({2, 6, 8, 15, 18})","(2, 6, 8, 15, 18)",2681518,0.371408,2.018808,0.679176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2770,10195,C2-C5-C7,2396.0,3072.0,11.26,2513.30,187.0,116.0,153.0,...,0.0,C2-C5-C7,C2-C5-C7-C17-C19,3,"frozenset({2, 5, 7, 17, 19})","(2, 5, 7, 17, 19)",2571719,0.109971,2.078578,0.252084
496,8676,16101,C4-C6-C8-C9,2394.0,3072.0,30.79,2748.65,5.0,1115.0,245.0,...,0.0,C4-C6-C8-C9,C4-C6-C8-C9-C23,4,"frozenset({4, 6, 8, 9, 23})","(4, 6, 8, 9, 23)",468923,0.158150,1.991874,0.750886
497,1339,8764,C1-C5-C9,2400.0,3072.0,43.66,2556.42,240.0,240.0,240.0,...,0.0,C1-C5-C9,C1-C5-C9-C17-C18,3,"frozenset({1, 5, 9, 17, 18})","(1, 5, 9, 17, 18)",1591718,0.176489,2.110973,0.693912
498,3948,11373,C3-C5-C7,2400.0,3072.0,39.87,2578.14,240.0,240.0,240.0,...,0.0,C3-C5-C7,C3-C5-C7-C20-C24,3,"frozenset({3, 5, 7, 20, 24})","(3, 5, 7, 20, 24)",3572024,0.272349,2.023763,0.594246


In [85]:
import pandas as pd

# Load the provided CSVs again
dd1 = merged_df  # merged_df equivalent
dd2 = scored_df  # source df_scored_fixed equivalent

print(f"dd1: {len(dd1)} rows, dd2: {len(dd2)} rows")

# --- Step 1: Verify both have the same row count ---
if len(dd1) != len(dd2):
    raise ValueError("Row counts differ! Check alignment before concatenating.")

# --- Step 2: Extract only the columns we need from dd2 ---
historical_df = dd2[["Global_Accuracy(%)", "Global_Latency_Sum(ms)"]].copy()

# Rename them as historical versions
historical_df = historical_df.rename(columns={
    "Global_Accuracy(%)": "Global_Accuracy(%)_history",
    "Global_Latency_Sum(ms)": "Global_Latency_Sum(ms)_history"
})

# --- Step 3: Concatenate side-by-side (row-aligned) ---
merged_df_updated = pd.concat([dd1.reset_index(drop=True),
                               historical_df.reset_index(drop=True)], axis=1)

# --- Step 4: Confirm shape and preview ---
print(f"✅ Final merged_df_updated shape: {merged_df_updated.shape}")
print("✅ Columns added:", [c for c in merged_df_updated.columns if "_history" in c])
print("\nPreview:")
print(merged_df_updated.head(10))


dd1: 500 rows, dd2: 500 rows
✅ Final merged_df_updated shape: (500, 15)
✅ Columns added: ['Global_Accuracy(%)_history', 'Global_Latency_Sum(ms)_history']

Preview:
   Unnamed: 0   Combination  Global_Accuracy(%)_zero  \
0           0     C3-C8-C10                16.143148   
1           1      C1-C6-C8                12.594878   
2           2      C4-C6-C7                17.683899   
3           3  C2-C5-C8-C10                20.153534   
4           4      C2-C6-C8                13.028686   
5           5      C4-C5-C8                15.462841   
6           6   C1-C3-C4-C7                21.961355   
7           7  C1-C2-C7-C10                22.489510   
8           8  C1-C4-C8-C10                21.204212   
9           9     C3-C9-C10                17.014433   

   Global_Latency_Sum(ms)_zero New_Combination_zero       Numeric_Tuple  \
0                    13065.419    C3-C8-C10-C16-C17  (3, 8, 10, 16, 17)   
1                    12389.417     C1-C6-C8-C15-C19   (1, 6, 8, 15, 1

In [86]:
merged_df_updated

Unnamed: 0.1,Unnamed: 0,Combination,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,New_Combination_zero,Numeric_Tuple,New_Combination_sim,Global_Accuracy(%)_sim,Global_DataVolume,Global_Latency_Sum(ms)_sim,Global_RoundTime_Max(ms),Computation_Time(ms),Numerical_Combination,Global_Accuracy(%)_history,Global_Latency_Sum(ms)_history
0,0,C3-C8-C10,16.143148,13065.419,C3-C8-C10-C16-C17,"(3, 8, 10, 16, 17)",C3-C8-C10-C16-C17,11.180000,11988,12959.092379,2641.251564,19349.068642,3-8-10-16-17,33.428,13567.85
1,1,C1-C6-C8,12.594878,12389.417,C1-C6-C8-C15-C19,"(1, 6, 8, 15, 19)",C1-C6-C8-C15-C19,11.030000,11992,13730.721712,3151.134968,20025.508881,1-6-8-15-19,35.586,12841.88
2,2,C4-C6-C7,17.683899,13475.479,C4-C6-C7-C17-C19,"(4, 6, 7, 17, 19)",C4-C6-C7-C17-C19,14.330000,11990,12937.634468,2622.507334,19156.556845,4-6-7-17-19,29.560,13315.73
3,3,C2-C5-C8-C10,20.153534,14977.682,C2-C5-C8-C10-C16,"(2, 5, 8, 10, 16)",C2-C5-C8-C10-C16,13.429999,11984,13882.881165,3233.234406,20090.104342,2-5-8-10-16,21.088,13058.27
4,4,C2-C6-C8,13.028686,12419.189,C2-C6-C8-C15-C18,"(2, 6, 8, 15, 18)",C2-C6-C8-C15-C18,13.079999,11983,13099.066734,2651.860714,19215.249300,2-6-8-15-18,24.136,12947.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,C2-C5-C7,14.441880,12588.510,C2-C5-C7-C17-C19,"(2, 5, 7, 17, 19)",C2-C5-C7-C17-C19,12.909999,11996,12635.647535,2541.817427,18731.168985,2-5-7-17-19,24.886,12989.31
496,496,C4-C6-C8-C9,24.286484,14555.583,C4-C6-C8-C9-C23,"(4, 6, 8, 9, 23)",C4-C6-C8-C9-C23,11.220000,11986,12725.947857,2559.678316,18928.311825,4-6-8-9-23,31.620,13062.51
497,497,C1-C5-C9,15.473924,11979.324,C1-C5-C9-C17-C18,"(1, 5, 9, 17, 18)",C1-C5-C9-C17-C18,11.180000,11995,12675.330162,2559.041977,18821.218491,1-5-9-17-18,32.368,13119.01
498,498,C3-C5-C7,15.797707,13136.603,C3-C5-C7-C20-C24,"(3, 5, 7, 20, 24)",C3-C5-C7-C20-C24,12.220000,11992,12564.683199,2536.518812,18778.584242,3-5-7-20-24,26.840,12822.28


In [87]:
merged_df_updated.to_csv("Final_results_of_CIFAR10_ms.csv")

In [88]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score

# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# === 🔹 Per-target metrics ===
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === 🔹 Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["Accuracy", "Latency", "CIFAR-10"],
    "MAE": [mae_acc, mae_lat, mae_overall],
    "MAPE": [mape_acc, mape_lat, mape_overall],

}).round(4)

metrics_df


Unnamed: 0,Target,MAE,MAPE
0,Accuracy,5.6621,0.2878
1,Latency,879.3598,0.0653
2,CIFAR-10,442.5109,0.1766


In [89]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
merged_df=merged_df_updated
# Extract y_true and y_pred for both targets
y_true_acc = merged_df['Global_Accuracy(%)_zero']
y_pred_acc = merged_df['Global_Accuracy(%)_sim']
y_true_lat = merged_df['Global_Latency_Sum(ms)_zero']
y_pred_lat = merged_df['Global_Latency_Sum(ms)_sim']

# Calculate per-target metrics
mae_acc = mean_absolute_error(y_true_acc, y_pred_acc)
mape_acc = mean_absolute_percentage_error(y_true_acc, y_pred_acc)
r2_acc = r2_score(y_true_acc, y_pred_acc)

mae_lat = mean_absolute_error(y_true_lat, y_pred_lat)
mape_lat = mean_absolute_percentage_error(y_true_lat, y_pred_lat)
r2_lat = r2_score(y_true_lat, y_pred_lat)

# === 🔹 Overall metrics (combined across both targets) ===
# Stack the two sets of predictions into single arrays
y_true_all = np.concatenate([y_true_acc.values, y_true_lat.values])
y_pred_all = np.concatenate([y_pred_acc.values, y_pred_lat.values])

mae_overall = mean_absolute_error(y_true_all, y_pred_all)
mape_overall = mean_absolute_percentage_error(y_true_all, y_pred_all)
r2_overall = r2_score(y_true_all, y_pred_all)

# === Create summary DataFrame ===
metrics_df = pd.DataFrame({
    "Target": ["CIFAR10"],
    "MAE": [mae_overall],
    "MAPE": [mape_overall],
    "R2": [r2_overall]
}).round(4)

metrics_df


Unnamed: 0,Target,MAE,MAPE,R2
0,CIFAR10,442.5109,0.1766,0.9676


In [None]:
merged_df=merged_df[['New_Combination_zero','Global_Accuracy(%)_sim','Global_Accuracy(%)_zero','Global_Latency_Sum(ms)_zero','Global_Latency_Sum(ms)_sim']]
merged_df


Unnamed: 0,New_Combination_zero,Global_Accuracy(%)_sim,Global_Accuracy(%)_zero,Global_Latency_Sum(ms)_zero,Global_Latency_Sum(ms)_sim
0,C3-C8-C10-C16-C17,12.220000,28.492008,35838.758,22148.663759
1,C1-C6-C8-C15-C19,11.270000,16.092289,43902.336,21416.340113
2,C4-C6-C7-C17-C19,11.840000,21.384918,36370.918,21243.297100
3,C2-C5-C8-C10-C16,15.090001,36.961586,39407.418,22156.106472
4,C2-C6-C8-C15-C18,11.260000,20.835371,41440.490,21243.162394
...,...,...,...,...,...
145,C4-C5-C7-C16-C19,11.320000,22.425201,36737.566,21219.234228
146,C6-C9-C10-C17-C24,10.810000,23.190815,35419.330,21285.298109
147,C8-C9-C10-C21-C25,11.340000,33.707580,32766.547,21191.868305
148,C5-C9-C10-C21-C23,12.690000,25.113280,33159.880,21155.897141


In [None]:
merged_df_updated.isna().sum()

Unnamed: 0,0
Unnamed: 0,0
Combination,0
Global_Accuracy(%)_zero,0
Global_Latency_Sum(ms)_zero,0
New_Combination_zero,0
Numeric_Tuple,0
New_Combination_sim,0
Global_Accuracy(%)_sim,0
Global_DataVolume,0
Global_Latency_Sum(ms)_sim,0
