In [66]:
import os
import pandas as pd
import numpy as np

RES_DIR = "../res"

# Load experiment results

In [67]:
def load_data(res_dir=None):
    """Load the experiment results table and attach the array-valued columns.

    Reads ``all_experiments_results.parquet`` and
    ``all_experiments_embeddings.npz`` from the results directory, copies five
    arrays from the archive into same-named DataFrame columns (one array
    element per row), drops every column that contains a missing value, and
    casts ``input_name`` to ``str``.

    Args:
        res_dir: Directory containing both files. When omitted, the
            notebook-level ``RES_DIR`` constant is used.

    Returns:
        pandas.DataFrame: The results table with the archive columns attached.
    """
    if res_dir is None:
        res_dir = RES_DIR

    array_keys = [
        "distance",
        "original_image_pred_proba",
        "embedding_pred_proba",
        "modified_image_pred_proba",
        "modified_image",
    ]

    df = pd.read_parquet(os.path.join(res_dir, "all_experiments_results.parquet"))

    # SECURITY NOTE: allow_pickle=True lets the archive execute arbitrary code
    # while unpickling object arrays. Only load archives produced by a trusted
    # run of this project.
    archive_path = os.path.join(res_dir, "all_experiments_embeddings.npz")
    # An .npz archive is read lazily; the context manager guarantees the
    # underlying file handle is released once the arrays are materialised.
    with np.load(archive_path, allow_pickle=True) as npz_data:
        for key in array_keys:
            df[key] = list(npz_data[key])

    # Build a new frame instead of mutating in place, so the function stays
    # free of chained-assignment warnings and hidden state.
    return df.dropna(axis=1).assign(
        input_name=lambda frame: frame["input_name"].astype(str)
    )

In [68]:
# Load the combined results table once; the cells below read from `results`.
results = load_data()

In [6]:
# Preview the first two rows to check the loaded columns.
results.head(2)

Unnamed: 0,dataset,delta_multiplier,patch_option,input_name,explored_patches,patch_attribution,number_explored_patches,save_each,algorithm,iteration,...,time,repetition,original_image_pred,embedding_pred,modified_image_pred,distance,original_image_pred_proba,embedding_pred_proba,modified_image_pred_proba,modified_image
0,cifar,0.1,one,img_5658,[98],[0.12937094271183014],1,10,simec,0,...,1.000861,1,2,2,2,[2.6655062644596602e-24],"[1.5081425, -6.8751864, 5.2393827, 3.6001482, ...","[1.5081419, -6.8751874, 5.239383, 3.6001492, 1...","[1.5081425, -6.875187, 5.2393827, 3.6001487, 1...","[[105.99999, 117.000046, 71.99997, 99.99997], ..."
1,cifar,0.1,one,img_5658,[98],[0.12937094271183014],1,10,simec,10,...,0.773444,1,2,2,2,[-3.5269639600683465e-24],"[1.5081425, -6.8751864, 5.2393827, 3.6001482, ...","[1.5081418, -6.875186, 5.239382, 3.6001482, 1....","[1.5081425, -6.875187, 5.2393827, 3.6001487, 1...","[[105.99999, 117.000046, 71.99997, 99.99997], ..."


In [7]:
# Summarise column dtypes and non-null counts of the loaded table.
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 514898 entries, 0 to 514897
Data columns (total 21 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   dataset                    514898 non-null  object 
 1   delta_multiplier           514898 non-null  float64
 2   patch_option               514898 non-null  object 
 3   input_name                 514898 non-null  object 
 4   explored_patches           514898 non-null  object 
 5   patch_attribution          514898 non-null  object 
 6   number_explored_patches    514898 non-null  int64  
 7   save_each                  514898 non-null  int64  
 8   algorithm                  514898 non-null  object 
 9   iteration                  514898 non-null  int64  
 10  delta                      514898 non-null  float64
 11  time                       514898 non-null  float64
 12  repetition                 514898 non-null  int64  
 13  original_image_pred        51

# Prediction probabilities

Is there a change in the top prediction?

In [70]:
# Flag rows whose top prediction differs from the original image's prediction,
# once for the embedding prediction and once for the modified-image prediction.
results["orig_vs_embed_diff"] = results["original_image_pred"].ne(results["embedding_pred"])
results["orig_vs_mod_diff"] = results["original_image_pred"].ne(results["modified_image_pred"])

# Per experiment configuration: number of flagged rows and total row count.
grouped_res = (
    results
    .groupby(["dataset", "patch_option", "delta_multiplier", "algorithm"])
    .agg(
        orig_vs_embed_diff_count=pd.NamedAgg(column="orig_vs_embed_diff", aggfunc="sum"),
        orig_vs_mod_diff_count=pd.NamedAgg(column="orig_vs_mod_diff", aggfunc="sum"),
        total=pd.NamedAgg(column="original_image_pred", aggfunc="count"),
    )
    .reset_index()
)
grouped_res

Unnamed: 0,dataset,patch_option,delta_multiplier,algorithm,orig_vs_embed_diff_count,orig_vs_mod_diff_count,total
0,cifar,all,0.1,simec,0,0,6060
1,cifar,all,0.1,simexp,923,648,6060
2,cifar,all,1.0,simec,0,0,6060
3,cifar,all,1.0,simexp,880,0,6060
4,cifar,one,0.1,simec,0,0,6060
5,cifar,one,0.1,simexp,0,0,6060
6,cifar,one,1.0,simec,0,0,6060
7,cifar,one,1.0,simexp,0,0,6060
8,cifar,q2,0.1,simec,0,0,6060
9,cifar,q2,0.1,simexp,216,0,6060


In [71]:
# Keep only the rows where the embedding prediction or the modified-image
# prediction disagrees with the original image's prediction.
diff_df = results[
    (results["original_image_pred"] != results["embedding_pred"]) |
    (results["original_image_pred"] != results["modified_image_pred"])
]

# Columns that identify the run and show the three predictions side by side.
selected_columns = [
    "input_name", "repetition", "iteration",
    "original_image_pred", "embedding_pred", "modified_image_pred",
    "dataset", "patch_option", "delta_multiplier", "algorithm"
]

diff_details = diff_df[selected_columns].copy()

# Label which prediction(s) differ. This is done with vectorised masks rather
# than a row-wise `apply`: it avoids a Python-level loop over every row, and it
# also works when no row differs (`apply(axis=1)` on an empty frame returns a
# DataFrame, which cannot be assigned to a single column).
embed_differs = (diff_df["original_image_pred"] != diff_df["embedding_pred"]).to_numpy()
mod_differs = (diff_df["original_image_pred"] != diff_df["modified_image_pred"]).to_numpy()
diff_details["diff_type"] = np.select(
    [embed_differs & mod_differs, embed_differs],
    ["both", "embed_only"],
    # Every row in diff_df differs somewhere, so the remainder is mod-only.
    default="mod_only",
)

diff_details = diff_details.sort_values(by=["input_name", "repetition", "iteration"])

# Preview
diff_details

Unnamed: 0,input_name,repetition,iteration,original_image_pred,embedding_pred,modified_image_pred,dataset,patch_option,delta_multiplier,algorithm,diff_type
23362,img_2154,1,310,2,3,2,cifar,q2,1.0,simexp,embed_only
23363,img_2154,1,320,2,3,2,cifar,q2,1.0,simexp,embed_only
23364,img_2154,1,330,2,3,2,cifar,q2,1.0,simexp,embed_only
23365,img_2154,1,340,2,3,2,cifar,q2,1.0,simexp,embed_only
23366,img_2154,1,350,2,3,2,cifar,q2,1.0,simexp,embed_only
...,...,...,...,...,...,...,...,...,...,...,...
52048,img_989,3,330,0,2,0,cifar,q2,0.1,simexp,embed_only
52049,img_989,3,340,0,2,0,cifar,q2,0.1,simexp,embed_only
52050,img_989,3,350,0,2,0,cifar,q2,0.1,simexp,embed_only
52051,img_989,3,360,0,2,0,cifar,q2,0.1,simexp,embed_only


In [72]:
# Header of the tab-separated change log printed below.
print("type\tdataset\talgo\tdelta m\tpatch\tinput_name\trep\titer\tor\temb\tmod")

# A run is one (configuration, image, repetition) combination.
group_cols = ["dataset", "patch_option", "delta_multiplier", "algorithm", "input_name", "repetition"]
# Mask of rows sharing the same run and iteration; not used further in this cell.
dups = results.duplicated(subset=group_cols + ['iteration'], keep=False)

for run_key, run in results.groupby(group_cols):
    ordered = list(run.sort_values("iteration").itertuples(index=False))

    # Walk consecutive iteration pairs and report every step at which the
    # embedding prediction or the modified-image prediction changes.
    for previous, row in zip(ordered, ordered[1:]):
        emb_flipped = bool(row.embedding_pred != previous.embedding_pred)
        mod_flipped = bool(row.modified_image_pred != previous.modified_image_pred)

        if not (emb_flipped or mod_flipped):
            continue

        if emb_flipped and mod_flipped:
            change_type = "Both"
        elif emb_flipped:
            change_type = "Emb"
        else:
            change_type = "Mod"

        print(
            f"{change_type}\t"
            f"{row.dataset}\t{row.algorithm}\t{row.delta_multiplier}\t{row.patch_option}\t"
            f"{row.input_name}\t{row.repetition}\t{row.iteration}\t"
            f"{row.original_image_pred}\t{row.embedding_pred}\t{row.modified_image_pred}"
        )

type	dataset	algo	delta m	patch	input_name	rep	iter	or	emb	mod
Emb	cifar	simexp	0.1	all	img_2697	1	40	3	2	3
Mod	cifar	simexp	0.1	all	img_2697	1	90	3	2	2
Emb	cifar	simexp	0.1	all	img_2697	1	270	3	0	2
Mod	cifar	simexp	0.1	all	img_2697	1	930	3	0	0
Emb	cifar	simexp	0.1	all	img_2697	2	20	3	2	3
Mod	cifar	simexp	0.1	all	img_2697	2	100	3	2	2
Emb	cifar	simexp	0.1	all	img_2697	2	480	3	0	2
Emb	cifar	simexp	0.1	all	img_2697	2	650	3	2	2
Emb	cifar	simexp	0.1	all	img_2697	2	810	3	0	2
Emb	cifar	simexp	0.1	all	img_2697	3	20	3	2	3
Mod	cifar	simexp	0.1	all	img_2697	3	90	3	2	2
Emb	cifar	simexp	0.1	all	img_2697	3	260	3	0	2
Mod	cifar	simexp	0.1	all	img_2697	3	970	3	0	0
Emb	cifar	simexp	0.1	all	img_6845	1	780	2	0	2
Emb	cifar	simexp	0.1	all	img_6845	2	810	2	0	2
Emb	cifar	simexp	0.1	all	img_6845	3	800	2	0	2
Emb	cifar	simexp	0.1	all	img_8969	1	10	0	2	0
Mod	cifar	simexp	0.1	all	img_8969	1	40	0	2	2
Emb	cifar	simexp	0.1	all	img_8969	2	10	0	2	0
Mod	cifar	simexp	0.1	all	img_8969	2	30	0	2	2
Emb	cifar	simexp	0.1	all	i

In [73]:
# A run is identified by its full configuration plus image and repetition.
# The same (input_name, repetition) pair occurs under several dataset /
# patch_option / delta_multiplier / algorithm settings, so grouping on the
# pair alone would merge unrelated runs and keep a single "first change"
# for all of them.
run_keys = [
    'input_name', 'repetition',
    'dataset', 'patch_option', 'delta_multiplier', 'algorithm',
]

# Step 1: Sort so that, within each run, rows are ordered by iteration
df_sorted = results.sort_values(by=run_keys + ['iteration'])

# Step 2: Define the original prediction per run
df_sorted['original_pred'] = df_sorted.groupby(run_keys)['original_image_pred'].transform('first')

# Step 3: Compare current embedding/mod predictions to original prediction
df_sorted['embedding_changed'] = df_sorted['embedding_pred'] != df_sorted['original_pred']
df_sorted['modified_changed'] = df_sorted['modified_image_pred'] != df_sorted['original_pred']

# Step 4: Keep the rows where either prediction differs from the original
change_rows = df_sorted[(df_sorted['embedding_changed']) | (df_sorted['modified_changed'])]

# Step 5: Keep only the earliest differing iteration of each run
# (rows are already sorted by iteration within a run).
first_changes = change_rows.drop_duplicates(subset=run_keys)

# Step 6: Select columns of interest and export them
final_result = first_changes[[
    'input_name', 'repetition', 'iteration',
    'dataset', 'patch_option', 'delta_multiplier', 'algorithm',
    'embedding_changed', 'modified_changed'
]]
final_result.to_csv("interpretation.csv")

In [74]:
# Distinct combinations of image, the three predictions, configuration and
# diff type (repetition and iteration are left out before de-duplicating).
diff_details.loc[
    :,
    [
        "input_name",
        "original_image_pred", "embedding_pred", "modified_image_pred",
        "dataset", "patch_option", "delta_multiplier", "algorithm",
        "diff_type",
    ],
].drop_duplicates()

Unnamed: 0,input_name,original_image_pred,embedding_pred,modified_image_pred,dataset,patch_option,delta_multiplier,algorithm,diff_type
23362,img_2154,2,3,2,cifar,q2,1.0,simexp,embed_only
14848,img_2697,3,2,3,cifar,q2,1.0,simexp,embed_only
26971,img_2697,3,2,3,cifar,all,0.1,simexp,embed_only
14854,img_2697,3,0,2,cifar,q2,1.0,simexp,both
26976,img_2697,3,2,2,cifar,all,0.1,simexp,both
26994,img_2697,3,0,2,cifar,all,0.1,simexp,both
14572,img_2697,3,2,3,cifar,q2,1.0,simec,embed_only
14881,img_2697,3,0,0,cifar,q2,1.0,simexp,both
27060,img_2697,3,0,0,cifar,all,0.1,simexp,both
14954,img_2697,3,2,2,cifar,q2,1.0,simexp,both


In [75]:
# Same distinct view, restricted to rows where both the embedding prediction
# and the modified-image prediction differ from the original.
diff_details.loc[
    diff_details["diff_type"].eq("both"),
    [
        "input_name",
        "original_image_pred", "embedding_pred", "modified_image_pred",
        "dataset", "patch_option", "delta_multiplier", "algorithm",
        "diff_type",
    ],
].drop_duplicates()

Unnamed: 0,input_name,original_image_pred,embedding_pred,modified_image_pred,dataset,patch_option,delta_multiplier,algorithm,diff_type
14854,img_2697,3,0,2,cifar,q2,1.0,simexp,both
26976,img_2697,3,2,2,cifar,all,0.1,simexp,both
26994,img_2697,3,0,2,cifar,all,0.1,simexp,both
14881,img_2697,3,0,0,cifar,q2,1.0,simexp,both
27060,img_2697,3,0,0,cifar,all,0.1,simexp,both
14954,img_2697,3,2,2,cifar,q2,1.0,simexp,both
33637,img_8969,0,2,2,cifar,all,0.1,simexp,both
20914,img_8969,0,2,2,cifar,q2,1.0,simexp,both
36132,img_9309,2,0,0,cifar,all,0.1,simexp,both


In [76]:
# Distinct runs (configuration, image, repetition) that contain at least one
# differing prediction.
diff_details.loc[
    :,
    ['dataset', 'patch_option', 'delta_multiplier', 'algorithm', 'input_name', 'repetition'],
].drop_duplicates()

Unnamed: 0,dataset,patch_option,delta_multiplier,algorithm,input_name,repetition
23362,cifar,q2,1.0,simexp,img_2154,1
23450,cifar,q2,1.0,simexp,img_2154,2
23554,cifar,q2,1.0,simexp,img_2154,3
14848,cifar,q2,1.0,simexp,img_2697,1
26971,cifar,all,0.1,simexp,img_2697,1
14572,cifar,q2,1.0,simec,img_2697,1
27070,cifar,all,0.1,simexp,img_2697,2
14951,cifar,q2,1.0,simexp,img_2697,2
14674,cifar,q2,1.0,simec,img_2697,2
27171,cifar,all,0.1,simexp,img_2697,3


In [77]:
# The distinct affected runs as a list of plain tuples.
[
    *diff_details
    .loc[:, ['dataset', 'patch_option', 'delta_multiplier', 'algorithm', 'input_name', 'repetition']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
]

[('cifar', 'q2', 1.0, 'simexp', 'img_2154', 1),
 ('cifar', 'q2', 1.0, 'simexp', 'img_2154', 2),
 ('cifar', 'q2', 1.0, 'simexp', 'img_2154', 3),
 ('cifar', 'q2', 1.0, 'simexp', 'img_2697', 1),
 ('cifar', 'all', 0.1, 'simexp', 'img_2697', 1),
 ('cifar', 'q2', 1.0, 'simec', 'img_2697', 1),
 ('cifar', 'all', 0.1, 'simexp', 'img_2697', 2),
 ('cifar', 'q2', 1.0, 'simexp', 'img_2697', 2),
 ('cifar', 'q2', 1.0, 'simec', 'img_2697', 2),
 ('cifar', 'all', 0.1, 'simexp', 'img_2697', 3),
 ('cifar', 'q2', 1.0, 'simexp', 'img_2697', 3),
 ('cifar', 'q2', 1.0, 'simec', 'img_2697', 3),
 ('cifar', 'all', 1.0, 'simexp', 'img_4408', 1),
 ('cifar', 'q2', 1.0, 'simexp', 'img_4408', 1),
 ('cifar', 'all', 1.0, 'simexp', 'img_4408', 2),
 ('cifar', 'all', 1.0, 'simexp', 'img_4408', 3),
 ('cifar', 'all', 1.0, 'simexp', 'img_551', 2),
 ('cifar', 'all', 1.0, 'simexp', 'img_551', 3),
 ('cifar', 'all', 1.0, 'simexp', 'img_6071', 1),
 ('cifar', 'all', 1.0, 'simexp', 'img_6071', 3),
 ('cifar', 'all', 1.0, 'simexp', 'i