## Some Metrics 

In [1]:
import pandas as pd
import numpy as np
import os
import sys

In [2]:
# Load the four result tables whose file names carry no "_synth" tag.
# Files are read in the order listed, one DataFrame per name on the left.
df_results_v5a, df_results_v5m, df_results_v6a, df_results_v6m = map(
    pd.read_csv,
    (
        "results_v5a.csv",
        "results_v5m.csv",
        "results_v6_a.csv",
        "results_v6_m.csv",
    ),
)

# Load the four "_synth" result tables, again in the order listed.
(
    df_results_v5a_synth,
    df_results_v6a_synth,
    df_results_v5m_synth,
    df_results_v6m_synth,
) = map(
    pd.read_csv,
    (
        "results_v5_synth_a.csv",
        "results_v6_synth_a.csv",
        "results_v5_synth_m.csv",
        "results_v6_synth_m.csv",
    ),
)

In [12]:
import pandas as pd
import numpy as np

# Short label -> CSV path for every result file summarised below.
files = {
    "v5a": "results_v5a.csv",
    "v5m": "results_v5m.csv",
    "v6a": "results_v6_a.csv",
    "v6m": "results_v6_m.csv",
    "v5a_synth": "results_v5_synth_a.csv",
    "v6a_synth": "results_v6_synth_a.csv",
    "v5m_synth": "results_v5_synth_m.csv",
    "v6m_synth": "results_v6_synth_m.csv",
}

# One summary row per file, built from the square root of the "avg_area" column.
# Original author's note: there is one box per row, so avg_area is that box's area.
results = []
for name, path in files.items():
    df = pd.read_csv(path)
    sqrt_area = np.sqrt(df["avg_area"])
    results.append(
        {
            "fichier": name,
            # NOTE: despite the column name this is a standard deviation (np.std),
            # not a variance.
            "variance_avg_area": np.std(sqrt_area),
            # NOTE: despite the column name this is the plain mean of sqrt(avg_area),
            # not a root-mean-square.
            "rms_avg_area": np.mean(sqrt_area),
        }
    )

df_summary = pd.DataFrame(results)
print(df_summary)

     fichier  variance_avg_area  rms_avg_area
0        v5a         156.735305    194.921426
1        v5m         177.446512    200.419039
2        v6a         156.595371    188.763423
3        v6m         173.470811    193.051949
4  v5a_synth         185.844961    213.101722
5  v6a_synth         184.725090    206.806811
6  v5m_synth         209.783191    220.709397
7  v6m_synth         204.535365    213.173858


In [None]:
# Pixel margin: per-coordinate mean absolute difference between box_outer and the pred_yolo / gt boxes
import pandas as pd
import numpy as np
import ast
import re

def parse_box_string(s):
    """Parse the text form of a list of boxes and return the first box.

    Accepts either a Python-literal list (``"[[1, 2, 3, 4]]"``) or a
    NumPy-style printout where numbers are separated by whitespace only
    (``"[[10. 20. 30. 40.]]"``, ``"[[1 -2 3 4]]"``). Whitespace sitting
    between two numbers is replaced by a comma so that the text can be read
    with ``ast.literal_eval``.

    Parameters
    ----------
    s : str or missing value
        Text to parse. Anything that is not a ``str`` (None, NaN, ...) is
        treated as missing.

    Returns
    -------
    numpy.ndarray
        ``np.array(val[0])`` when the parsed value ``val`` is a non-empty list
        of boxes; ``np.array(val)`` when ``val`` is itself a flat list of
        numbers (a single un-nested box); otherwise an array of four NaNs
        (missing input, unparseable text, empty list, non-list literal).
    """
    missing = np.array([np.nan] * 4)

    # A str is never NaN/None, so this single check covers missing values too.
    if not isinstance(s, str):
        return missing

    # Insert a comma wherever whitespace separates two numbers. The character
    # before the gap may be a digit or a trailing "." (NumPy prints 10.0 as
    # "10."); the character after may be a digit, a sign, or a leading ".".
    s_fixed = re.sub(r'(?<=[\d.])\s+(?=[-+.\d])', ',', s)

    try:
        val = ast.literal_eval(s_fixed)
        if isinstance(val, list) and len(val) > 0:
            first = val[0]
            # A flat "[x1 y1 x2 y2]" is one box, not a list of boxes; returning
            # val[0] there would hand callers a 0-d scalar array.
            if isinstance(first, (int, float)):
                return np.array(val)
            return np.array(first)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Best-effort parsing: malformed text is reported as a missing box.
        pass
    return missing

def mean_margin(df, box1_col, box2_col):
    """Average, coordinate by coordinate, the absolute gap between two box columns.

    Each cell of ``df[box1_col]`` and ``df[box2_col]`` is parsed with
    ``parse_box_string``. Rows where either parsed box contains a NaN are
    skipped. Returns the mean over the remaining rows of ``|box1 - box2|``
    (taken along axis 0), or an array of four NaNs when no row is usable.
    """
    parsed_pairs = (
        (parse_box_string(first_raw), parse_box_string(second_raw))
        for first_raw, second_raw in zip(df[box1_col], df[box2_col])
    )
    gaps = np.array(
        [
            np.abs(first - second)
            for first, second in parsed_pairs
            if not (np.isnan(first).any() or np.isnan(second).any())
        ]
    )
    if len(gaps) == 0:
        return np.array([np.nan] * 4)
    return gaps.mean(axis=0)

# Short label -> CSV path for every result file processed below.
files = {
    "v5a": "results_v5a.csv",
    "v5m": "results_v5m.csv",
    "v6a": "results_v6_a.csv",
    "v6m": "results_v6_m.csv",
    "v5a_synth": "results_v5_synth_a.csv",
    "v6a_synth": "results_v6_synth_a.csv",
    "v5m_synth": "results_v5_synth_m.csv",
    "v6m_synth": "results_v6_synth_m.csv",
}

# One row per file: mean absolute gap between "box_outer" and the other box,
# reported for each of the four coordinates plus their overall mean.
results = []
for name, path in files.items():
    df = pd.read_csv(path)

    # Gap to the YOLO prediction (original note: how much the prediction can be trusted).
    margin_pred = mean_margin(df, "box_outer", "pred_yolo")
    # Gap to the ground-truth box.
    margin_gt = mean_margin(df, "box_outer", "gt")

    # Coordinates 0..3 are reported as left, top, right, bottom.
    pred_left, pred_top, pred_right, pred_bottom = (margin_pred[i] for i in range(4))
    gt_left, gt_top, gt_right, gt_bottom = (margin_gt[i] for i in range(4))

    row = {
        "fichier": name,
        "marge_left_pred": pred_left,
        "marge_top_pred": pred_top,
        "marge_right_pred": pred_right,
        "marge_bottom_pred": pred_bottom,
        "marge_left_gt": gt_left,
        "marge_top_gt": gt_top,
        "marge_right_gt": gt_right,
        "marge_bottom_gt": gt_bottom,
        # Mean over the four prediction margins.
        "marge_pred_mean": np.mean(margin_pred),
        # Mean over the four ground-truth margins.
        "marge_gt_mean": np.mean(margin_gt),
    }
    results.append(row)

df_marges = pd.DataFrame(results)
print(df_marges)

     fichier  marge_left_pred  marge_top_pred  marge_right_pred  \
0        v5a        11.756713        5.794552          8.930912   
1        v5m        18.168259        7.848067         12.041179   
2        v6a        10.388060        3.471438          7.755489   
3        v6m        15.578313        4.922356         11.754398   
4  v5a_synth        11.990723        5.502686          8.865966   
5  v6a_synth        10.659422        3.202028          7.755492   
6  v5m_synth        21.240302        8.009249         14.387174   
7  v6m_synth        18.833792        4.975623         13.151775   

   marge_bottom_pred  marge_left_gt  marge_top_gt  marge_right_gt  \
0           8.539671      11.975850      6.073535       10.035560   
1          10.647329      17.099191      8.068846       11.990066   
2           6.523437      10.519379      4.729569        9.205454   
3           7.316546      15.147784      6.206010       11.931098   
4           8.059444      12.234019      5.911663  

In [7]:
# Stretch: mean of sqrt(avg_area / area of the raw pred_yolo box)
def box_area(box):
    """Return the area of ``box`` given as ``[x_min, y_min, x_max, y_max]``.

    A negative width or height is clamped to 0, so an inverted box has area 0.
    """
    width = box[2] - box[0]
    height = box[3] - box[1]
    clamped_width = max(0, width)
    clamped_height = max(0, height)
    return clamped_width * clamped_height


def compute_stretch(df, raw_col='pred_yolo', conform_area_col='avg_area'):
    """Mean of ``sqrt(conformal area / raw box area)`` over the rows of ``df``.

    For each row, the box text in ``raw_col`` is parsed with
    ``parse_box_string`` and its area computed with ``box_area``. A row
    contributes only if the parsed box has no NaN, the value in
    ``conform_area_col`` is not missing, and the raw area is strictly positive.
    Returns NaN when no row contributes.
    """
    ratios = []
    for raw_text, conform_area in zip(df[raw_col], df[conform_area_col]):
        raw_box = parse_box_string(raw_text)

        # Skip rows with an unparseable box or a missing conformal area.
        if np.isnan(raw_box).any() or pd.isna(conform_area):
            continue

        raw_area = box_area(raw_box)
        # Guard against division by zero for degenerate boxes.
        if raw_area > 0:
            ratios.append(np.sqrt(conform_area / raw_area))

    if not ratios:
        return np.nan
    return np.mean(ratios)


In [14]:
# Short label -> CSV path for every result file processed below.
files = {
    "v5a": "results_v5a.csv",
    "v5m": "results_v5m.csv",
    "v6a": "results_v6_a.csv",
    "v6m": "results_v6_m.csv",
    "v5a_synth": "results_v5_synth_a.csv",
    "v6a_synth": "results_v6_synth_a.csv",
    "v5m_synth": "results_v5_synth_m.csv",
    "v6m_synth": "results_v6_synth_m.csv",
}

# One row per file holding the value returned by compute_stretch
# (called with its default column names).
stretch_results = [
    {"fichier": name, "stretch": compute_stretch(pd.read_csv(path))}
    for name, path in files.items()
]

df_stretch = pd.DataFrame(stretch_results)
print(df_stretch)


     fichier   stretch
0        v5a  1.155251
1        v5m  1.131065
2        v6a  1.126092
3        v6m  1.105741
4  v5a_synth  1.145744
5  v6a_synth  1.119661
6  v5m_synth  1.127565
7  v6m_synth  1.105226


### **Merging** 

In [16]:
# Join the three per-file tables on the file label ("fichier"). Outer joins keep
# a label even if it is missing from one of the tables. Rows are then ordered
# by label.
df_merged = (
    df_summary
    .merge(df_marges, on="fichier", how="outer")
    .merge(df_stretch, on="fichier", how="outer")
    .sort_values(by="fichier")
)

# Persist the combined table without the index column, then preview it.
df_merged.to_csv("conformal_metrics_summary.csv", index=False)

print(df_merged.head())

     fichier  variance_avg_area  rms_avg_area  marge_left_pred  \
0        v5a         156.735305    194.921426        11.756713   
1  v5a_synth         185.844961    213.101722        11.990723   
2        v5m         177.446512    200.419039        18.168259   
3  v5m_synth         209.783191    220.709397        21.240302   
4        v6a         156.595371    188.763423        10.388060   

   marge_top_pred  marge_right_pred  marge_bottom_pred  marge_left_gt  \
0        5.794552          8.930912           8.539671      11.975850   
1        5.502686          8.865966           8.059444      12.234019   
2        7.848067         12.041179          10.647329      17.099191   
3        8.009249         14.387174           9.741377      20.874961   
4        3.471438          7.755489           6.523437      10.519379   

   marge_top_gt  marge_right_gt  marge_bottom_gt   stretch  
0      6.073535       10.035560         8.539230  1.155251  
1      5.911663       10.056758         8.