TABLES (1- MEDIAN; 2- THRESHOLD; 3- PRESENCE PERCENTAGE; 4- NON-ZERO MEDIAN)

In [None]:
import pandas as pd
from tabulate import tabulate
import os
from typing import List, Dict, Optional, Any

# --- GLOBAL FILE CONFIGURATION ---
# Maps a short dataset label to the CSV file holding that dataset.
# The label is used by the analyses as a column prefix and in output file names.
FILES_CONFIG = {
    "GR": "gr_gpt_ALL.csv",
    "SYNT": "synt_gpt_ALL.csv",
    "READ": "read_gpt_ALL.csv",
    "INFDENS": "infdens_gpt_ALL.csv",
}

# Shared loading helper
def load_and_preprocess(path: str) -> Optional[pd.DataFrame]:
    """Read a dataset CSV and normalise its grouping columns.

    A ``sentence_percentage`` column is renamed to ``percentage``.  When no
    ``percentage`` column exists, one is derived from ``setting`` through a
    fixed mapping (``"2s_3e"`` -> 50, ``"4s_3e"`` -> 100); other settings map
    to NaN.

    Args:
        path: Location of the CSV file.

    Returns:
        The loaded frame, or ``None`` when the file does not exist, cannot be
        read, or lacks the required columns.  The reason is printed.
    """
    if not os.path.exists(path):
        print(f" File not found: {path}")
        return None

    try:
        frame = pd.read_csv(path, encoding="utf-8")
        # Renaming is a no-op when the source column is absent.
        frame.rename(columns={"sentence_percentage": "percentage"}, inplace=True)

        has_percentage = "percentage" in frame.columns
        if not has_percentage and "setting" not in frame.columns:
            raise ValueError("Missing 'percentage' or 'setting' column.")
        if not has_percentage:
            frame["percentage"] = frame["setting"].map({"2s_3e": 50, "4s_3e": 100})

        if "manipulation" not in frame.columns:
            raise ValueError("Missing 'manipulation' column")
        return frame
    except Exception as e:
        # Best effort: report the problem and let the caller skip this dataset.
        print(f" Error loading {path}: {e}")
        return None

In [None]:
# --- ANALYSIS 1 ---
# Metric columns whose per-(manipulation, percentage) medians are tabulated.
METRICS_ANALYSIS_1 = [
    "acc_median_final",
    "comp_final_median",
    "key_concept_coverage",
    "Conciseness",
]

# Output folder that keeps all Analysis 1 artefacts together
OUTPUT_DIR_1 = "results_medians"
# Unified table across datasets, as CSV and as a plain-text grid.
OUTPUT_CSV_LONG = os.path.join(OUTPUT_DIR_1, "final_table_long.csv")
OUTPUT_TXT_LONG = os.path.join(OUTPUT_DIR_1, "final_table_long.txt")

def process_dataset_median(df: pd.DataFrame, dataset_name: str, metrics: List[str]) -> pd.DataFrame:
    """Build a median table for one dataset.

    Medians of every metric are taken per (manipulation, percentage) pair.
    The result has one row per ``"<metric>_<percentage>"`` and one column per
    ``"<dataset_name>_<manipulation>"``.

    Args:
        df: Dataset with ``manipulation``, ``percentage`` and metric columns.
        dataset_name: Prefix applied to every output column.
        metrics: Metric columns to summarise, in output order.

    Returns:
        The stacked per-metric median table.
    """
    medians = df.groupby(["manipulation", "percentage"])[metrics].median().reset_index()

    def _block_for(metric):
        # One wide block per metric: percentages down, manipulations across.
        wide = medians.pivot(index="percentage", columns="manipulation", values=metric)
        wide.index = [f"{metric}_{int(level)}" for level in wide.index]
        return wide

    combined = pd.concat([_block_for(metric) for metric in metrics])
    combined.columns = [f"{dataset_name}_{manip}" for manip in combined.columns]
    return combined

# Driver for Analysis 1: one median table per dataset, then a unified table.
print("Starting Standard Median Analysis...")
os.makedirs(OUTPUT_DIR_1, exist_ok=True)
processed_tables_1 = []

for name, path in FILES_CONFIG.items():
    df = load_and_preprocess(path)
    if df is None:
        # The loader has already printed why this dataset is unavailable.
        continue
    try:
        table = process_dataset_median(df, name, METRICS_ANALYSIS_1)

        # --- SAVE ONE TABLE PER DIMENSION ---
        single_csv_path = os.path.join(OUTPUT_DIR_1, f"{name}_median.csv")
        table.to_csv(single_csv_path, encoding="utf-8")
        print(f" -> Saved SINGLE table: {single_csv_path}")
    except Exception as e:
        print(f" Error processing {name}: {e}")
    else:
        processed_tables_1.append(table)

if not processed_tables_1:
    print("No data processed for Analysis 1.")
else:
    # UNIFIED TABLE: datasets side by side, missing cells shown as 0.
    final_table_1 = pd.concat(processed_tables_1, axis=1).fillna(0)

    print("\n=== FINAL UNIFIED TABLE (MEDIANS) ===")
    table_str_1 = tabulate(final_table_1, headers="keys", tablefmt="grid", showindex=True)
    print(table_str_1)

    final_table_1.to_csv(OUTPUT_CSV_LONG, encoding="utf-8")
    with open(OUTPUT_TXT_LONG, "w", encoding="utf-8") as f:
        f.write(table_str_1)

    print(f"\n Results saved to directory: {OUTPUT_DIR_1}")

In [None]:
# --- ANALYSIS 2 ---
OUTPUT_DIR_2 = "results_thresholds"

# Threshold per metric. As interpreted by calculate_pass_rate:
#   - a {"low", "high"} dict yields two rates (share below low, share above high);
#   - Num_addition / Num_direct_modifications yield the share ABOVE the value;
#   - any other metric yields the share BELOW the value.
THRESHOLDS_06 = {
    "acc_median_final": 0.6,
    "comp_final_median": 0.6,
    "key_concept_coverage": 0.6,
    "Conciseness": {"low": 0.5, "high": 1.5},
    "Num_addition": 2,
    "Num_direct_modifications": 2,
}

# Second threshold set; it has no Conciseness entry.
THRESHOLDS_04 = {
    "acc_median_final": 0.4,
    "comp_final_median": 0.4,
    "key_concept_coverage": 0.4,
    "Num_addition": 4,
    "Num_direct_modifications": 4,
}

# All threshold sets evaluated by the analysis, keyed by a short label.
THRESHOLD_SETS = {"thr06": THRESHOLDS_06, "thr04": THRESHOLDS_04}

def calculate_pass_rate(df_grouped, metric: str, threshold: Any) -> List[pd.Series]:
    """Compute, per group, the percentage of values beyond a threshold.

    Args:
        df_grouped: A grouped DataFrame exposing the ``metric`` column.
        metric: Column to evaluate.
        threshold: Either a scalar, or a dict with ``"low"`` and ``"high"``.

    Returns:
        A list of Series indexed by group.  A low/high dict gives two Series
        (share below ``low``, share above ``high``); ``Num_addition`` and
        ``Num_direct_modifications`` give the share above the threshold; every
        other metric gives the share below it.  Percentages are relative to
        the non-missing values of the group (0.0 when there are none).
    """

    def _share(predicate, label):
        def _rate(values):
            valid = values.count()
            if valid > 0:
                return predicate(values).sum() / valid * 100
            return 0.0

        rates = df_grouped[metric].apply(_rate)
        rates.name = label
        return rates

    is_band = isinstance(threshold, dict) and "low" in threshold and "high" in threshold
    if is_band:
        lower, upper = threshold["low"], threshold["high"]
        return [
            _share(lambda v: v < lower, f"{metric}_below_{lower}"),
            _share(lambda v: v > upper, f"{metric}_above_{upper}"),
        ]

    if metric in ["Num_addition", "Num_direct_modifications"]:
        return [_share(lambda v: v > threshold, f"{metric}_above_{threshold}")]

    return [_share(lambda v: v < threshold, f"{metric}_below_{threshold}")]

def process_dataset_thresholds(df: pd.DataFrame, dataset_name: str) -> pd.DataFrame:
    """Build the threshold-rate table for one dataset.

    For every threshold set, metric present in ``df`` and percentage level,
    the per-manipulation rates from ``calculate_pass_rate`` become rows named
    ``"<rate name>_<percentage>"``.  Columns are ``"<dataset_name>_<manipulation>"``.

    Returns:
        The table rounded to two decimals with missing cells as 0.0, or an
        empty DataFrame when no metric of any threshold set is present.
    """
    percentages = sorted(df["percentage"].dropna().unique())
    manipulations = sorted(df["manipulation"].dropna().unique())

    collected = []
    for threshold_map in THRESHOLD_SETS.values():
        for metric, limit in threshold_map.items():
            if metric not in df.columns:
                continue
            for pct in percentages:
                per_manipulation = df[df["percentage"] == pct].groupby("manipulation")
                for rate in calculate_pass_rate(per_manipulation, metric, limit):
                    # Manipulations absent at this percentage get a 0.0 rate.
                    aligned = rate.reindex(manipulations, fill_value=0.0)
                    aligned.name = f"{rate.name}_{int(pct)}"
                    collected.append(aligned)

    if not collected:
        return pd.DataFrame()

    wide = pd.concat(collected, axis=1).T
    wide.columns = [f"{dataset_name}_{manip}" for manip in wide.columns]
    return wide.fillna(0.0).round(2)

# Driver for Analysis 2: per-dataset threshold tables, then a unified table.
print("Starting Threshold Analysis...")
os.makedirs(OUTPUT_DIR_2, exist_ok=True)
processed_tables_2 = []

for name, path in FILES_CONFIG.items():
    df = load_and_preprocess(path)
    if df is not None:
        # Isolate failures per dataset, as the other analysis drivers do, so
        # one malformed file does not abort the whole run.
        try:
            table = process_dataset_thresholds(df, name)
            if not table.empty:
                csv_path = os.path.join(OUTPUT_DIR_2, f"{name}_thresholds.csv")
                table.to_csv(csv_path, encoding="utf-8")
                print(f" -> Saved SINGLE table: {csv_path}")
                processed_tables_2.append(table)
        except Exception as e:
            print(f" Error processing {name}: {e}")

if processed_tables_2:
    # Datasets side by side; cells missing in a dataset are reported as 0.0.
    final_table_2 = pd.concat(processed_tables_2, axis=1).fillna(0.0).round(2)
    final_csv_2 = os.path.join(OUTPUT_DIR_2, "final_thresholds_unified.csv")
    final_table_2.to_csv(final_csv_2, encoding="utf-8")
    print(f"\n Final unified thresholds table saved to: {final_csv_2}")

In [None]:
# --- ANALYSIS 3 ---
OUTPUT_DIR_3 = "results_presence"
# Display label (used in row names) -> source column whose share of values > 0 is reported.
METRICS_MAP_3 = {"%DirectModifications": "Num_direct_modifications", "%Additions": "Num_addition"}

def calculate_presence(df: pd.DataFrame, dataset_name: str) -> pd.DataFrame:
    """Build the presence table (share of values > 0) for one dataset.

    For every entry of ``METRICS_MAP_3`` whose column exists in ``df`` and for
    every percentage level, one row ``"<label>_<percentage>"`` holds, per
    manipulation, the percentage of non-missing values greater than zero.

    Returns:
        The table rounded to two decimals with columns
        ``"<dataset_name>_<manipulation>"``, or an empty DataFrame when none
        of the mapped columns is present.
    """

    def _positive_share(values):
        valid = values.count()
        if valid > 0:
            return (values > 0).sum() / valid * 100
        return 0.0

    percentages = sorted(df["percentage"].dropna().unique())
    manipulations = sorted(df["manipulation"].dropna().unique())

    collected = []
    for label, column in METRICS_MAP_3.items():
        if column not in df.columns:
            continue
        for pct in percentages:
            at_level = df[df["percentage"] == pct]
            share = at_level.groupby("manipulation")[column].apply(_positive_share)
            # Manipulations absent at this percentage get a 0.0 share.
            share = share.reindex(manipulations, fill_value=0.0)
            share.name = f"{label}_{int(pct)}"
            collected.append(share)

    if not collected:
        return pd.DataFrame()

    wide = pd.concat(collected, axis=1).T
    wide.columns = [f"{dataset_name}_{manip}" for manip in wide.columns]
    return wide.fillna(0.0).round(2)

# Driver for Analysis 3: per-dataset presence tables, then a unified table.
print("Starting Presence Analysis (Value > 0)...")
os.makedirs(OUTPUT_DIR_3, exist_ok=True)
processed_tables_3 = []

for name, path in FILES_CONFIG.items():
    df = load_and_preprocess(path)
    if df is not None:
        try:
            table = calculate_presence(df, name)
            # calculate_presence returns an empty frame when no mapped column
            # exists; skip it (as the threshold driver does) rather than
            # writing an empty CSV.
            if table.empty:
                print(f" No presence metrics found for {name}; skipped.")
                continue
            csv_path = os.path.join(OUTPUT_DIR_3, f"{name}_presence.csv")
            table.to_csv(csv_path, encoding="utf-8")
            print(f" -> Saved SINGLE table: {csv_path}")
            processed_tables_3.append(table)
        except Exception as e:
            print(f" Error processing {name}: {e}")

if processed_tables_3:
    # Datasets side by side; cells missing in a dataset are reported as 0.0.
    final_table_3 = pd.concat(processed_tables_3, axis=1).fillna(0.0).round(2)
    final_csv_3 = os.path.join(OUTPUT_DIR_3, "final_presence_unified.csv")
    final_table_3.to_csv(final_csv_3, encoding="utf-8")
    print(f"\nFinal unified table saved to: {final_csv_3}")

In [None]:
# --- ANALYSIS 4 ---
OUTPUT_DIR_4 = "results_nonzero"
# Columns for which the median over strictly positive values is reported.
METRICS_4 = ["Num_addition", "Num_direct_modifications"]

def calculate_nonzero_median(df: pd.DataFrame, dataset_name: str) -> pd.DataFrame:
    """Build the table of medians over strictly positive values.

    For every column of ``METRICS_4`` present in ``df`` and every percentage
    level, one row ``"<metric>_<percentage>"`` holds, per manipulation, the
    median of the values greater than zero (0.0 when there are none).

    Returns:
        The table rounded to two decimals with columns
        ``"<dataset_name>_<manipulation>"``, or an empty DataFrame when none
        of the metric columns is present.
    """

    def _median_of_positive(values):
        positive = values[values > 0]
        if positive.empty:
            return 0.0
        return positive.median()

    percentages = sorted(df["percentage"].dropna().unique())
    manipulations = sorted(df["manipulation"].dropna().unique())

    collected = []
    for metric in METRICS_4:
        if metric not in df.columns:
            continue
        for pct in percentages:
            at_level = df[df["percentage"] == pct]
            medians = at_level.groupby("manipulation")[metric].apply(_median_of_positive)
            # Manipulations absent at this percentage get 0.0.
            medians = medians.reindex(manipulations, fill_value=0.0)
            medians.name = f"{metric}_{int(pct)}"
            collected.append(medians)

    if not collected:
        return pd.DataFrame()

    wide = pd.concat(collected, axis=1).T
    wide.columns = [f"{dataset_name}_{manip}" for manip in wide.columns]
    return wide.fillna(0.0).round(2)

# Driver for Analysis 4: per-dataset non-zero median tables, then a unified table.
print("Starting Non-Zero Median Analysis...")
os.makedirs(OUTPUT_DIR_4, exist_ok=True)
processed_tables_4 = []

for name, path in FILES_CONFIG.items():
    df = load_and_preprocess(path)
    if df is not None:
        try:
            table = calculate_nonzero_median(df, name)
            # calculate_nonzero_median returns an empty frame when no metric
            # column exists; skip it (as the threshold driver does) rather
            # than writing an empty CSV.
            if table.empty:
                print(f" No non-zero median metrics found for {name}; skipped.")
                continue
            csv_path = os.path.join(OUTPUT_DIR_4, f"{name}_nonzero_median.csv")
            table.to_csv(csv_path, encoding="utf-8")
            print(f" -> Saved SINGLE table: {csv_path}")
            processed_tables_4.append(table)
        except Exception as e:
            print(f"Error processing {name}: {e}")

if processed_tables_4:
    # Datasets side by side; cells missing in a dataset are reported as 0.0.
    final_table_4 = pd.concat(processed_tables_4, axis=1).fillna(0.0).round(2)
    final_csv_4 = os.path.join(OUTPUT_DIR_4, "final_nonzero_median_unified.csv")
    final_table_4.to_csv(final_csv_4, encoding="utf-8")
    print(f"\n Final unified table saved to: {final_csv_4}")

VS BASELINE 

In [None]:
import pandas as pd
from tabulate import tabulate

# Dataset label -> CSV file compared against the baseline in this cell.
files = {
    "GR": "gr_gpt_ALL.csv",
    "SYNT": "synt_gpt_ALL.csv",
    "READ": "read_gpt_ALL.csv",
    "INFDENS": "infdens_gpt_ALL.csv",
}

# CSV with the baseline rows whose medians serve as the reference values.
baseline_file = "clapnqans_openai_answers_NEW.csv"

# === METRICS TO ANALYSE ===
# Columns read from both the baseline and every dataset.
metrics_2 = [
    "acc_median_final",
    "comp_final_median",
    "key_concept_coverage",
    "Conciseness",
]

def build_variation_table(df, baseline_vals, df_name):
    """Tabulate the % change of per-group medians relative to the baseline.

    Medians of every metric in ``metrics_2`` are taken per
    (manipulation, percentage) and expressed as
    ``(median - baseline) / baseline * 100``.  The table is printed and
    written to ``tabella_<df_name>_var_vs_baseline.csv`` / ``.txt`` in the
    working directory.

    Args:
        df: Dataset with ``manipulation``, the metric columns, and either a
            ``percentage`` or a ``setting`` column.  It is not modified.
        baseline_vals: Mapping metric -> baseline median.
        df_name: Dataset label used as column prefix and in file names.

    Returns:
        DataFrame with rows ``"<metric>_<percentage>"`` and columns
        ``"<df_name>_<manipulation>"``, rounded to two decimals with missing
        cells as 0.0.

    Raises:
        ValueError: If the grouping columns are missing.
    """
    if "percentage" in df.columns:
        group_col = "percentage"
    elif "setting" in df.columns:
        # Work on a copy so the caller's DataFrame does not gain a column.
        df = df.copy()
        df["percentage"] = df["setting"].map({"2s_3e": 50, "4s_3e": 100})
        group_col = "percentage"
    else:
        raise ValueError(f"❌ Nessuna colonna 'percentage' o 'setting' trovata in {df_name}")

    if "manipulation" not in df.columns:
        raise ValueError(f"❌ {df_name}: manca colonna 'manipulation'")

    grouped = df.groupby(["manipulation", group_col])[metrics_2].median()
    # Percentage levels that survived grouping (rows with a missing
    # manipulation are dropped by groupby); computed once for all metrics.
    grouped_pcts = grouped.index.get_level_values(group_col)
    unique_pcts = sorted(df[group_col].dropna().unique())

    rows = []
    for m in metrics_2:
        base_val = baseline_vals[m]
        for pct in unique_pcts:
            if pct in grouped_pcts:
                df_vals = grouped[m].xs(pct, level=group_col)
            else:
                # Explicit dtype avoids the object-dtype empty Series default.
                df_vals = pd.Series(dtype=float)

            if base_val != 0:
                variation = ((df_vals - base_val) / base_val * 100).round(2)
            else:
                # A zero baseline makes the relative change undefined (it
                # would produce inf); report the raw median instead.
                variation = df_vals.round(2)
            variation.name = f"{m}_{int(pct)}"
            rows.append(variation)

    if rows:
        table = pd.concat(rows, axis=1).T
        table.columns = [f"{df_name}_{col}" for col in table.columns]
        table = table.fillna(0.0).round(2)
    else:
        table = pd.DataFrame(columns=[f"{df_name}_{m}" for m in grouped.index.get_level_values("manipulation").unique()])

    # Render once and reuse the text for both console and file output.
    rendered = tabulate(table, headers="keys", tablefmt="grid", showindex=True)

    print(f"\n % variation from baseline for {df_name}:")
    print(rendered)

    table.to_csv(f"tabella_{df_name}_var_vs_baseline.csv", encoding="utf-8")
    with open(f"tabella_{df_name}_var_vs_baseline.txt", "w", encoding="utf-8") as f:
        f.write(rendered)
    print(f"{df_name} saved in CSV e TXT")

    return table


# Driver: compute baseline medians, build one variation table per dataset,
# then save the unified table.
try:
    baseline_df = pd.read_csv(baseline_file, encoding="utf-8")
    if "sentence_percentage" in baseline_df.columns:
        baseline_df.rename(columns={"sentence_percentage": "percentage"}, inplace=True)

    baseline_vals = baseline_df[metrics_2].median()
    print(f"baseline ({len(baseline_df)} rows)")
    print("baseline (median per metrics):")
    print(baseline_vals)

    tables = []
    for name, path in files.items():
        # A failure in one dataset is reported and the remaining ones still run.
        try:
            df = pd.read_csv(path, encoding="utf-8")
            if "sentence_percentage" in df.columns:
                df.rename(columns={"sentence_percentage": "percentage"}, inplace=True)
            print(f"Load {name} ({len(df)} rows)")
            t = build_variation_table(df, baseline_vals, name)
            tables.append(t)
        except Exception as e:
            print(f"⚠ Errore con {name}: {e}")

    # === FINAL TABLE ===
    if tables:
        final_table = pd.concat(tables, axis=1).fillna(0.0).round(2)
        final_table_str = tabulate(final_table, headers="keys", tablefmt="grid", showindex=True)

        print("\n Unified table (% variation from baseline):")
        print(final_table_str)

        final_table.to_csv("tabella_finale_var_vs_baseline.csv", encoding="utf-8")
        with open("tabella_finale_var_vs_baseline.txt", "w", encoding="utf-8") as f:
            f.write(final_table_str)
        print("\n Saved in CSV e TXT")
except FileNotFoundError as e:
    # Say which file is missing instead of printing a bare "Error.".
    print(f"❌ Error: {e}")

In [None]:
import pandas as pd
from tabulate import tabulate

# === CONFIGURATION ===
# Dataset label -> CSV file compared against the baseline in this cell.
files = {
    "GR": "gr_gpt_ALL.csv",
    "SYNT": "synt_gpt_ALL.csv",
    "READ": "read_gpt_ALL.csv",
    "INFDENS": "infdens_gpt_ALL.csv",
}

# CSV with the baseline rows used as the reference.
baseline_file = "clapnqans_openai_answers_NEW.csv"

# === TWO THRESHOLD SETS ===
# Conciseness carries a low/high band; the other entries are scalar thresholds.
thresholds_06 = {
    "acc_median_final": 0.6,
    "comp_final_median": 0.6,
    "key_concept_coverage": 0.6,
    "Conciseness": {"low": 0.5, "high": 1.5},
    "Num_addition": 2,
    "Num_direct_modifications": 2,
}

# Second threshold set; it has no Conciseness entry.
thresholds_04 = {
    "acc_median_final": 0.4,
    "comp_final_median": 0.4,
    "key_concept_coverage": 0.4,
    "Num_addition": 4,
    "Num_direct_modifications": 4,
}

# All threshold sets evaluated in this cell, keyed by a short label.
threshold_sets = {
    "thr06": thresholds_06,
    "thr04": thresholds_04,
}

def compute_baseline_thresholds(baseline_df, threshold_sets):
    """Compute the baseline percentage of values beyond each threshold.

    Percentages are relative to the non-missing values of each column, which
    matches how the per-dataset rates are computed with ``Series.count()``.
    A column with no valid value yields 0.0 instead of a 0/0 division.

    Args:
        baseline_df: Baseline rows containing every metric named in
            ``threshold_sets``.
        threshold_sets: Mapping set label -> {metric: threshold}.  The
            ``"Conciseness"`` threshold is a dict with ``"low"`` and ``"high"``.

    Returns:
        Dict keyed ``"<metric>below<thr>"`` / ``"<metric>above<thr>"`` with
        float percentages.  ``Num_addition`` and ``Num_direct_modifications``
        report the share above the threshold, ``Conciseness`` reports both
        sides of its band, every other metric reports the share below.

    Raises:
        KeyError: If a metric column is missing from ``baseline_df``.
    """

    def _pct(column, predicate):
        valid = column.count()  # NaN rows must not dilute the rate
        if valid == 0:
            return 0.0
        return float(predicate(column).sum() / valid * 100)

    baseline_vals = {}
    for thresholds in threshold_sets.values():
        for metric, thr in thresholds.items():
            column = baseline_df[metric]
            if metric == "Conciseness":
                low, high = thr["low"], thr["high"]
                baseline_vals[f"{metric}below{low}"] = _pct(column, lambda v: v < low)
                baseline_vals[f"{metric}above{high}"] = _pct(column, lambda v: v > high)
            elif metric in ("Num_addition", "Num_direct_modifications"):
                baseline_vals[f"{metric}above{thr}"] = _pct(column, lambda v: v > thr)
            else:
                baseline_vals[f"{metric}below{thr}"] = _pct(column, lambda v: v < thr)
    return baseline_vals

def _share_vs_baseline(grouped, metric, predicate, baseline_vals, baseline_key, manip_vals, row_name):
    """Return per-manipulation share (%) matching ``predicate`` minus the baseline share."""

    def _share(values):
        valid = values.count()
        return predicate(values).sum() / valid * 100 if valid > 0 else 0.0

    # Manipulations absent from this slice are given a 0.0 share.
    share = grouped[metric].apply(_share).reindex(manip_vals, fill_value=0.0)
    variation = (share - baseline_vals[baseline_key]).round(2)
    variation.name = row_name
    return variation


def build_thresholds_variation_table(df, df_name, threshold_sets, baseline_vals):
    """Tabulate threshold rates as percentage-point differences from the baseline.

    For every threshold set, metric and percentage level, the per-manipulation
    share of values beyond the threshold is computed and the matching baseline
    share is subtracted.  The table is printed and written to
    ``tabella_<df_name>_var_thresholds_vs_baseline.csv`` / ``.txt`` in the
    working directory.

    Args:
        df: Dataset with ``manipulation``, the metric columns, and one of
            ``percentage`` / ``sentence_percentage`` / ``setting``.  It is
            not modified.
        df_name: Dataset label used as column prefix and in file names.
        threshold_sets: Mapping set label -> {metric: threshold}.
        baseline_vals: Baseline shares keyed ``"<metric>below<thr>"`` /
            ``"<metric>above<thr>"``.

    Returns:
        DataFrame with rows ``"<baseline key>_<percentage>"`` and columns
        ``"<df_name>_<manipulation>"``, rounded to two decimals.

    Raises:
        ValueError: If the grouping columns are missing.
        KeyError: If a metric column or a baseline key is missing.
    """
    if "sentence_percentage" in df.columns:
        df = df.rename(columns={"sentence_percentage": "percentage"})

    if "percentage" not in df.columns:
        if "setting" in df.columns:
            # Work on a copy so the caller's DataFrame does not gain a column.
            df = df.copy()
            df["percentage"] = df["setting"].map({"2s_3e": 50, "4s_3e": 100})
        else:
            raise ValueError(f" {df_name}: no 'percentage' o 'setting'column")

    if "manipulation" not in df.columns:
        raise ValueError(f" {df_name}: no 'manipulation'column")

    unique_pcts = sorted(df["percentage"].dropna().unique())
    manip_vals = sorted(df["manipulation"].dropna().unique())

    rows = []
    for thresholds in threshold_sets.values():
        for metric, thr in thresholds.items():
            # Each check is (baseline key, predicate); bounds are bound as
            # lambda defaults so every predicate keeps its own value.
            if metric == "Conciseness":
                low, high = thr["low"], thr["high"]
                checks = [
                    (f"{metric}below{low}", lambda v, bound=low: v < bound),
                    (f"{metric}above{high}", lambda v, bound=high: v > bound),
                ]
            elif metric in ("Num_addition", "Num_direct_modifications"):
                checks = [(f"{metric}above{thr}", lambda v, bound=thr: v > bound)]
            else:
                checks = [(f"{metric}below{thr}", lambda v, bound=thr: v < bound)]

            for pct in unique_pcts:
                grouped = df[df["percentage"] == pct].groupby("manipulation")
                for key, predicate in checks:
                    rows.append(
                        _share_vs_baseline(
                            grouped, metric, predicate, baseline_vals, key,
                            manip_vals, f"{key}_{int(pct)}",
                        )
                    )

    if rows:
        table = pd.concat(rows, axis=1).T
        table.columns = [f"{df_name}_{col}" for col in table.columns]
        table = table.fillna(0.0).round(2)
    else:
        table = pd.DataFrame(columns=[f"{df_name}_{m}" for m in manip_vals])

    # Render once and reuse the text for both console and file output.
    rendered = tabulate(table, headers="keys", tablefmt="grid", showindex=True)

    print(f"\n Difference in points % below/above threshold compared to baseline for {df_name}:")
    print(rendered)

    table.to_csv(f"tabella_{df_name}_var_thresholds_vs_baseline.csv", encoding="utf-8")
    with open(f"tabella_{df_name}_var_thresholds_vs_baseline.txt", "w", encoding="utf-8") as f:
        f.write(rendered)
    print(f" Saved {df_name} in CSV e TXT")

    return table

# Driver: compute baseline threshold shares, build one table per dataset,
# then save the unified table.
try:
    baseline_df = pd.read_csv(baseline_file, encoding="utf-8")
    baseline_vals = compute_baseline_thresholds(baseline_df, threshold_sets)

    tables = []
    for name, path in files.items():
        # A failure in one dataset is reported and the remaining ones still run.
        try:
            df = pd.read_csv(path, encoding="utf-8")
            print(f"Load {name} ({len(df)} rows)")
            t = build_thresholds_variation_table(df, name, threshold_sets, baseline_vals)
            tables.append(t)
        except Exception as e:
            print(f" Errore with {name}: {e}")

    if tables:
        final_table = pd.concat(tables, axis=1).fillna(0.0).round(2)
        final_table_str = tabulate(final_table, headers="keys", tablefmt="grid", showindex=True)

        print("\n Unified table (Difference in points percentage below/above threshold compared to baseline):")
        print(final_table_str)

        final_table.to_csv("tabella_finale_var_thresholds_vs_baseline.csv", encoding="utf-8")
        with open("tabella_finale_var_thresholds_vs_baseline.txt", "w", encoding="utf-8") as f:
            f.write(final_table_str)
        print("\n Unified table saved in CSV e TXT")
except FileNotFoundError as e:
    # Say which file is missing instead of printing a bare "Error".
    print(f"Error: {e}")

In [None]:
import pandas as pd
from tabulate import tabulate

# Dataset label -> CSV file compared against the baseline in this cell.
files = {
    "GR": "gr_gpt_ALL.csv",
    "SYNT": "synt_gpt_ALL.csv",
    "READ": "read_gpt_ALL.csv",
    "INFDENS": "infdens_gpt_ALL.csv",
}

# CSV with the baseline rows used as the reference.
baseline_file = "clapnqans_openai_answers_NEW.csv"

def compute_baseline_nonzero(baseline_df):
    """Return the baseline percentage of non-zero additions and direct modifications.

    Missing values are excluded from both numerator and denominator.  A plain
    ``!= 0`` comparison is True for NaN, which would count missing entries as
    "present".  A column without valid values yields 0.0.

    Args:
        baseline_df: Baseline rows with ``Num_addition`` and
            ``Num_direct_modifications`` columns.

    Returns:
        Dict with keys ``"%Additions"`` and ``"%DirectModifications"``.
    """

    def _pct_nonzero(column):
        valid = column.dropna()
        if valid.empty:
            return 0.0
        return float((valid != 0).sum() / len(valid) * 100)

    return {
        "%Additions": _pct_nonzero(baseline_df["Num_addition"]),
        "%DirectModifications": _pct_nonzero(baseline_df["Num_direct_modifications"]),
    }

def build_nonzero_variation_table(df, df_name, baseline_vals):
    """Tabulate the share of non-zero counts as percentage-point differences from the baseline.

    For every percentage level, the per-manipulation share of non-zero
    ``Num_addition`` and ``Num_direct_modifications`` values is computed and
    the baseline share is subtracted.  The table is printed and written to
    ``tabella_<df_name>_nonzero_vs_baseline.csv`` / ``.txt`` in the working
    directory.

    Args:
        df: Dataset with ``manipulation``, both count columns, and one of
            ``percentage`` / ``sentence_percentage`` / ``setting``.  It is
            not modified.
        df_name: Dataset label used as column prefix and in file names.
        baseline_vals: Dict with ``"%Additions"`` and ``"%DirectModifications"``.

    Returns:
        DataFrame with rows ``"%Additions_<pct>"`` / ``"%DirectModifications_<pct>"``
        and columns ``"<df_name>_<manipulation>"``, rounded to two decimals.

    Raises:
        ValueError: If the grouping columns are missing.
    """
    if "sentence_percentage" in df.columns:
        df = df.rename(columns={"sentence_percentage": "percentage"})
    if "percentage" not in df.columns:
        if "setting" not in df.columns:
            # Fail with a clear message rather than a bare KeyError later on.
            raise ValueError(f" {df_name}: no 'percentage' or 'setting' column")
        # Work on a copy so the caller's DataFrame does not gain a column.
        df = df.copy()
        df["percentage"] = df["setting"].map({"2s_3e": 50, "4s_3e": 100})

    if "manipulation" not in df.columns:
        raise ValueError(f" {df_name}: no 'manipulation' column")

    def _pct_nonzero(values):
        # Drop NaN first: `NaN != 0` is True and would inflate the numerator
        # while count() excludes it from the denominator.
        valid = values.dropna()
        if valid.empty:
            return 0.0
        return (valid != 0).sum() / len(valid) * 100

    unique_pcts = sorted(df["percentage"].dropna().unique())
    manip_vals = sorted(df["manipulation"].dropna().unique())
    tracked = (
        ("%Additions", "Num_addition"),
        ("%DirectModifications", "Num_direct_modifications"),
    )

    rows = []
    for pct in unique_pcts:
        grouped = df[df["percentage"] == pct].groupby("manipulation")
        for label, column in tracked:
            # Manipulations absent at this percentage get a 0.0 share.
            share = grouped[column].apply(_pct_nonzero).reindex(manip_vals, fill_value=0.0)
            variation = (share - baseline_vals[label]).round(2)
            variation.name = f"{label}_{int(pct)}"
            rows.append(variation)

    if rows:
        table = pd.concat(rows, axis=1).T
        table.columns = [f"{df_name}_{col}" for col in table.columns]
        table = table.fillna(0.0).round(2)
    else:
        table = pd.DataFrame(columns=[f"{df_name}_{m}" for m in manip_vals])

    # Render once and reuse the text for both console and file output.
    rendered = tabulate(table, headers="keys", tablefmt="grid", showindex=True)

    print(f"\n Difference in non-zero percentage points vs baseline for {df_name}:")
    print(rendered)

    table.to_csv(f"tabella_{df_name}_nonzero_vs_baseline.csv", encoding="utf-8")
    with open(f"tabella_{df_name}_nonzero_vs_baseline.txt", "w", encoding="utf-8") as f:
        f.write(rendered)
    print(f" {df_name} saved in CSV e TXT")

    return table

# === Baseline ===
# Driver: compute baseline non-zero shares, build one table per dataset,
# then save the unified table.
try:
    baseline_df = pd.read_csv(baseline_file, encoding="utf-8")
    baseline_vals = compute_baseline_nonzero(baseline_df)

    tables = []
    for name, path in files.items():
        # A failure in one dataset is reported and the remaining ones still run.
        try:
            df = pd.read_csv(path, encoding="utf-8")
            print(f"Load {name} ({len(df)} righe)")
            t = build_nonzero_variation_table(df, name, baseline_vals)
            tables.append(t)
        except Exception as e:
            print(f"⚠ Error with {name}: {e}")

    if tables:
        final_table = pd.concat(tables, axis=1).fillna(0.0).round(2)
        final_table_str = tabulate(final_table, headers="keys", tablefmt="grid", showindex=True)

        print("\n Unified table (Difference in non-zero percentage points vs baseline):")
        print(final_table_str)

        final_table.to_csv("tabella_finale_nonzero_vs_baseline.csv", encoding="utf-8")
        with open("tabella_finale_nonzero_vs_baseline.txt", "w", encoding="utf-8") as f:
            f.write(final_table_str)
        print("\n Final table in CSV and TXT")
except FileNotFoundError as e:
    # Say which file is missing instead of printing a bare "Error".
    print(f" Error: {e}")

In [None]:
import pandas as pd
from tabulate import tabulate

# Dataset label -> CSV file compared against the baseline in this cell.
files = {
    "GR": "gr_gpt_ALL.csv",
    "SYNT": "synt_gpt_ALL.csv",
    "READ": "read_gpt_ALL.csv",
    "INFDENS": "infdens_gpt_ALL.csv",
}

# CSV with the baseline rows used as the reference.
baseline_file = "clapnqans_openai_answers_NEW.csv"

# Columns whose median over non-zero values is compared with the baseline.
metrics = ["Num_addition"]

def compute_baseline_median(baseline_df):
    """Return, per metric in ``metrics``, the baseline median over non-zero values.

    Missing values are dropped before filtering.  ``NaN != 0`` is True, so
    without the drop a column holding only zeros and NaN would produce a NaN
    median instead of the 0.0 fallback.

    Args:
        baseline_df: Baseline rows containing every column named in ``metrics``.

    Returns:
        Dict metric -> median of its non-zero values (0.0 when there are none).
    """
    baseline_median = {}
    for metric in metrics:
        values = baseline_df[metric].dropna()
        nonzero_vals = values[values != 0]
        baseline_median[metric] = nonzero_vals.median() if len(nonzero_vals) > 0 else 0.0
    return baseline_median

def build_nonzero_median_variation_table(df, df_name, baseline_median):
    """Tabulate the % change of non-zero medians relative to the baseline.

    For every metric in ``metrics`` and every percentage level, the
    per-manipulation median over non-zero values is expressed as
    ``(median - baseline) / baseline * 100``.  When the baseline is 0 the raw
    median is reported instead.  The table is printed and written to
    ``tabella_<df_name>_median_nonzero_vs_baseline.csv`` / ``.txt`` in the
    working directory.

    Args:
        df: Dataset with ``manipulation``, the metric columns, and one of
            ``percentage`` / ``sentence_percentage`` / ``setting``.  It is
            not modified.
        df_name: Dataset label used as column prefix and in file names.
        baseline_median: Dict metric -> baseline non-zero median.

    Returns:
        DataFrame with rows ``"<metric>_<percentage>"`` and columns
        ``"<df_name>_<manipulation>"``, rounded to two decimals.

    Raises:
        ValueError: If the grouping columns are missing.
    """
    if "sentence_percentage" in df.columns:
        df = df.rename(columns={"sentence_percentage": "percentage"})
    if "percentage" not in df.columns:
        if "setting" not in df.columns:
            # Fail with a clear message rather than a bare KeyError later on.
            raise ValueError(f" {df_name}: no 'percentage' or 'setting' column")
        # Work on a copy so the caller's DataFrame does not gain a column.
        df = df.copy()
        df["percentage"] = df["setting"].map({"2s_3e": 50, "4s_3e": 100})

    if "manipulation" not in df.columns:
        raise ValueError(f" {df_name}: no 'manipulation' column")

    def _nonzero_median(values):
        # Drop NaN first: `NaN != 0` is True, so a group of zeros and NaN
        # would otherwise produce a NaN median instead of 0.0.
        valid = values.dropna()
        nonzero = valid[valid != 0]
        return nonzero.median() if not nonzero.empty else 0.0

    unique_pcts = sorted(df["percentage"].dropna().unique())
    manip_vals = sorted(df["manipulation"].dropna().unique())

    rows = []
    for metric in metrics:
        reference = baseline_median[metric]
        for pct in unique_pcts:
            grouped = df[df["percentage"] == pct].groupby("manipulation")
            # Manipulations absent at this percentage get a 0.0 median.
            s_median = grouped[metric].apply(_nonzero_median).reindex(manip_vals, fill_value=0.0)

            if reference != 0:
                variation = ((s_median - reference) / reference * 100).round(2)
            else:
                # Relative change is undefined for a zero baseline.
                variation = s_median.round(2)

            variation.name = f"{metric}_{int(pct)}"
            rows.append(variation)

    if rows:
        table = pd.concat(rows, axis=1).T
        table.columns = [f"{df_name}_{col}" for col in table.columns]
        table = table.fillna(0.0).round(2)
    else:
        table = pd.DataFrame(columns=[f"{df_name}_{m}" for m in manip_vals])

    # Render once and reuse the text for both console and file output.
    rendered = tabulate(table, headers="keys", tablefmt="grid", showindex=True)

    print(f"\n Median non-zero % change vs baseline for {df_name}:")
    print(rendered)

    table.to_csv(f"tabella_{df_name}_median_nonzero_vs_baseline.csv", encoding="utf-8")
    with open(f"tabella_{df_name}_median_nonzero_vs_baseline.txt", "w", encoding="utf-8") as f:
        f.write(rendered)
    print(f"  {df_name} saved in CSV e TXT")

    return table

# Driver: compute baseline non-zero medians, build one table per dataset,
# then save the unified table.
try:
    baseline_df = pd.read_csv(baseline_file, encoding="utf-8")
    baseline_median = compute_baseline_median(baseline_df)

    tables = []
    for name, path in files.items():
        # A failure in one dataset is reported and the remaining ones still run.
        try:
            df = pd.read_csv(path, encoding="utf-8")
            print(f"Load {name} ({len(df)} rows)")
            t = build_nonzero_median_variation_table(df, name, baseline_median)
            tables.append(t)
        except Exception as e:
            print(f"⚠ Error with {name}: {e}")

    if tables:
        final_table = pd.concat(tables, axis=1).fillna(0.0).round(2)
        final_table_str = tabulate(final_table, headers="keys", tablefmt="grid", showindex=True)

        print("\n Unified table (Median non-zero percentage change vs baseline):")
        print(final_table_str)

        final_table.to_csv("tabella_finale_median_nonzero_vs_baseline.csv", encoding="utf-8")
        with open("tabella_finale_median_nonzero_vs_baseline.txt", "w", encoding="utf-8") as f:
            f.write(final_table_str)
        print("\n Table saved in CSV e TXT")
except FileNotFoundError as e:
    # Say which file is missing instead of printing a bare "Error".
    print(f" Error: {e}")

GPT vs GEMINI

In [None]:
import pandas as pd
import numpy as np
import os

# ============================================================
# PATH AND FILE CONFIGURATION
# ============================================================
# Output folder for the comparison; created at cell execution time.
save_dir = r"GPTvsGEMINI"
os.makedirs(save_dir, exist_ok=True)

# GPT Datasets (dimension label -> CSV file)
files_gpt = {
    "GR": r"gr_gpt_ALL.csv",
    "SYNT": r"synt_gpt_ALL.csv",
    "READ": r"read_gpt_ALL.csv",
    "INFDENS": r"infdens_gpt_ALL.csv",
}

# Gemini Datasets (same dimension labels as files_gpt)
files_gemini = {
    "GR": r"gr_gemini_ALL.csv",
    "SYNT": r"synt_gemini_ALL.csv",
    "READ": r"read_gemini_ALL.csv",
    "INFDENS": r"infdens_gemini_ALL.csv",
}

# Baselines
# NOTE(review): the earlier "VS BASELINE" cells read
# "clapnqans_openai_answers_NEW.csv"; confirm the file without the _NEW
# suffix is the intended GPT baseline here.
baseline_gpt_path = r"clapnqans_openai_answers.csv"
baseline_gemini_path = r"clapnqans_gemini_answers.csv"

# Metrics
# Source columns and their display names, paired by position.
metrics = ['acc_median_final', 'comp_final_median', 'key_concept_coverage', 'Conciseness']
metric_names = ['Accuracy', 'Completeness', 'KCC', 'Conciseness']

# ============================================================
# DATA LOADING
# ============================================================

def load_data_safe(files_dict):
    """Load every existing CSV of ``files_dict`` into a dict of DataFrames.

    Args:
        files_dict: Mapping label -> CSV path.

    Returns:
        Dict label -> DataFrame holding only the files that exist.  A warning
        is printed for each missing path.
    """
    loaded_data = {}
    for label, csv_path in files_dict.items():
        if not os.path.exists(csv_path):
            print(f"⚠ File not found: {csv_path}")
            continue
        loaded_data[label] = pd.read_csv(csv_path, encoding="utf-8")
    return loaded_data

print("Loading GPT datasets...")
datasets_gpt = load_data_safe(files_gpt)

print("Loading Gemini datasets...")
datasets_gemini = load_data_safe(files_gemini)

# Combined structure for support functions:
# dimension -> {"GPT": frame, "Gemini": frame}, only for dimensions loaded for both models.
datasets_combined = {}
for key, gpt_frame in datasets_gpt.items():
    if key not in datasets_gemini:
        continue
    datasets_combined[key] = {"GPT": gpt_frame, "Gemini": datasets_gemini[key]}

# Loading Baselines
try:
    baseline_df_gpt = pd.read_csv(baseline_gpt_path, encoding="utf-8")
    baseline_df_gemini = pd.read_csv(baseline_gemini_path, encoding="utf-8")
except FileNotFoundError as e:
    print(f"Error loading baselines: {e}")
    # Empty fallback for both, so downstream code can test `.empty`.
    baseline_df_gpt = pd.DataFrame()
    baseline_df_gemini = pd.DataFrame()
else:
    print("Baselines loaded successfully.")

# ============================================================
# SUPPORT FUNCTIONS
# ============================================================

def collect_all_manipulations(datasets, model_name):
    """Return the sorted set of manipulation labels seen for one model.

    Missing labels are dropped.  A NaN/None next to string labels would make
    ``sorted`` raise ``TypeError``.

    Args:
        datasets: Mapping dimension -> {model name: DataFrame}.
        model_name: Model whose frames are inspected.

    Returns:
        Sorted list of distinct ``manipulation`` values across all dimensions.
        Frames without a ``manipulation`` column are ignored.
    """
    all_manips = set()
    for dfs in datasets.values():
        if model_name in dfs:
            df = dfs[model_name]
            if "manipulation" in df.columns:
                all_manips.update(df["manipulation"].dropna().unique())
    return sorted(all_manips)

def calculate_baseline_medians(baseline_df):
    """Return display name -> median of the matching baseline column.

    Columns come from ``metrics`` and are paired by position with
    ``metric_names``.  Missing values are ignored.  An empty baseline yields 0
    for every display name.
    """
    if baseline_df.empty:
        return dict.fromkeys(metric_names, 0)

    medians = {}
    for column, label in zip(metrics, metric_names):
        medians[label] = baseline_df[column].dropna().median()
    return medians

def _prepare_model_frames(datasets, model_name):
    """Return the model's frames with a normalised ``percentage`` column."""
    prepared = []
    for dim, dfs in datasets.items():
        if model_name not in dfs:
            continue
        df = dfs[model_name].copy()

        if "sentence_percentage" in df.columns:
            df.rename(columns={"sentence_percentage": "percentage"}, inplace=True)
        if "setting" in df.columns and dim == "INFDENS":
            df["percentage"] = df["setting"].map({"2s_3e": 50, "4s_3e": 100})

        # Frames lacking either grouping column contribute no values.
        if "percentage" in df.columns and "manipulation" in df.columns:
            prepared.append(df)
    return prepared


def collect_manipulation_medians(datasets, model_name, baseline_medians):
    """Compute per-manipulation medians pooled across dimensions.

    For every manipulation, metric and percentage level (50 and 100), the
    non-missing metric values of all dimensions are pooled and their median
    is taken, along with its difference from the baseline median.

    Args:
        datasets: Mapping dimension -> {model name: DataFrame}.
        model_name: Model whose frames are analysed.
        baseline_medians: Mapping metric display name -> baseline median.

    Returns:
        DataFrame with one row per manipulation and, per metric and
        percentage, the columns ``"<name>_<pct>%"`` and ``"<name>_<pct>%-Base"``
        (NaN when no value exists).
    """
    all_manips = collect_all_manipulations(datasets, model_name)
    # Copy/rename/map once; doing it inside the nested loops repeated the
    # same work for every manipulation x metric x percentage.
    frames = _prepare_model_frames(datasets, model_name)
    results = []

    for manip in all_manips:
        row = {"Manipulation": manip}
        for mcol, mname in zip(metrics, metric_names):
            baseline = baseline_medians[mname]
            for pct in [50, 100]:
                values = []
                for df in frames:
                    selected = df[(df["manipulation"] == manip) & (df["percentage"] == pct)]
                    values.extend(selected[mcol].dropna().tolist())

                if values:
                    med = np.median(values)
                    row[f"{mname}_{pct}%"] = med
                    row[f"{mname}_{pct}%-Base"] = med - baseline
                else:
                    row[f"{mname}_{pct}%"] = np.nan
                    row[f"{mname}_{pct}%-Base"] = np.nan

        results.append(row)

    return pd.DataFrame(results)


if baseline_df_gpt.empty or baseline_df_gemini.empty or not datasets_combined:
    print("Cannot proceed: Missing Data or Baselines.")
else:
    # 1. Calculate Baseline Medians
    baseline_medians_gpt_vals = calculate_baseline_medians(baseline_df_gpt)
    baseline_medians_gemini_vals = calculate_baseline_medians(baseline_df_gemini)

    # 2. Calculate Medians by Manipulation
    df_gpt_res = collect_manipulation_medians(datasets_combined, "GPT", baseline_medians_gpt_vals)
    df_gemini_res = collect_manipulation_medians(datasets_combined, "Gemini", baseline_medians_gemini_vals)


    def create_full_cumulative(df_gpt, df_gemini):
        """Pair GPT and Gemini baseline deltas per manipulation and percentage.

        Manipulations without a Gemini row are skipped.  Each output row holds,
        per metric, the GPT delta, the Gemini delta and their difference.
        """
        combined = []
        for _, gpt_row in df_gpt.iterrows():
            manip = gpt_row["Manipulation"]
            matches = df_gemini[df_gemini["Manipulation"] == manip]
            if matches.empty:
                continue
            # Only the first matching Gemini row is used.
            gemini_row = matches.iloc[0]
            for pct in (50, 100):
                entry = {"Manipulation": manip, "Percentage": pct}
                for mname in metric_names:
                    delta_column = f"{mname}_{pct}%-Base"
                    gpt_delta = gpt_row[delta_column]
                    gemini_delta = gemini_row[delta_column]

                    entry[f"{mname}_GPT%-Base"] = gpt_delta
                    entry[f"{mname}_Gemini%-Base"] = gemini_delta
                    entry[f"{mname}_Δ"] = gpt_delta - gemini_delta
                combined.append(entry)
        return pd.DataFrame(combined)

    cumulative_full = create_full_cumulative(df_gpt_res, df_gemini_res)

    # Save Result
    cumulative_path = os.path.join(save_dir, "Cumulative_GPT_vs_Gemini_FULL.csv")
    cumulative_full.to_csv(cumulative_path, index=False, encoding="utf-8")

    print(f"\n  cumulative table saved to: {cumulative_path}")

    # Preview
    print("Cumulative Table Preview:")
    print(cumulative_full.head().to_markdown(index=False, numalign="left", stralign="left"))