In [None]:
%cd /lustre/fsn1/projects/rech/enh/unv34ei/MICAT/experiments/scripts/results_curves

In [None]:
#!/usr/bin/env python3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from autorank import autorank, create_report, plot_stats 
import json
import glob
import re
import os

# Configuration
algorithms = ["Adam", "MAML", "Approx_GAP", "Beta_cd", "MICAT"]
datasets = ["algebra", "assist0910", "math2"]
subalgos = ["IMPACT", "NCDM"]
metrics = ["mi_acc","rmse","mae","mi_prec","mi_rec","mi_f_b","mi_auc","ma_prec","ma_rec","ma_f_b",'meta_doa','pc-er','rm']
pattern = re.compile(
    r"^CAT_launch_"
    r"(?P<dataset>.+?)_"
    r"(?P<subalgo>.+?)_"
    r"(?P<algorithm>.+?)_"
    r"\d+_all_results\.json$"
)

output_dir = "cd_plots"
os.makedirs(output_dir, exist_ok=True)

In [30]:
# Collect metric values across all valid datasets/subalgos/folds/steps
results = {m: {a: [] for a in algorithms} for m in metrics}
files = glob.glob("CAT_launch_*_IMPACT_*_all_results.json") + \
        glob.glob("CAT_launch_*_NCDM_*_all_results.json")
print(f"[INFO] Found {len(files)} candidate result files.")

for file in files:
    m = pattern.match(os.path.basename(file))
    if not m:
        print(f"[WARN] Filename does not match expected pattern: {file}")
        continue
    ds, subalgo, algo = m["dataset"], m["subalgo"], m["algorithm"]
    if ds not in datasets or algo not in algorithms:
        print(f"[WARN] Skipping file due to unmatched dataset/algo: {ds}, {algo}")
        continue
    if ds=="algebra" and subalgo=="NCDM":
        continue
    try:
        with open(file, "r") as f:
            folds = json.load(f)
        for fold_idx, (pred, meta) in enumerate(folds):
            value_acc = {metric:0 for metric in metrics}
            for step in pred:
                for metric in metrics:
                    value = pred[step].get(metric)
                    if value is None or np.isnan(value):
                        value = meta[step].get(metric)
                    if value is not None and not np.isnan(value):                     
                        value_acc[metric] += value 
                        if value==0.0:
                    elif metric == 'pc-er' :
                        value_acc[metric] += -2 
            for metric in metrics :
                results[metric][algo].append(value_acc[metric])
    except Exception as e:
        print(f"[ERROR] Failed to process {file}: {e}")

IndentationError: expected an indented block after 'if' statement on line 30 (280297985.py, line 31)

In [26]:
results['meta_doa']

{'Adam': [12.861356959187866,
  12.756967618601536,
  12.619613987828851,
  12.591751762043984,
  12.528431757782204,
  10.721220938012062,
  10.733717209599394,
  10.750553574930281,
  10.630181582329584,
  10.756617783674088,
  9.031180506551776,
  8.57268640713561,
  8.800294074888713,
  9.134315714194173,
  8.915044749956342,
  7.809445239868375,
  7.493418252067106,
  8.016728606059836,
  7.850191385674584,
  7.965332079722444,
  8.246055895385847,
  8.231403325115496,
  8.237490581107036,
  8.238011403833962,
  8.231750672822354],
 'MAML': [12.55744425980009,
  12.910644691729756,
  12.652863205225579,
  12.403289786088024,
  12.701208059487605,
  9.438664551698885,
  8.57252144073945,
  9.262439366153409,
  9.426067207687643,
  8.766469956276648,
  10.052183561212921,
  10.041587512519392,
  10.085108536656364,
  9.847569113101832,
  10.166767723258234,
  8.561968408346626,
  8.23764845374347,
  8.160118059084525,
  8.239745591593245,
  8.210082291216224,
  7.991898353753191,
  

In [25]:
for algo, r in results['meta_doa'].items() :
    print(np.array(r)/16)
    print(algo, len(r))
    """if len(r)<25:
        print(r)"""

[0.80383481 0.79731048 0.78872587 0.78698449 0.78302698 0.67007631
 0.67085733 0.6719096  0.66438635 0.67228861 0.56444878 0.5357929
 0.55001838 0.57089473 0.5571903  0.48809033 0.46833864 0.50104554
 0.49063696 0.49783325 0.51537849 0.51446271 0.51484316 0.51487571
 0.51448442]
Adam 25
[0.78484027 0.80691529 0.79080395 0.77520561 0.7938255  0.58991653
 0.53578259 0.57890246 0.5891292  0.54790437 0.62826147 0.62759922
 0.63031928 0.61547307 0.63542298 0.53512303 0.51485303 0.51000738
 0.5149841  0.51313014 0.49949365 0.47266301 0.48479807 0.49263565
 0.48471609]
MAML 25
[0.5391563  0.57349878 0.57772532 0.56464671 0.55915883 0.4963231
 0.50350373 0.4741099  0.51781556 0.51593258 0.78995399 0.79038101
 0.77451821 0.77261295 0.77380865 0.49981249 0.50084643 0.50131536
 0.50293557 0.49969359 0.49974923 0.44904121 0.48583938 0.47755556
 0.47924086]
Approx_GAP 25
[0.80039114 0.78879166 0.7854059  0.79127254 0.78050781 0.57852855
 0.51619678 0.56969555 0.58174289 0.55853173 0.66299104 0.6611

In [None]:
from scipy.stats import friedmanchisquare
import numpy as np
import scikit_posthocs as sp

t = [results['mi_acc'][algo] for algo in algorithms]
data = np.array(t)

from critdd import Diagram
import pandas as pd

# download example data


# create a CD diagram from the Pandas DataFrame
diagram = Diagram(
    data.T,
    treatment_names = algorithms,
    maximize_outcome = True
)

# inspect average ranks and groups of statistically indistinguishable treatments
diagram.average_ranks # the average rank of each treatment
diagram.get_groups(alpha=.05, adjustment="holm")

# export the diagram to a file
diagram.to_file(
    "example.tex",
    alpha = .05,
    adjustment = "holm",
    reverse_x = True,
    axis_options = {"title": ""},
)

In [None]:
from critdd import Diagrams # Diagrams is the 2D version of Diagram
import numpy as np
import pandas as pd


# construct a sequence of CD diagrams
treatment_names = ["Naïve", "MAML", "Approx-GAP", "BETA-CD", "MICAT"]
metrics = ["mi_acc","mi_prec","mi_rec","mi_f_b","mi_auc",'meta_doa']
diagram_names = ["Accuracy","Precision","Recall","F1","AUC",'Meta Doa']
Xs = [] # collect an (n,k)-shaped matrix for each diagram
for n in metrics:
    Xs.append(np.array([results[n][algo] for algo in algorithms]).T)
two_dimensional_diagram = Diagrams(
    np.stack(Xs),
    diagram_names = diagram_names,
    treatment_names = treatment_names,
    maximize_outcome = True
)



In [None]:
two_dimensional_diagram.to_file(
    "2D_diagram.tex",
    alpha = .05,
    adjustment = "holm",
    reverse_x = True,
    axis_options = {"title": ""},
)

In [None]:
#
# customize the style of the plot and export to PDF
two_dimensional_diagram.to_file(
    "2d_example.pdf",
    preamble = "\n".join([ # colors are defined before \begin{document}
        "\\definecolor{color1}{HTML}{84B818}",
        "\\definecolor{color2}{HTML}{D18B12}",
        "\\definecolor{color3}{HTML}{1BB5B5}",
        "\\definecolor{color4}{HTML}{F85A3E}",
        "\\definecolor{color5}{HTML}{4B6CFC}",
    ]),
    axis_options = { # style the plot
        "cycle list": ",".join([ # define the markers for treatments
            "{color1,mark=*}",
            "{color2,mark=diamond*}",
            "{color3,mark=triangle,semithick}",
            "{color4,mark=square,semithick}",
            "{color5,mark=pentagon,semithick}",
        ]),
        "width": "\\axisdefaultwidth",
        "height": "0.75*\\axisdefaultheight",
        "title": "critdd"
    },
)

In [None]:
# TIMETABLE

#!/usr/bin/env python3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from autorank import autorank, create_report, plot_stats 
import json
import glob
import re
import os

# Configuration
algorithms = ["Adam", "Approx_GAP", "Beta_cd", "MICAT", "MAML"]
datasets = ["algebra", "assist0910", "math2"]
subalgos = ["IMPACT", "NCDM"]
metrics = ["mi_acc","rmse","mae","mi_prec","mi_rec","mi_f_b","mi_auc","ma_prec","ma_rec","ma_f_b",'meta_doa','pc-er','rm']
pattern = re.compile(
    r"^CAT_launch_"
    r"(?P<dataset>.+?)_"
    r"(?P<subalgo>.+?)_"
    r"(?P<algorithm>.+?)_"
    r"\d+_metrics_summary\.csv$"
)

output_dir = "time_table"
os.makedirs(output_dir, exist_ok=True)

In [None]:
import csv
# Collect metric values across all valid datasets/subalgos/folds/steps
results = {a: {d: {"IMPACT":[0,0], "NCDM":[0,0]} for d in datasets} for a in algorithms}
files = glob.glob("CAT_launch_*_IMPACT_*_metrics_summary.csv") + \
        glob.glob("CAT_launch_*_NCDM_*_metrics_summary.csv")
print(f"[INFO] Found {len(files)} candidate result files.")

for file in files:
    m = pattern.match(os.path.basename(file))
    if not m:
        print(f"[WARN] Filename does not match expected pattern: {file}")
        continue
    ds, subalgo, algo = m["dataset"], m["subalgo"], m["algorithm"]
    if ds not in datasets or algo not in algorithms:
        print(f"[WARN] Skipping file due to unmatched dataset/algo: {ds}, {algo}")
        continue
    try:
        print(ds, subalgo, algo)
        with open(file, newline='') as f:
            for row in f:
                if row[0]=='-' : 
                    s = row.split(',')
                    results[algo][ds][subalgo][0] = float(s[2])/60
                    results[algo][ds][subalgo][1] = float(s[3])/60
                
                    
    except Exception as e:
        print(f"[ERROR] Failed to process {file}: {e}")

In [None]:
results

In [None]:
{'Naive': {'algebra': {'IMPACT': [0, 0], 'NCDM': [0, 0]},
  'assist0910': {'IMPACT': [0.0, 0.0], 'NCDM': [0.0, 0.0]},
  'math2': {'IMPACT': [0.0, 0.0], 'NCDM': [0.0, 0.0]}},
 'Approx_GAP': {'algebra': {'IMPACT': [0, 0], 'NCDM': [0, 0]},
  'assist0910': {'IMPACT': [6.306100000000001, 0.39740983603551855],
   'NCDM': [10.202599999999999, 1.0624886182396915]},
  'math2': {'IMPACT': [65.5274, 31.93663734964559],
   'NCDM': [61.630766666666666, 7.737920111194109]}},
 'Beta_cd': {'algebra': {'IMPACT': [0, 0], 'NCDM': [0, 0]},
  'assist0910': {'IMPACT': [15.885433333333333, 2.6775153454275484],
   'NCDM': [29.354866666666666, 8.701718447301722]},
  'math2': {'IMPACT': [84.26469999999999, 5.656020853430832],
   'NCDM': [163.21325, 56.15742929553786]}},
 'MICAT': {'algebra': {'IMPACT': [0, 0], 'NCDM': [0, 0]},
  'assist0910': {'IMPACT': [11.7725, 1.987660950855441],
   'NCDM': [20.399400000000004, 0.3442023710293439]},
  'math2': {'IMPACT': [54.6772, 6.302373202038877], 'NCDM': [0, 0]}},
 'MAML': {'algebra': {'IMPACT': [0, 0], 'NCDM': [0, 0]},
  'assist0910': {'IMPACT': [8.475566666666666, 2.013736802839713],
   'NCDM': [24.715366666666668, 10.257397938263335]},
  'math2': {'IMPACT': [106.3803, 24.24575084748528],
   'NCDM': [51.81773333333334, 2.838770262905325]}}}