In [142]:
import pathlib
import glob
import os
import sys
import json
import seaborn as sns

from matplotlib import pyplot as plt

from analysis import Run, RunAnalyser, CriterionPlotter, load_all_runs_from_a_dir

In [143]:
# Print one LaTeX \definecolor line per entry of the active seaborn palette and
# collect the matching colour names (color0, color1, ...) in COLORS.
COLORS = []
for idx, c in enumerate(sns.color_palette()):
    # Round instead of truncating: a channel such as n/255 can come back from
    # the multiplication as n - epsilon, and int() alone would then emit n - 1.
    rgb = ','.join(str(int(round(k * 255))) for k in c)
    print(f"\\definecolor{{color{idx}}}{{RGB}}{{{rgb}}}")
    COLORS.append(f'color{idx}')

\definecolor{color0}{RGB}{31,119,180}
\definecolor{color1}{RGB}{255,127,14}
\definecolor{color2}{RGB}{44,160,44}
\definecolor{color3}{RGB}{214,39,40}
\definecolor{color4}{RGB}{148,103,189}
\definecolor{color5}{RGB}{140,86,75}
\definecolor{color6}{RGB}{227,119,194}
\definecolor{color7}{RGB}{127,127,127}
\definecolor{color8}{RGB}{188,189,34}
\definecolor{color9}{RGB}{23,190,207}


In [144]:
# Category name lists. The order inside each list is significant wherever the
# list is iterated to print per-category values.
CITY_CATEGORIES = [
    'road_construction_site',
    'crowd',
    'large_trash_pile',
    'fire',
    'car',
]
FOREST_CATEGORIES = [
    'campsite',
    'trash_pile',
    'person',
    'forest_fire',
    'building',
]
ALL_CATEGORIES = [
    'large_trash_pile',
    'fire',
    'car',
    'trash_pile',
    'person',
    'building',
    'campsite',
    'forest_fire',
    'road_construction_site',
    'crowd',
]

# Model identifiers; each one is paired positionally with an entry of COLORS.
MODELS = [
    'GPT4o',
    'Sonnet',
    'Gemini',
    'Phi',
    'InternVL',
    'llava-interleave-7b',
    'Qwen2-VL-72B',
    'llava-onevision',
    'Pixtral-Large-Instruct-2411',
]

# model name -> LaTeX colour name. zip() stops at the shorter sequence, so any
# surplus colours are simply left unused.
colors_map = dict(zip(MODELS, COLORS))

In [159]:
# Display the model-name -> LaTeX colour-name mapping built from MODELS and COLORS.
colors_map

{'GPT4o': 'color0',
 'Sonnet': 'color1',
 'Gemini': 'color2',
 'Phi': 'color3',
 'InternVL': 'color4',
 'llava-interleave-7b': 'color5',
 'Qwen2-VL-72B': 'color6',
 'llava-onevision': 'color7',
 'Pixtral-Large-Instruct-2411': 'color8'}

In [145]:
# Can be used for forest as well
def class_aggregation_function(run: Run):
    """Return the grouping key for ``run`` based on its object type.

    The object type is stringified and lower-cased. Any type whose text
    contains "car", "pickup" or "truck" is collapsed into the single key
    "car"; every other type is returned as the lower-cased string itself.
    """
    label = str(run.object_type).lower()
    vehicle_markers = ("car", "pickup", "truck")

    # Substring match, so e.g. "pickup_truck_3" is grouped under "car".
    if any(marker in label for marker in vehicle_markers):
        return "car"
    return label

def anomaly_aggregation_function(run: Run):
    """Return "anomaly" or "main" as the grouping key for ``run``.

    The key is "anomaly" when the lower-cased string form of
    ``run.object_type`` contains the text "anomaly", and "main" otherwise.
    """
    is_anomaly = "anomaly" in str(run.object_type).lower()
    return "anomaly" if is_anomaly else 'main'

def success_criterion(run):
    """Success test that also requires ``run.model_claimed`` to be truthy.

    Returns ``run.model_claimed`` itself when it is falsy; otherwise returns
    whatever ``RunAnalyser(run).maciek_criterion_satisfied(10)`` returns.
    """
    claimed = run.model_claimed
    if not claimed:
        # Same result as short-circuiting `and`: hand back the falsy value.
        return claimed
    return RunAnalyser(run).maciek_criterion_satisfied(10)

## Settings

In [156]:
# Each model's logs live in "<base_path>/<model name><suffix>".
base_path = "../all_logs"
suffix = '-ForestNew'

# (model name, log directory) pairs, in MODELS order.
all_exp = [
    (model, os.path.join(base_path, f"{model}{suffix}"))
    for model in MODELS
]
print(all_exp)

# Category order used when per-category values are printed.
categories = ALL_CATEGORIES

# Function used to group runs. Uncomment the first assignment (and drop the
# second) to group by anomaly/main instead of by object class.
# agg_fn = anomaly_aggregation_function
agg_fn = class_aggregation_function

[('GPT4o', '../all_logs/GPT4o-ForestNew'), ('Sonnet', '../all_logs/Sonnet-ForestNew'), ('Gemini', '../all_logs/Gemini-ForestNew'), ('Phi', '../all_logs/Phi-ForestNew'), ('InternVL', '../all_logs/InternVL-ForestNew'), ('llava-interleave-7b', '../all_logs/llava-interleave-7b-ForestNew'), ('Qwen2-VL-72B', '../all_logs/Qwen2-VL-72B-ForestNew'), ('llava-onevision', '../all_logs/llava-onevision-ForestNew'), ('Pixtral-Large-Instruct-2411', '../all_logs/Pixtral-Large-Instruct-2411-ForestNew')]


In [157]:
# For every model: load its runs, group them with agg_fn, and compute two sets
# of per-group statistics. `plots` collects
# (model name, stats with the claim requirement, stats without it).
plots = []
for model_name, exp_path in all_exp:
    runs = load_all_runs_from_a_dir(pathlib.Path(exp_path))
    plotter = CriterionPlotter(runs)
    runs_aggregated_per_type = plotter.aggregate_runs_per_function(agg_fn)

    # Success requires run.model_claimed AND the criterion (see success_criterion).
    claimed_stats = plotter.plot_accuracy_in_aggregated_runs(
        runs_aggregated_per_type,
        None,
        success_criterion=success_criterion,
    )

    # Same criterion, but without looking at run.model_claimed.
    unclaimed_stats = plotter.plot_accuracy_in_aggregated_runs(
        runs_aggregated_per_type,
        None,
        success_criterion=lambda run: RunAnalyser(run).maciek_criterion_satisfied(10),
    )

    plots.append((model_name, claimed_stats, unclaimed_stats))

## Radar plot

In [160]:
# One \tkzKiviatLine per model, with the mean of each category as a percentage.
# A category that is absent from the model's stats becomes an empty field.
for name, stat, _ in plots:
    cells = [f"{stat[c]['mean'] * 100:.3f}" if c in stat else '' for c in categories]
    print(f"\\tkzKiviatLine[thick,color={colors_map[name]}]({','.join(cells)})")

\tkzKiviatLine[thick,color=color0](,,,43.400,19.050,51.110,68.750,53.570,,)
\tkzKiviatLine[thick,color=color1](,,,37.740,40.480,68.890,59.380,60.710,,)
\tkzKiviatLine[thick,color=color2](,,,41.510,23.810,48.890,40.620,64.290,,)
\tkzKiviatLine[thick,color=color3](,,,0.000,0.000,0.000,0.000,0.000,,)
\tkzKiviatLine[thick,color=color4](,,,3.770,2.380,4.440,0.000,7.140,,)
\tkzKiviatLine[thick,color=color5](,,,0.000,0.000,0.000,0.000,10.710,,)
\tkzKiviatLine[thick,color=color6](,,,9.430,4.760,28.890,6.250,35.710,,)
\tkzKiviatLine[thick,color=color7](,,,9.430,9.520,15.560,25.000,42.860,,)
\tkzKiviatLine[thick,color=color8](,,,26.420,16.670,62.220,43.750,46.430,,)


In [164]:
# Emit a legend image line followed by its legend entry line for every model,
# in colors_map order.
for model_label, latex_color in colors_map.items():
    print(
        f'\\addlegendimage{{{latex_color},ultra thick}}',
        f'\\addlegendentry{{{model_label}}};',
        sep='\n',
    )
    

\addlegendimage{color0,ultra thick}
\addlegendentry{GPT4o};
\addlegendimage{color1,ultra thick}
\addlegendentry{Sonnet};
\addlegendimage{color2,ultra thick}
\addlegendentry{Gemini};
\addlegendimage{color3,ultra thick}
\addlegendentry{Phi};
\addlegendimage{color4,ultra thick}
\addlegendentry{InternVL};
\addlegendimage{color5,ultra thick}
\addlegendentry{llava-interleave-7b};
\addlegendimage{color6,ultra thick}
\addlegendentry{Qwen2-VL-72B};
\addlegendimage{color7,ultra thick}
\addlegendentry{llava-onevision};
\addlegendimage{color8,ultra thick}
\addlegendentry{Pixtral-Large-Instruct-2411};


## Main plot

In [133]:
# The bars use the 'main' group, which only exists when runs were grouped with
# anomaly_aggregation_function. `plots` is built with whatever agg_fn the
# settings cell selected, so reuse it when it already has a 'main' entry for
# every model; otherwise recompute here with the anomaly grouping instead of
# failing with KeyError: 'main'.
if all('main' in claimed for _, claimed, _ in plots):
    main_stats = [(name, claimed['main']) for name, claimed, _ in plots]
else:
    main_stats = []
    for name, exp_path in all_exp:
        # NOTE: this reloads every run directory, which may be slow.
        model_runs = load_all_runs_from_a_dir(pathlib.Path(exp_path))
        model_plotter = CriterionPlotter(model_runs)
        grouped = model_plotter.aggregate_runs_per_function(anomaly_aggregation_function)
        claimed = model_plotter.plot_accuracy_in_aggregated_runs(
            grouped, None, success_criterion=success_criterion
        )
        main_stats.append((name, claimed['main']))

# One \addplot bar per model at x = 1, 2, ...; the error bars are the distances
# from the mean to the two ends of the confidence interval, as percentages.
for idx, (m, p) in enumerate(main_stats):
    mean = p['mean']
    lower = mean - p['conf_int'][0]
    upper = p['conf_int'][1] - mean
    print(f'\\addplot[style={{fill={colors_map[m]}}},error bars/.cd, y dir=both, y explicit] coordinates {{({idx+1}, {mean*100:.3f}) += (0,{upper*100:.3f}) -= (0,{lower*100:.3f})}};')



\addplot[style={fill=color0},error bars/.cd, y dir=both, y explicit] coordinates {(1, 33.500) += (0,7.000) -= (0,6.500)};
\addplot[style={fill=color1},error bars/.cd, y dir=both, y explicit] coordinates {(2, 30.500) += (0,6.890) -= (0,6.300)};
\addplot[style={fill=color2},error bars/.cd, y dir=both, y explicit] coordinates {(3, 41.500) += (0,7.160) -= (0,6.910)};
\addplot[style={fill=color3},error bars/.cd, y dir=both, y explicit] coordinates {(4, 0.000) += (0,1.830) -= (0,0.000)};
\addplot[style={fill=color4},error bars/.cd, y dir=both, y explicit] coordinates {(5, 1.500) += (0,2.820) -= (0,1.190)};
\addplot[style={fill=color5},error bars/.cd, y dir=both, y explicit] coordinates {(6, 1.500) += (0,2.820) -= (0,1.190)};
\addplot[style={fill=color6},error bars/.cd, y dir=both, y explicit] coordinates {(7, 18.000) += (0,6.040) -= (0,5.060)};
\addplot[style={fill=color7},error bars/.cd, y dir=both, y explicit] coordinates {(8, 11.500) += (0,5.250) -= (0,4.070)};
\addplot[style={fill=color8

In [124]:
# Inspect the first model's claimed-success statistics (element 1 of its `plots` tuple).
plots[0][1]

{'main': {'mean': np.float64(0.455),
  'std': np.float64(0.498),
  'conf_int': (0.3846, 0.5267),
  'n': 200,
  'total_successes': 91},
 'anomaly': {'mean': np.float64(0.39),
  'std': np.float64(0.4877),
  'conf_int': (0.294, 0.4927),
  'n': 100,
  'total_successes': 39}}