In [1]:
cd ..

/data/vision/beery/scratch/evelyn/task_datacomp


In [7]:
import json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from visualizations.file_utils import get_per_class_json_paths, get_pt_paths
from visualizations.utils import get_class_accuracies

In [8]:
# Baseline names handed to the path helpers in the cells below.
baselines_list = [
    "no_filter",
    "random_filter",
    "match_dist",
    "gradmatch",
    "zcore",
]

# Tasks are declared inline here rather than read from a config file.
# Each tuple pairs val<i> with test<i> for i = 1..4 on iWildCam
# (presumably (dataset, validation split, test split) -- confirm against the helpers).
dataset_list = [("iWildCam", f"val{i}", f"test{i}") for i in range(1, 5)]

# Remaining grid axes passed to the path helpers; each currently holds one value.
finetune_list = ["full_finetune_resnet50"]
lr_list = [0.001]
batch_size_list = [128]

In [9]:
# Collect the paths returned by get_pt_paths for the grid defined above.
# The helper logs every parameter configuration it resolves (see output below).
pt_paths = get_pt_paths(
    baselines_list,
    dataset_list,
    finetune_list,
    lr_list,
    batch_size_list,
)
print("len(pt_paths):", len(pt_paths))

Getting pt_path for param configuration: {'fraction': 1}
Getting pt_path for param configuration: {'fraction': 0.25}
Getting pt_path for param configuration: {'fraction': 0.5}
Getting pt_path for param configuration: {'fraction': 0.75}
Getting pt_path for param configuration: {'fraction': 0.9}
Getting pt_path for param configuration: {'fraction': 0.25}
Getting pt_path for param configuration: {'fraction': 0.5}
Getting pt_path for param configuration: {'fraction': 0.75}
Getting pt_path for param configuration: {'fraction': 0.9}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.01, 'selection_batch': 4}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.01, 'selection_batch': 16}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.001, 'selection_batch': 4}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.001, 'selection_batch': 16}
Getting pt_path for param configuration: {'frac

In [10]:
# Run get_class_accuracies on every path in pt_paths. With save_json=True the
# helper reports writing a "*_class_accuracy.json" file for each path (see the
# log output below).
for pt_path in pt_paths:
    get_class_accuracies(pt_path, save_json=True)

Class accuracy dictionary saved to: /data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/no_filter_fraction_1/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json
Class accuracy dictionary saved to: /data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/no_filter_fraction_1/test2_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json
Class accuracy dictionary saved to: /data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/no_filter_fraction_1/test3_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json
Class accuracy dictionary saved to: /data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/no_filter_fraction_1/test4_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json
Class accuracy dictionary saved to: /data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/random_filter_fraction_0.25/test1_full_finetune_resnet50_lr=0.001_batchsize

In [11]:
# Look up the per-class JSON paths for the same grid that was used for pt_paths.
json_paths = get_per_class_json_paths(
    baselines_list,
    dataset_list,
    finetune_list,
    lr_list,
    batch_size_list,
)

Getting pt_path for param configuration: {'fraction': 1}
Getting pt_path for param configuration: {'fraction': 0.25}
Getting pt_path for param configuration: {'fraction': 0.5}
Getting pt_path for param configuration: {'fraction': 0.75}
Getting pt_path for param configuration: {'fraction': 0.9}
Getting pt_path for param configuration: {'fraction': 0.25}
Getting pt_path for param configuration: {'fraction': 0.5}
Getting pt_path for param configuration: {'fraction': 0.75}
Getting pt_path for param configuration: {'fraction': 0.9}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.01, 'selection_batch': 4}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.01, 'selection_batch': 16}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.001, 'selection_batch': 4}
Getting pt_path for param configuration: {'fraction': 0.25, 'selection_lr': 0.001, 'selection_batch': 16}
Getting pt_path for param configuration: {'frac

In [26]:
# Keep only the result files that belong to one deployment.
# Result files are named "<deployment>_full_finetune_...", so match on the
# file-name prefix "<deployment>_" rather than on a substring of the whole path:
# a plain `in` test would also select "test10_..." files or any path whose
# directories merely contain the text "test1".
deployment = "test1"
deployment_json_paths = [
    json_path
    for json_path in json_paths
    if Path(json_path).name.startswith(f"{deployment}_")
]
deployment_json_paths

['/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/no_filter_fraction_1/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json',
 '/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/random_filter_fraction_0.25/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json',
 '/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/random_filter_fraction_0.5/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json',
 '/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/random_filter_fraction_0.75/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json',
 '/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/random_filter_fraction_0.9/test1_full_finetune_resnet50_lr=0.001_batchsize=128_logits_class_accuracy.json',
 '/data/vision/beery/scratch/evelyn/task_datacomp/experiments/iWildCam/match_dist_fraction_0.25/test1_full_fi

In [27]:
# Derive a human-readable method label from a result-file path.
def extract_method_name(path: str) -> str:
    """Return the name of the directory that directly contains ``path``.

    Paths typically look like ".../iWildCam/<method_folder>/test1_full_...",
    so the second-to-last path component is the method folder,
    e.g. "random_filter_fraction_0.25".

    Raises:
        IndexError: if ``path`` has fewer than two components.
    """
    return Path(path).parts[-2]

# Load all JSONs into a dict of dicts: {method_name: {class_id: value}}.
acc_dicts = {}
# method name -> path it was loaded from; used only to detect column collisions.
method_sources = {}
for path in deployment_json_paths:
    method_name = extract_method_name(path)
    # The method name is just the parent folder, so two different files in the
    # same folder (e.g. several fine-tune / lr / batch-size settings) would
    # silently overwrite each other's column. Fail loudly instead.
    previous_path = method_sources.setdefault(method_name, path)
    if previous_path != path:
        raise ValueError(
            f"Duplicate method name {method_name!r}: {previous_path} and {path} "
            "would map to the same DataFrame column"
        )
    # Explicit encoding so reading does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        acc = json.load(f)
    # JSON object keys are always strings; convert them to float so the
    # DataFrame index sorts numerically rather than lexicographically.
    acc = {float(k): v for k, v in acc.items()}
    acc_dicts[method_name] = acc

# Build dataframe (outer join on all class ids): one column per method, one row
# per class id; a class missing from a method's JSON shows up as NaN.
df = pd.DataFrame(acc_dicts).sort_index()

In [28]:
# Lift pandas' column display limit so wide frames are rendered in full.
# This is a global display option and stays in effect for the rest of the session.
pd.options.display.max_columns = None

In [29]:
# Display the full table: one row per class id (index), one column per method
# folder. NaN means that class id was absent from that method's JSON file.
df

Unnamed: 0,no_filter_fraction_1,random_filter_fraction_0.25,random_filter_fraction_0.5,random_filter_fraction_0.75,random_filter_fraction_0.9,match_dist_fraction_0.25,match_dist_fraction_0.5,match_dist_fraction_0.75,match_dist_fraction_0.9,gradmatch_fraction_0.25_selection_batch_16_selection_lr_0.01,gradmatch_fraction_0.25_selection_batch_4_selection_lr_0.001,gradmatch_fraction_0.25_selection_batch_16_selection_lr_0.001,gradmatch_fraction_0.25_selection_batch_4_selection_lr_0.0001,gradmatch_fraction_0.25_selection_batch_16_selection_lr_0.0001,gradmatch_fraction_0.5_selection_batch_4_selection_lr_0.01,gradmatch_fraction_0.5_selection_batch_16_selection_lr_0.01,gradmatch_fraction_0.5_selection_batch_4_selection_lr_0.001,gradmatch_fraction_0.5_selection_batch_16_selection_lr_0.001,gradmatch_fraction_0.5_selection_batch_4_selection_lr_0.0001,gradmatch_fraction_0.5_selection_batch_16_selection_lr_0.0001,gradmatch_fraction_0.75_selection_batch_16_selection_lr_0.01,gradmatch_fraction_0.75_selection_batch_4_selection_lr_0.001,gradmatch_fraction_0.75_selection_batch_16_selection_lr_0.001,gradmatch_fraction_0.75_selection_batch_4_selection_lr_0.0001,gradmatch_fraction_0.75_selection_batch_16_selection_lr_0.0001,gradmatch_fraction_0.9_selection_batch_16_selection_lr_0.01,gradmatch_fraction_0.9_selection_batch_4_selection_lr_0.001,gradmatch_fraction_0.9_selection_batch_16_selection_lr_0.001,gradmatch_fraction_0.9_selection_batch_4_selection_lr_0.0001,gradmatch_fraction_0.9_selection_batch_16_selection_lr_0.0001,zcore_fraction_0.25_n_sample_500_redund_nn_1000_sample_dim_6,zcore_fraction_0.25_n_sample_5000_redund_nn_1000_sample_dim_2
5.0,0.914414,0.835586,0.802928,0.787162,0.836712,0.761261,0.855856,0.72973,0.748874,0.712838,0.79955,0.757883,0.780405,0.564189,0.804054,0.789414,0.783784,0.834459,0.760135,0.79955,0.760135,0.77027,0.844595,0.717342,0.806306,0.896396,0.865991,0.842342,0.791667,0.775901,0.93018,0.906532
11.0,0.911184,0.664474,0.881579,0.884868,0.828947,0.901316,0.888158,0.881579,0.930921,0.832237,0.875,0.957237,0.878289,0.914474,0.980263,0.914474,0.901316,0.835526,0.970395,0.835526,0.950658,0.9375,0.865132,0.904605,0.865132,0.819079,0.904605,0.845395,0.822368,0.907895,0.858553,0.934211
17.0,0.615385,0.676923,0.630769,0.615385,0.584615,0.815385,0.676923,0.723077,0.784615,0.446154,0.492308,0.646154,0.815385,0.738462,0.384615,0.738462,0.492308,0.707692,0.769231,0.723077,0.630769,0.769231,0.692308,0.8,0.8,0.738462,0.646154,0.707692,0.584615,0.723077,0.4,0.738462
28.0,0.428571,0.845238,0.785714,0.714286,0.809524,0.761905,0.535714,0.857143,0.404762,0.369048,0.678571,0.547619,0.107143,0.297619,0.0,0.22619,0.833333,0.666667,0.02381,0.964286,0.0,0.678571,0.583333,0.738095,0.77381,0.797619,0.690476,0.702381,0.761905,0.785714,0.690476,0.25
31.0,0.083333,0.0,1.0,0.083333,0.055556,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0
36.0,0.887218,0.827068,0.834586,0.864662,0.736842,0.842105,0.857143,0.819549,0.857143,0.842105,0.857143,0.819549,0.857143,0.864662,0.857143,0.819549,0.744361,0.684211,0.857143,0.87218,0.819549,0.75188,0.857143,0.857143,0.857143,0.699248,0.857143,0.857143,0.849624,0.714286,0.857143,0.834586
37.0,0.272727,0.793388,0.471074,0.438017,0.68595,0.793388,0.876033,0.719008,0.553719,0.677686,0.636364,0.61157,0.785124,0.785124,0.768595,0.694215,0.743802,0.413223,0.727273,0.727273,0.545455,0.785124,0.760331,0.578512,0.743802,0.752066,0.859504,0.917355,0.859504,0.735537,0.933884,0.454545
56.0,0.0,0.0,0.0,0.222222,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,0.111111,0.111111
58.0,0.047619,0.079365,0.126984,0.0,0.079365,,,,,,,,,,,,,,,,,,,,,,,,,,0.047619,0.0
85.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0


In [32]:
# Subset of method columns to compare side by side: the unfiltered run plus the
# fraction-0.25 variants of random_filter, match_dist and two zcore settings.
cols = [
    "no_filter_fraction_1",
    "random_filter_fraction_0.25",
    "match_dist_fraction_0.25",
    "zcore_fraction_0.25_n_sample_500_redund_nn_1000_sample_dim_6",
    "zcore_fraction_0.25_n_sample_5000_redund_nn_1000_sample_dim_2",
]

In [33]:
# Show all rows, restricted to the method columns selected in `cols`.
df.loc[:, cols]

Unnamed: 0,no_filter_fraction_1,random_filter_fraction_0.25,match_dist_fraction_0.25,zcore_fraction_0.25_n_sample_500_redund_nn_1000_sample_dim_6,zcore_fraction_0.25_n_sample_5000_redund_nn_1000_sample_dim_2
5.0,0.914414,0.835586,0.761261,0.93018,0.906532
11.0,0.911184,0.664474,0.901316,0.858553,0.934211
17.0,0.615385,0.676923,0.815385,0.4,0.738462
28.0,0.428571,0.845238,0.761905,0.690476,0.25
31.0,0.083333,0.0,,0.0,0.0
36.0,0.887218,0.827068,0.842105,0.857143,0.834586
37.0,0.272727,0.793388,0.793388,0.933884,0.454545
56.0,0.0,0.0,,0.111111,0.111111
58.0,0.047619,0.079365,,0.047619,0.0
85.0,0.0,0.0,,0.0,0.0
