# Create summary and evaluation of different runs

In [103]:
# !rm -rf snellius_results
!rsync -r snellius:~/uvadlc_practicals_2023/assignment2/part1/results_resnet18 snellius_results
!rsync -r snellius:~/uvadlc_practicals_2023/assignment2/part2/results_zs snellius_results
!rsync -r snellius:~/uvadlc_practicals_2023/assignment2/part2/results_vp snellius_results
!rsync -r snellius:~/uvadlc_practicals_2023/assignment2/part2/images snellius_results

In [104]:
import json
from pathlib import Path
import pandas as pd

In [105]:
snellius_results = Path("snellius_results")

def load_jsons_to_df(json_dir):
    json_data = []
    for js_path in json_dir.glob("*.json"):
        with open(js_path) as f: 
            js = json.load(f)
            json_data.append(js)
    return pd.DataFrame(json_data)

resnet_df = load_jsons_to_df(snellius_results / "results_resnet18")
zs_df = load_jsons_to_df(snellius_results / "results_zs")
vp_dp_df = load_jsons_to_df(snellius_results / "results_vp")

In [106]:
resnet_df.sort_values(["dataset", "augmentation_name", "test_noise"])

Unnamed: 0,dataset,augmentation_name,test_noise,test_accuracy
1,cifar10,auto_augment,False,0.7679
0,cifar10,auto_augment,True,0.5727
6,cifar10,,False,0.807
3,cifar10,,True,0.4585
5,cifar100,auto_augment,False,0.552
7,cifar100,auto_augment,True,0.3394
4,cifar100,,False,0.5849
2,cifar100,,True,0.2554


In [107]:
zs_df.sort_values(["dataset", "set"], ascending=[True, False])

Unnamed: 0,dataset,set,accuracy
1,cifar10,train,88.721609
2,cifar10,test,88.897765
3,cifar100,train,63.581651
0,cifar100,test,63.079071


In [108]:
vp_dp_df

Unnamed: 0,print_freq,print_tqdm_interval,save_freq,batch_size,num_workers,epochs,max_batches,square_size,optim,learning_rate,...,resume_best,evaluate,gpu,use_wandb,device,model_folder,start_epoch,top1_val_acc,top1_test_acc,best_epoch
0,100,60.0,50,128,16,20,0,8,sgd,40,...,False,False,,False,cuda,./save/models/deep_prompt_padding_30_7_4_cifar...,20,92.35,91.9,20.0
1,100,60.0,50,128,16,20,0,8,sgd,40,...,True,True,,False,cuda,./save/models/visual_prompt_fixed_patch_1_0_4_...,16,,60.34,16.0
2,100,60.0,50,128,16,20,0,8,sgd,40,...,False,False,,False,cuda,./save/models/deep_prompt_padding_30_3_4_cifar...,4,93.33,92.78,4.0
3,100,60.0,50,128,16,20,0,8,sgd,40,...,True,True,,False,cuda,./save/models/visual_prompt_padding_30_0_4_cif...,20,,82.08,20.0
4,100,60.0,50,128,16,20,0,8,sgd,40,...,True,True,,False,cuda,./save/models/visual_prompt_fixed_patch_224_0_...,17,,81.36,17.0
5,100,60.0,50,128,16,20,0,8,sgd,40,...,False,False,,False,cuda,./save/models/deep_prompt_padding_30_0_4_cifar...,19,93.15,92.39,19.0
6,10,60.0,50,128,16,20,0,8,sgd,40,...,False,False,,False,cuda,./save/models/visual_prompt_padding_1_cifar100...,4,67.31,66.99,4.0
7,100,60.0,50,128,16,20,0,8,sgd,40,...,True,True,,False,cuda,./save/models/visual_prompt_fixed_patch_224_0_...,20,,61.05,20.0
8,10,60.0,50,128,16,20,0,8,sgd,40,...,True,True,,False,cuda,./save/models/visual_prompt_padding_1_cifar10_...,4,,87.54,4.0
9,100,60.0,50,128,16,20,0,8,sgd,40,...,False,False,,False,cuda,./save/models/visual_prompt_padding_30_0_4_cif...,20,87.92,88.1,20.0


In [109]:
vp_cols = ["dataset", "method", "prompt_size", "test_noise", "top1_test_acc", "best_epoch"]
dp_cols = ["dataset", "prompt_num", "injection_layer", "prompt_size", "test_noise", "top1_test_acc", "best_epoch"]

In [110]:
vp_df = vp_dp_df.query("prompt_type == 'visual_prompt'")[vp_cols].sort_values(["dataset", "method", "prompt_size", "test_noise"])
vp_df

Unnamed: 0,dataset,method,prompt_size,test_noise,top1_test_acc,best_epoch
23,cifar10,fixed_patch,1,False,89.39,5.0
17,cifar10,fixed_patch,1,True,87.73,5.0
13,cifar10,fixed_patch,224,False,81.6,17.0
4,cifar10,fixed_patch,224,True,81.36,17.0
24,cifar10,padding,1,False,89.89,
8,cifar10,padding,1,True,87.54,4.0
9,cifar10,padding,30,False,88.1,20.0
3,cifar10,padding,30,True,82.08,20.0
15,cifar100,fixed_patch,1,False,64.28,16.0
1,cifar100,fixed_patch,1,True,60.34,16.0


In [111]:
dp_df = vp_dp_df.query("prompt_type == 'deep_prompt'")[dp_cols].sort_values(["dataset", "injection_layer", "test_noise"])
dp_df

Unnamed: 0,dataset,prompt_num,injection_layer,prompt_size,test_noise,top1_test_acc,best_epoch
5,cifar10,4,0,30,False,92.39,19.0
10,cifar10,4,0,30,True,88.03,19.0
2,cifar10,4,3,30,False,92.78,4.0
22,cifar10,4,3,30,True,87.97,4.0
0,cifar10,4,7,30,False,91.9,20.0
11,cifar10,4,7,30,True,88.18,20.0
18,cifar100,4,0,30,False,67.89,1.0
21,cifar100,4,0,30,True,59.3,1.0
16,cifar100,4,3,30,False,69.04,4.0
12,cifar100,4,3,30,True,62.02,4.0
