In [1]:
import os
import numpy as np

In [2]:
PARENT_DIR = "/projects/leelab/clin25"
FROMATTED_METHOD_DICT = {
    "cocoop": "CoCoOp",
    "cpl": "CPL",
}
DATASET_LIST = [
    "sun397",
    "caltech-101",
    "oxford_flowers",
    "food-101",
]
SEED_LIST = [1, 2, 3]

PAPER_SEEN_ACCURACY_DICT = {
    "cocoop": {
        "sun397": 79.08,
        "caltech-101": 97.66,
        "oxford_flowers": 94.65,
        "food-101": 90.67,
    },
    "cpl": {
        "sun397": 81.05,
        "caltech-101": 97.70,
        "oxford_flowers": 93.91,
        "food-101": 93.01,
    },
}

PAPER_UNSEEN_ACCURACY_DICT = {
    "cocoop": {
        "sun397": 76.83,
        "caltech-101": 93.92,
        "oxford_flowers": 69.24,
        "food-101": 91.53,
    },
    "cpl": {
        "sun397": 80.19,
        "caltech-101": 94.94,
        "oxford_flowers": 72.30,
        "food-101": 93.44,
    },
}

PAPER_ACCURACY_DICT = {
    "seen": PAPER_SEEN_ACCURACY_DICT,
    "unseen": PAPER_UNSEEN_ACCURACY_DICT,
}

In [3]:
def retrieve_accuracy(method: str, dataset: str, seed: int, evaluation: str) -> float:
    if evaluation == "seen":
        eval_dir = "eval-base"
    elif evaluation == "unseen":
        eval_dir = "eval-new"
    else:
        raise ValueError(f"evaluation={evaluation} should be seen or unseen!")
        
    output_dir = os.path.join(
        PARENT_DIR, f"{method}-output", dataset, "base", f"{seed}", eval_dir
    )
    
    with open(os.path.join(output_dir, "log.txt")) as handle:
        log_text = handle.readlines()
        
    accuracy_line = log_text[-3].strip().replace("* ", "").replace(":", "")
    assert accuracy_line.split(" ")[0] == "accuracy"
    return float(accuracy_line.split(" ")[1].replace("%", ""))

In [4]:
def print_result_summary(method: str, evaluation: str) -> None:
    print(f"{FROMATTED_METHOD_DICT[method]} performance with {evaluation} classes.")
    print("-" * 45)
    for dataset in DATASET_LIST:
        print(dataset)
        accuracy_list = [
            retrieve_accuracy(method, dataset, seed, evaluation)
            for seed in SEED_LIST
        ]
        paper_accuracy = PAPER_ACCURACY_DICT[evaluation][method][dataset]

        for i in range(len(accuracy_list)):
            if i == 0:
                relative_diff = (accuracy_list[i] - paper_accuracy) / paper_accuracy * 100
                relative_diff = f" ({relative_diff:.2f})"
            else:
                relative_diff = ""
            print(f"\tAccuracy with seed {SEED_LIST[i]}: {accuracy_list[i]}{relative_diff}")

        accuracy_mean = np.mean(accuracy_list)
        accuracy_se = np.var(accuracy_list) / np.sqrt(len(accuracy_list))
        accuracy_ci = 1.96 * accuracy_se

        mean_relative_diff = (accuracy_mean - paper_accuracy) / paper_accuracy * 100
        ci_contains_paper_accuracy = (
            (paper_accuracy <= accuracy_mean + accuracy_ci)
            & (paper_accuracy >= accuracy_mean - accuracy_ci)
        )

        print(
            f"\tAccuracy mean (95% CI): {accuracy_mean:.2f} ({accuracy_ci:.2f})"
        )
        print("")
        print(f"\tMean relative difference with paper accuracy: {mean_relative_diff:.2f}")
        print(f"\t95% CI contains paper accuracy: {ci_contains_paper_accuracy}")
        print("-" * 45)

## Performance with seen classes

In [5]:
print_result_summary("cocoop", "seen")

CoCoOp performance with seen classes.
---------------------------------------------
sun397
	Accuracy with seed 1: 78.96 (-0.15)
	Accuracy with seed 2: 79.69
	Accuracy with seed 3: 79.31
	Accuracy mean (95% CI): 79.32 (0.10)

	Mean relative difference with paper accuracy: 0.30
	95% CI contains paper accuracy: False
---------------------------------------------
caltech-101
	Accuracy with seed 1: 97.55 (-0.11)
	Accuracy with seed 2: 97.81
	Accuracy with seed 3: 97.87
	Accuracy mean (95% CI): 97.74 (0.02)

	Mean relative difference with paper accuracy: 0.09
	95% CI contains paper accuracy: False
---------------------------------------------
oxford_flowers
	Accuracy with seed 1: 94.87 (0.23)
	Accuracy with seed 2: 93.92
	Accuracy with seed 3: 95.06
	Accuracy mean (95% CI): 94.62 (0.28)

	Mean relative difference with paper accuracy: -0.04
	95% CI contains paper accuracy: True
---------------------------------------------
food-101
	Accuracy with seed 1: 90.56 (-0.12)
	Accuracy with seed 2: 9

In [6]:
print_result_summary("cpl", "seen")

CPL performance with seen classes.
---------------------------------------------
sun397
	Accuracy with seed 1: 78.99 (-2.54)
	Accuracy with seed 2: 79.74
	Accuracy with seed 3: 79.76
	Accuracy mean (95% CI): 79.50 (0.15)

	Mean relative difference with paper accuracy: -1.92
	95% CI contains paper accuracy: False
---------------------------------------------
caltech-101
	Accuracy with seed 1: 97.74 (0.04)
	Accuracy with seed 2: 98.0
	Accuracy with seed 3: 98.06
	Accuracy mean (95% CI): 97.93 (0.02)

	Mean relative difference with paper accuracy: 0.24
	95% CI contains paper accuracy: False
---------------------------------------------
oxford_flowers
	Accuracy with seed 1: 95.16 (1.33)
	Accuracy with seed 2: 95.73
	Accuracy with seed 3: 94.11
	Accuracy mean (95% CI): 95.00 (0.51)

	Mean relative difference with paper accuracy: 1.16
	95% CI contains paper accuracy: False
---------------------------------------------
food-101
	Accuracy with seed 1: 90.86 (-2.31)
	Accuracy with seed 2: 90.35

## Performance with unseen classes

In [7]:
print_result_summary("cocoop", "unseen")

CoCoOp performance with unseen classes.
---------------------------------------------
sun397
	Accuracy with seed 1: 77.53 (0.91)
	Accuracy with seed 2: 76.85
	Accuracy with seed 3: 76.32
	Accuracy mean (95% CI): 76.90 (0.28)

	Mean relative difference with paper accuracy: 0.09
	95% CI contains paper accuracy: True
---------------------------------------------
caltech-101
	Accuracy with seed 1: 92.69 (-1.31)
	Accuracy with seed 2: 92.36
	Accuracy with seed 3: 92.79
	Accuracy mean (95% CI): 92.61 (0.04)

	Mean relative difference with paper accuracy: -1.39
	95% CI contains paper accuracy: False
---------------------------------------------
oxford_flowers
	Accuracy with seed 1: 71.77 (3.65)
	Accuracy with seed 2: 71.91
	Accuracy with seed 3: 72.13
	Accuracy mean (95% CI): 71.94 (0.02)

	Mean relative difference with paper accuracy: 3.89
	95% CI contains paper accuracy: False
---------------------------------------------
food-101
	Accuracy with seed 1: 91.19 (-0.37)
	Accuracy with seed 2: 

In [8]:
print_result_summary("cpl", "unseen")

CPL performance with unseen classes.
---------------------------------------------
sun397
	Accuracy with seed 1: 77.26 (-3.65)
	Accuracy with seed 2: 76.96
	Accuracy with seed 3: 76.02
	Accuracy mean (95% CI): 76.75 (0.32)

	Mean relative difference with paper accuracy: -4.29
	95% CI contains paper accuracy: False
---------------------------------------------
caltech-101
	Accuracy with seed 1: 93.78 (-1.22)
	Accuracy with seed 2: 93.89
	Accuracy with seed 3: 93.34
	Accuracy mean (95% CI): 93.67 (0.06)

	Mean relative difference with paper accuracy: -1.34
	95% CI contains paper accuracy: False
---------------------------------------------
oxford_flowers
	Accuracy with seed 1: 75.04 (3.79)
	Accuracy with seed 2: 70.21
	Accuracy with seed 3: 70.43
	Accuracy mean (95% CI): 71.89 (5.61)

	Mean relative difference with paper accuracy: -0.56
	95% CI contains paper accuracy: True
---------------------------------------------
food-101
	Accuracy with seed 1: 92.0 (-1.54)
	Accuracy with seed 2: 9