In [None]:
import json
from pathlib import Path

# Relative path: it is resolved against the kernel's current working directory.
RESULTS_ROOT = Path("../results")
# Fail fast and report where we looked, so a wrong working directory is obvious.
assert RESULTS_ROOT.exists(), f"results directory not found: {RESULTS_ROOT.resolve()}"

In [None]:
EXPERIMENT_NAME = "icml_eval_fact_cls"

# Directory holding one `fact_cls_layer_<N>_metrics.json` file per layer.
metrics_dir = RESULTS_ROOT / EXPERIMENT_NAME / "linear/1"

# Parallel lists: index i of every list refers to the layer stored in layers[i].
layers = []
contextual_accuracies, contextual_mccs = [], []
decontextual_mccs, decontextual_accuracies = [], []

for layer in range(1, 28):
    metrics_path = metrics_dir / f"fact_cls_layer_{layer}_metrics.json"
    if metrics_path.exists():
        with metrics_path.open("r") as stream:
            layer_metrics = json.load(stream)

        layers.append(layer)

        contextual = layer_metrics["contextual"]
        contextual_accuracies.append(contextual["accuracy"])
        contextual_mccs.append(contextual["mcc"])

        decontextual = layer_metrics["decontextual"]
        decontextual_accuracies.append(decontextual["accuracy"])
        decontextual_mccs.append(decontextual["mcc"])
    else:
        # Layers without a metrics file are reported and left out of every list.
        print(f"skipping layer {layer}")

In [None]:
import matplotlib.pyplot as plt

# Accuracy and MCC are drawn on the same axes; the MCC bars are drawn second
# and therefore sit on top, so a legend is needed to tell the series apart.
fig, ax = plt.subplots()
ax.bar(layers, contextual_accuracies, label="accuracy")
ax.bar(layers, contextual_mccs, label="MCC")
ax.set(xlabel="Layer", ylabel="Score", title="Contextual metrics by layer")
ax.legend()
plt.show()

In [None]:
# Accuracy and MCC are drawn on the same axes; the MCC bars are drawn second
# and therefore sit on top, so a legend is needed to tell the series apart.
fig, ax = plt.subplots()
ax.bar(layers, decontextual_accuracies, label="accuracy")
ax.bar(layers, decontextual_mccs, label="MCC")
ax.set(xlabel="Layer", ylabel="Score", title="Decontextual metrics by layer")
ax.legend()
plt.show()

Pull results specifically for layer 26:

In [None]:
# Layer 26 has its own results file (no `_metrics` suffix); its aggregate
# scores are stored under the "metrics" key.
results_file = RESULTS_ROOT / EXPERIMENT_NAME / "linear" / "1" / "fact_cls_layer_26.json"
results = json.loads(results_file.read_text())
metrics = results["metrics"]

In [None]:
# need to recompute decontextual f1
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef

def recompute_scores(results, task="decontextual"):
    """Recompute F1 and MCC for one task from the per-sample values.

    For every sample the prediction is ``score_comparator >= score_target``
    and the reference label is ``logp_comparator >= logp_target``.

    Args:
        results: Mapping with a "samples" list. Each sample maps a task name
            to a dict holding "score_comparator", "score_target",
            "logp_comparator" and "logp_target".
        task: Key of the per-sample entry to evaluate.

    Returns:
        Tuple ``(f1, mcc)`` as returned by ``f1_score`` and
        ``matthews_corrcoef``.
    """
    samples = results["samples"]
    # Comparator and target are both read from the requested task's entry, so
    # a non-default `task` is never compared against decontextual targets.
    y_pred = [
        sample[task]["score_comparator"] >= sample[task]["score_target"]
        for sample in samples
    ]
    y_true = [
        sample[task]["logp_comparator"] >= sample[task]["logp_target"]
        for sample in samples
    ]
    f1 = f1_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    return f1, mcc

In [None]:
def format_row(results):
    """Format the F1/MCC scores stored in `results` as one " & "-separated row.

    Args:
        results: Mapping with a "metrics" entry that holds "decontextual" and
            "contextual" dicts, each with "f1" and "mcc" values.

    Returns:
        A string with four scores in the order decontextual F1, decontextual
        MCC, contextual F1, contextual MCC, each formatted to two decimals and
        joined by " & ".
    """
    # Read from the argument rather than a notebook-level variable so the row
    # always reflects the results that were passed in.
    metrics = results["metrics"]
    scores = (
        metrics["decontextual"]["f1"],
        metrics["decontextual"]["mcc"],
        metrics["contextual"]["f1"],
        metrics["contextual"]["mcc"],
    )
    # Drop the leading zero ("0.85" -> ".85"). Strings that do not start with
    # "0", such as "1.00" or "-0.12", are left unchanged.
    scores_strs = [f"{score:.2f}".lstrip("0") for score in scores]
    return " & ".join(scores_strs)
# Emit the " & "-separated score row for the results loaded above.
formatted_row = format_row(results)
print(formatted_row)