In [None]:
from pathlib import Path

# Root directory holding all experiment outputs, relative to the directory the
# notebook is run from.
RESULTS_ROOT = Path("../../results")
# Include the resolved path in the failure so a wrong working directory is
# obvious from the error alone.
assert RESULTS_ROOT.exists(), f"results directory not found: {RESULTS_ROOT.resolve()}"

# Prefix of the result sub-directory names read by the cells below.
RESULTS_PREFIX = "post_icml_"  # Change to whatever you used
# Model key: indexes the per-model layer tables and is part of directory names.
MODEL = "gptj"

# LaTeX macro used as the row label for our method.
OURMETHOD = r"\ourmethod"

In [None]:
import sys
sys.path.append("../..")

from experiments.aliases import REMEDI_EDITOR_LAYER, REMEDI_ENTITY_CLS_LAYER

In [None]:
import json

def load_json(file):
    """Read and parse a JSON file.

    Args:
        file: Location of the JSON file, as a `str` or `Path`.

    Returns:
        The decoded JSON content.

    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # encoding, which is not UTF-8 everywhere.
    with Path(file).open("r", encoding="utf-8") as handle:
        return json.load(handle)

# Error Correction

In [None]:
def latexify_num(x):
    """Wrap `x` in dollar signs so LaTeX typesets it in math mode."""
    return "${}$".format(x)

def format_task(task):
    """Format one task's metrics as a pair of LaTeX table cells.

    Args:
        task: Mapping with a numeric "top1_accuracy" entry and a "fluency"
            mapping that has a numeric "mean" entry.

    Returns:
        A tuple `(accuracy_cell, fluency_cell)` of math-mode strings. Accuracy
        is shown with two decimals and leading zeros stripped (0.5 -> ".50");
        fluency is the mean multiplied by 100, shown with one decimal.

    """
    # The fluency standard deviation is not part of the table, so it is not
    # read here; metrics without a "std" entry are therefore accepted.
    accuracy = f"{task['top1_accuracy']:.2f}".lstrip("0")
    fluency = f"{task['fluency']['mean'] * 100:.1f}"
    return latexify_num(accuracy), latexify_num(fluency)

def latexify_row(row):
    r"""Join the string cells of `row` with " & " and append " \\"."""
    cells = " & ".join(row)
    return f"{cells} \\\\"

# Print one LaTeX row per method: the label, then the (accuracy, fluency)
# cells for the contextual and decontextual tasks.
layer = REMEDI_EDITOR_LAYER[MODEL]["biosbias"]
results_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_gen_biosbias_{MODEL}"
for method in ("baseline", OURMETHOD):
    # The baseline has its own file; our method's metrics sit under the
    # linear editor's layer directory.
    if method == "baseline":
        relative_file = "baseline.json"
    else:
        relative_file = f"linear/{layer}/error_correction_metrics.json"

    row = [method.capitalize()]
    for task in ("contextual", "decontextual"):
        results_file = results_dir / task / relative_file
        assert results_file.exists()
        results = load_json(results_file)
        # Metrics may be nested under "metrics" or stored at the top level.
        row.extend(format_task(results.get("metrics", results)))
    print(latexify_row(row))

# Factual Editing

In [None]:
# Editor layer and results directory for the CounterFact generation evaluation.
# NOTE: this rebinds `layer`, which the error-correction cell sets to the
# biosbias layer; re-run that cell in full before reusing its loop.
layer = REMEDI_EDITOR_LAYER[MODEL]["counterfact"]
experiment_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_gen_counterfact_{MODEL}"
# Fail here, with the offending path, rather than part-way through a later cell.
assert experiment_dir.exists(), f"results directory not found: {experiment_dir}"

In [None]:
from remedi import data, metrics

from tqdm.auto import tqdm

# Load references from our own eval; each one is wrapped in a singleton list.
# NOTE(review): falsy references are dropped here, but the generations built
# below are not filtered to match -- confirm `average_tfidf_similarity` does
# not pair the two lists by position.
references_file = experiment_dir / "essence_references.json"
references = [
    [reference]
    for reference in load_json(references_file)["references"]
    if reference
]

# Load the counterfact vectorizer.
tfidf_vectorizer = data.load_counterfact_tfidf_vectorizer()

# Score the post-edit generations of each ROME-repo method against the references.
essences_by_method = {}
for method, results_dir in (
    ("FT", "../../../rome/results/FT-essence/run_000"),
    ("ROME", "../../../rome/results/ROME-essence/run_000"),
):
    case_files = sorted(Path(results_dir).glob("case*.json"))
    cases = [load_json(case_file) for case_file in tqdm(case_files)]

    # Order by case id and keep the first 5000 cases.
    cases.sort(key=lambda case: case["case_id"])
    cases = cases[:5000]

    generations = [[case["post"]["generation"]] for case in cases]

    essences_by_method[method] = metrics.average_tfidf_similarity(
        generations, references, tfidf_vectorizer
    )

In [None]:
# Print table rows for the "prefix" and "replace" methods and for our method.

# Metrics file to read per benchmark, and which of its keys to keep.
benchmark_keys = (
    ("efficacy", ("score",)),
    ("generation", ("fluency", "consistency")),
    ("essence", ("essence",)),
)
# Order of the score columns in each printed row.
table_columns = (
    "efficacy_score",
    "neighborhood_score",
    "generation_consistency",
    "generation_fluency",
    "essence_essence",
)

for method in ("prefix", "replace", OURMETHOD):
    # Our method's results sit under the linear editor's layer directory; the
    # other methods each have a directory named after them.
    results_dir = (
        experiment_dir / f"linear/{layer}"
        if method == OURMETHOD
        else experiment_dir / method
    )

    scores = {}
    for benchmark_name, keys in benchmark_keys:
        results = load_json(results_dir / f"{benchmark_name}_metrics.json")
        scores.update({f"{benchmark_name}_{key}": results[key] for key in keys})

    # The neighborhood score is a fixed value for these rows, not read from disk.
    scores["neighborhood_score"] = {"mean": 1.0, "std": 0.0}

    row = [method.capitalize()]
    for column in table_columns:
        percent = scores[column]["mean"] * 100
        row.append(latexify_num(f"{percent:.1f}".lstrip("0")))

    print(latexify_row(row))

# Section header separating the model-editing baselines from the rows above.
print(
    r"\midrule",
    r"\textbf{Model Edit} & & & & & \\",
    r"\midrule",
    sep="\n",
)

# Print rows for ROME method.
summary_metrics = (
    "post_rewrite_success",
    "post_neighborhood_success",
    "post_reference_score",
    "post_ngram_entropy",
)
for method, results_dir in (
    ("FT", "../../../rome/results/FT/run_000"),
    ("ROME", "../../../rome/results/ROME/run_000"),
):
    summary = load_json(Path(results_dir) / "summary.json")

    # The row label is the name of the directory that contains the run.
    row = [Path(results_dir).parent.name]

    # Summary entries are (mean, other) pairs; the mean is printed as stored.
    for metric in summary_metrics:
        mean, _ = summary[metric]
        row.append(latexify_num(f"{mean:.1f}".lstrip("0")))

    # The essence score comes from the earlier scoring cell and is scaled by 100.
    essence = essences_by_method[method].mean * 100
    row.append(latexify_num(f"{essence:.1f}".lstrip("0")))

    print(latexify_row(row))

# Classification

In [None]:
from collections import defaultdict

# Row labels of the classification table, in print order.
METHOD_REMEDI = r"\ourmethod"
METHOD_REMEDI_I = r"\ourmethod ($I$)"
METHOD_CONTROL_TASK = "Task"
METHOD_CONTROL_MODEL = "Model"
METHODS = (METHOD_REMEDI, METHOD_REMEDI_I, METHOD_CONTROL_TASK, METHOD_CONTROL_MODEL)

# Column groups of the classification table.
TASK_BIOS_MED = "Bios-Med"
TASK_FACT_MED = "Fact-Med"
TASK_FACT_PRIOR = "Fact-Prior"

# Editor and entity-classifier layers, per dataset, for the selected model.
fact_remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["counterfact"]
fact_entity_layer = REMEDI_ENTITY_CLS_LAYER[MODEL]["counterfact"]
bios_remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["biosbias"]
bios_entity_layer = REMEDI_ENTITY_CLS_LAYER[MODEL]["biosbias"]

# Use the configurable RESULTS_PREFIX, like the generation sections, so that
# changing the prefix in the first cell also moves this section.
bios_results_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_cls_biosbias_{MODEL}"
fact_results_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_cls_counterfact_{MODEL}"
for results_dir in (bios_results_dir, fact_results_dir):
    assert results_dir.exists(), f"results directory not found: {results_dir}"


# Where each method's classification metrics live:
# (editor directory, suffix of the metrics file name).
metrics_location_by_method = {
    METHOD_REMEDI: ("linear", "metrics"),
    METHOD_REMEDI_I: ("identity", "metrics"),
    METHOD_CONTROL_TASK: ("linear", "control_task_metrics"),
    METHOD_CONTROL_MODEL: ("linear", "control_model_metrics"),
}

results_by_method = defaultdict(dict)
for method in METHODS:
    assert method in metrics_location_by_method, method
    editor_dir, suffix = metrics_location_by_method[method]

    # The bios metrics file is stored as-is under a single task.
    results_by_method[method][TASK_BIOS_MED] = load_json(
        bios_results_dir
        / editor_dir
        / str(bios_remedi_layer)
        / f"error_cls_layer_{bios_entity_layer}_{suffix}.json"
    )

    # The fact metrics file holds both settings; split it into two tasks.
    fact_results = load_json(
        fact_results_dir
        / editor_dir
        / str(fact_remedi_layer)
        / f"fact_cls_layer_{fact_entity_layer}_{suffix}.json"
    )
    results_by_method[method][TASK_FACT_MED] = fact_results["contextual"]
    results_by_method[method][TASK_FACT_PRIOR] = fact_results["decontextual"]

# (task, score) pairs in column order: f1 then mcc for each task.
score_columns = tuple(
    (task_name, score_name)
    for task_name in (TASK_BIOS_MED, TASK_FACT_MED, TASK_FACT_PRIOR)
    for score_name in ("f1", "mcc")
)

# Each row is [label, score, score, ...]; separator rows are inserted before
# the first control method.
rows = []
for method in METHODS:
    if method == METHOD_CONTROL_TASK:
        rows.extend(([r"\midrule"], ["Control"] + ["&"] * 6, [r"\midrule"]))
    rows.append(
        [method]
        + [
            results_by_method[method][task_name][score_name]
            for task_name, score_name in score_columns
        ]
    )

In [None]:
# Render every row without its first element (the label): floats get two
# decimals with leading zeros stripped, everything else is passed through.
# NOTE(review): dropping the first element also blanks the one-element
# `\midrule` rows (they print as an empty line ending in ` \\`), and the
# "Control" row becomes a run of ampersands -- confirm this is intended.
sep = r" \\" + "\n"
rendered_rows = []
for row in rows:
    cells = [
        f"{x:.2f}".lstrip('0') if isinstance(x, float) else x
        for x in row[1:]
    ]
    rendered_rows.append(" & ".join(cells))
table = sep.join(rendered_rows)
print(table)

# [Delete] Playground

In [None]:
from remedi import benchmarks

# Load the fact classification results file (the one without the `_metrics`
# suffix) into a results object.
# NOTE: this rebinds `fact_results`, which the classification cell uses for
# plain metric dicts.
fact_cls_file = (
    fact_results_dir
    / "linear"
    / str(fact_remedi_layer)
    / f"fact_cls_layer_{fact_entity_layer}.json"
)
fact_results = benchmarks.ClassificationBenchmarkResults.from_dict(
    load_json(fact_cls_file)
)

In [None]:
# Mean of the decontextual predictions over all samples.
decontextual_total = sum(
    sample.decontextual.prediction for sample in fact_results.samples
)
decontextual_total / len(fact_results.samples)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef

import random
# Negate labels and predictions so the scores below treat the original
# `False` class as the positive class.
labels = [not x.decontextual.label for x in fact_results.samples]
predictions = [not x.decontextual.prediction for x in fact_results.samples]

# Score the predictions against shuffled labels (presumably a chance-level
# sanity check). A fixed-seed generator keeps the printed numbers identical
# on every run and leaves the global `random` state untouched.
random.Random(0).shuffle(labels)

print(accuracy_score(labels, predictions))
print(f1_score(labels, predictions))
print(matthews_corrcoef(labels, predictions))