In [None]:
from pathlib import Path

# Directory that holds every experiment's results, relative to this notebook.
RESULTS_ROOT = Path("../../results")
# Fail fast if the results directory is missing.
assert RESULTS_ROOT.exists()

# Prefix of the experiment directory names looked up under RESULTS_ROOT.
RESULTS_PREFIX = "emnlp_"  # Change to whatever you used
# Model key used in experiment directory names and in the REMEDI_* layer tables.
MODEL = "gptj"

# LaTeX macro used as the row label for our method.
OURMETHOD = r"\ourmethod"

In [None]:
import sys
sys.path.append("../..")

from experiments.aliases import REMEDI_EDITOR_LAYER, REMEDI_ENTITY_CLS_LAYER

In [None]:
import json

def load_json(file):
    """Read a JSON file and return the decoded object.

    Args:
        file: Location of the JSON file, as a ``str`` or ``Path``.

    Returns:
        Whatever ``json.load`` decodes from the file.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # locale encoding.
    with Path(file).open("r", encoding="utf-8") as handle:
        return json.load(handle)

# Error Correction

In [None]:
def latexify_num(x):
    """Wrap ``x`` in dollar signs so LaTeX typesets it in math mode."""
    return "${}$".format(x)

def format_task(task):
    """Format one task's metrics as LaTeX table cells.

    Args:
        task: Mapping with a numeric ``"top1_accuracy"`` entry and a
            ``"fluency"`` mapping that has a numeric ``"mean"`` entry.

    Returns:
        A 2-tuple of strings, each wrapped by ``latexify_num``: the accuracy
        with two decimals and leading zeros stripped, and the mean fluency
        multiplied by 100 with one decimal.
    """
    accuracy_text = f"{task['top1_accuracy']:.2f}".lstrip("0")
    # Only the mean fluency is reported, so the std is no longer read.
    fluency_text = f"{task['fluency']['mean'] * 100:.1f}"
    return latexify_num(accuracy_text), latexify_num(fluency_text)

def latexify_row(row):
    """Join string cells with `` & `` and append the LaTeX row terminator."""
    cells = " & ".join(row)
    return f"{cells} \\\\"

# Print one LaTeX row per method: the label followed by the formatted
# (accuracy, fluency) cells of the contextual and decontextual tasks.
layer = REMEDI_EDITOR_LAYER[MODEL]["biosbias"]
for method in ("baseline", OURMETHOD):
    results_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_gen_biosbias_{MODEL}"
    # The baseline keeps its metrics next to the task directory; our method
    # keeps them under the editor layer's subdirectory.
    if method == "baseline":
        metrics_name = "baseline.json"
    else:
        metrics_name = f"linear/{layer}/error_correction_metrics.json"

    row = [method.capitalize()]
    for task in ("contextual", "decontextual"):
        results_file = results_dir / task / metrics_name
        assert results_file.exists()
        results = load_json(results_file)
        # Use the nested "metrics" mapping when present, otherwise the
        # top-level object itself.
        row.extend(format_task(results.get("metrics", results)))
    row_str = latexify_row(row)
    print(row_str)

# Factual Editing

In [None]:
# Rebind `layer` to the editor layer used for the counterfact experiments.
layer = REMEDI_EDITOR_LAYER[MODEL]["counterfact"]
# Directory with the counterfact generation-eval results for MODEL.
experiment_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_gen_counterfact_{MODEL}"

In [None]:
from remedi import data, metrics

from tqdm.auto import tqdm

# Load references from our own eval, dropping empty entries and wrapping each
# remaining reference in a single-element list.
# NOTE(review): dropping empty references can change the list length relative
# to the generations below — confirm how average_tfidf_similarity pairs them.
references_file = experiment_dir / "essence_references.json"
references = [
    [reference]
    for reference in load_json(references_file)["references"]
    if reference
]

# Load the counterfact vectorizer.
tfidf_vectorizer = data.load_counterfact_tfidf_vectorizer()

# Load ROME essence results and score each method's generations.
essences_by_method = {}
for method, results_dir in (
    ("FT", "../../../rome/results/FT-essence/run_000"),
    ("ROME", "../../../rome/results/ROME-essence/run_000"),
):
    case_files = sorted(Path(results_dir).glob("case*.json"))

    cases = []
    for case_file in tqdm(case_files):
        with case_file.open("r") as handle:
            cases.append(json.load(handle))

    # Keep the 5000 cases with the smallest case IDs.
    cases.sort(key=lambda case: case["case_id"])
    cases = cases[:5000]

    generations = [[case["post"]["generation"]] for case in cases]

    essences_by_method[method] = metrics.average_tfidf_similarity(
        generations, references, tfidf_vectorizer
    )

In [None]:
# Print table rows for our method
# (benchmark file stem, keys read from that benchmark's metrics file)
benchmark_keys = (
    ("efficacy", ("score",)),
    ("generation", ("fluency", "consistency")),
    ("essence", ("essence",)),
)
# Score names in the order their columns appear in the table.
column_keys = (
    "efficacy_score",
    "neighborhood_score",
    "generation_consistency",
    "generation_fluency",
    "essence_essence",
)
for method in ("prefix", "replace", OURMETHOD):
    # Our method's results live under the editor layer; the others under
    # a directory named after the method.
    subdir = f"linear/{layer}" if method == OURMETHOD else method
    results_dir = experiment_dir / subdir

    scores = {}
    for benchmark_name, keys in benchmark_keys:
        results_file = results_dir / f"{benchmark_name}_metrics.json"
        with results_file.open("r") as handle:
            results = json.load(handle)
        scores.update({f"{benchmark_name}_{key}": results[key] for key in keys})

    # The neighborhood score is fixed at 1.0 rather than read from disk.
    # NOTE(review): presumably because these methods leave the model weights
    # untouched — confirm.
    scores["neighborhood_score"] = {"mean": 1.0, "std": 0.0}

    row = [method.capitalize()]
    row.extend(
        latexify_num(f"{scores[key]['mean'] * 100:.1f}".lstrip("0"))
        for key in column_keys
    )

    print(latexify_row(row))

print(r"\midrule")
print(r"\textbf{Model Edit} & & & & & \\")
print(r"\midrule")

# Print rows for ROME method.
for method, results_dir in (
    ("FT", "../../../rome/results/FT/run_000"),
    ("ROME", "../../../rome/results/ROME/run_000"),
):
    summary_file = Path(results_dir) / "summary.json"
    with summary_file.open("r") as handle:
        summary = json.load(handle)

    # The row label is the directory that contains the run directory.
    row = [Path(results_dir).parent.name]

    # Summary entries are (mean, second value) pairs; only the mean is printed.
    for metric in (
        "post_rewrite_success",
        "post_neighborhood_success",
        "post_reference_score",
        "post_ngram_entropy",
    ):
        mean, _ = summary[metric]
        row.append(latexify_num(f"{mean:.1f}".lstrip("0")))

    # The essence column comes from the scores computed in this notebook and
    # is scaled by 100 here.
    mean = essences_by_method[method].mean * 100
    row.append(latexify_num(f"{mean:.1f}".lstrip("0")))

    print(latexify_row(row))

For the appendix, break the scores down by:
- Model knows vs. model does not know.
- Model saw the relation during training vs. model did not.
- Relation type

In [None]:
# Editor layer and entity-classification layer for the counterfact experiments.
remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["counterfact"]
entity_layer = REMEDI_ENTITY_CLS_LAYER[MODEL]["counterfact"]
# Result directories of the generation eval and the classification eval.
gen_experiment_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_gen_counterfact_{MODEL}"
cls_experiment_dir = RESULTS_ROOT / f"{RESULTS_PREFIX}eval_cls_counterfact_{MODEL}"

In [None]:
from collections import defaultdict
from itertools import chain

from remedi import data, metrics


def normalize_attribute(entity, context):
    """Return ``context`` with every occurrence of ``entity`` replaced by ``[entity]``."""
    placeholder = "[entity]"
    return context.replace(entity, placeholder)


# Both slices come from the "train" split: the first 5000 samples are treated
# as training data and the next 5000 as the evaluation set.
counterfact_train = data.load_dataset("counterfact", split="train[:5000]")
counterfact_test = data.load_dataset("counterfact", split="train[5000:10000]")
# Entity-normalized contexts that appear in the training slice.
attributes_train = {normalize_attribute(x["entity"], x["context"]) for x in counterfact_train}

# Collect, per relation ID, every context template with the entity and the
# target replaced by placeholders.
attributes_by_relation = defaultdict(set)
for x in chain(counterfact_train, counterfact_test):
    rid = x["source"]["requested_rewrite"]["relation_id"]
    attribute = normalize_attribute(x["entity"], x["context"]).replace(x["target_mediated"], "[target]")
    attributes_by_relation[rid].add(attribute)

# Pick one display template per relation: the longest template without a "?".
# Ties are broken alphabetically. Sets iterate in hash order, which changes
# between kernel restarts, so without the tie-break the chosen label could
# differ from run to run. A relation with no "?"-free template raises
# ValueError.
relation_canonical = {
    rid: min(
        (a for a in attrs if "?" not in a),
        key=lambda a: (-len(a), a),
    )
    for rid, attrs in attributes_by_relation.items()
}


def group_by_model_knows(x_dataset, x_cls):
    """Group a sample by the truthiness of ``x_cls.label``.

    ``x_dataset`` is accepted for signature parity with the other ``group_by_*``
    functions and is not used.
    """
    if x_cls.label:
        return "Model Knows"
    return "Model Does Not Know"


def group_by_generalization(x_dataset, x_cls):
    """Group a sample by whether its entity-normalized context is in ``attributes_train``.

    ``x_cls`` is accepted for signature parity and is not used.
    """
    attribute = normalize_attribute(x_dataset["entity"], x_dataset["context"])
    if attribute in attributes_train:
        return "Seen in Training"
    return "Unseen in Training"


def group_by_relation_type(x_dataset, x_cls):
    """Group a sample by the canonical template of its relation ID.

    ``x_cls`` is accepted for signature parity and is not used.
    """
    rewrite = x_dataset["source"]["requested_rewrite"]
    return relation_canonical[rewrite["relation_id"]]


# Per-sample generation results for our method. This uses `remedi_layer`, the
# same variable the classification path below uses, instead of the `layer`
# global that other sections of the notebook rebind.
gen_results_dir = gen_experiment_dir / "linear" / str(remedi_layer)
# Per-sample fact-classification results for the same editor layer.
cls_results = load_json(
    cls_experiment_dir
    / "linear"
    / str(remedi_layer)
    / f"fact_cls_layer_{entity_layer}.json"
)

In [None]:
from remedi import benchmarks

# For each benchmark file: the (gen_scores key, per-sample scorer) pairs it
# contributes, in the order their lists are filled.
sample_scorers = (
    (
        "efficacy",
        (("efficacy", lambda x: x["target_score"] > x["comparator_score"]),),
    ),
    (
        "generation",
        (
            ("fluency", lambda x: x["fluency_score"]),
            ("consistency", lambda x: x["consistency_score"]),
        ),
    ),
    (
        "essence",
        (("essence", lambda x: x["essence_score"]),),
    ),
)

# Per-sample score lists keyed by score name, in sample order.
gen_scores = defaultdict(list)
for benchmark_name, scorers in sample_scorers:
    gen_results_file = gen_results_dir / f"{benchmark_name}.json"
    with gen_results_file.open("r") as handle:
        results = json.load(handle)

    for x in results["samples"]:
        for score_name, scorer in scorers:
            gen_scores[score_name].append(scorer(x))


def make_table(group_by):
    """Print one LaTeX row of mean generation scores per group of samples.

    Samples of ``counterfact_test`` are paired positionally with
    ``cls_results["samples"]``. Each printed row holds the group label, the
    group size, and the mean efficacy, fluency, consistency and essence scores
    multiplied by 100.

    Args:
        group_by: Callable taking ``(dataset_sample, cls_sample)`` and
            returning the group label used as the first cell of the row.
    """
    positions_by_group = defaultdict(list)
    paired = zip(counterfact_test, cls_results["samples"])
    for position, (x_dataset, x_cls) in enumerate(paired):
        cls_sample = benchmarks.ClassificationSample.from_dict(x_cls).decontextual
        positions_by_group[group_by(x_dataset, cls_sample)].append(position)

    for group, idx in positions_by_group.items():
        # Mean of every collected score over this group's samples.
        group_scores = {
            key: metrics.Metric.aggregate([values[i] for i in idx]).mean
            for key, values in gen_scores.items()
        }

        row = [group, str(len(idx))]
        for key in ("efficacy", "fluency", "consistency", "essence"):
            percent = group_scores[key] * 100
            row.append(latexify_num(f"{percent:.1f}".lstrip("0")))

        print(latexify_row(row))

# Appendix breakdown: model knows vs. model does not know.
make_table(group_by_model_knows)

In [None]:
# Appendix breakdown: context seen vs. unseen in the training slice.
make_table(group_by_generalization)

In [None]:
# Appendix breakdown: one row per relation, labelled by its canonical template.
make_table(group_by_relation_type)

# Classification

In [None]:
from collections import defaultdict

# Row labels of the classification table.
METHOD_REMEDI = r"\ourmethod"
METHOD_REMEDI_I = r"\ourmethod ($I$)"
METHOD_CONTROL_TASK = "Task"
METHOD_CONTROL_MODEL = "Model"
METHODS = (METHOD_REMEDI, METHOD_REMEDI_I, METHOD_CONTROL_TASK, METHOD_CONTROL_MODEL)

# Task names used as keys of each method's results.
TASK_BIOS_MED = "Bios-Med"
TASK_FACT_MED = "Fact-Med"
TASK_FACT_PRIOR = "Fact-Prior"

fact_remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["counterfact"]
fact_entity_layer = REMEDI_ENTITY_CLS_LAYER[MODEL]["counterfact"]
bios_remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["biosbias"]
bios_entity_layer = REMEDI_ENTITY_CLS_LAYER[MODEL]["biosbias"]

# NOTE(review): these directories hard-code the "post_icml_" prefix instead of
# using RESULTS_PREFIX — confirm this is intended.
bios_results_dir = RESULTS_ROOT / f"post_icml_eval_cls_biosbias_{MODEL}"
fact_results_dir = RESULTS_ROOT / f"post_icml_eval_cls_counterfact_{MODEL}"
for results_dir in (bios_results_dir, fact_results_dir):
    assert results_dir.exists()

# method -> (editor subdirectory, infix inserted before "_metrics.json")
metrics_file_variants = {
    METHOD_REMEDI: ("linear", ""),
    METHOD_REMEDI_I: ("identity", ""),
    METHOD_CONTROL_TASK: ("linear", "_control_task"),
    METHOD_CONTROL_MODEL: ("linear", "_control_model"),
}

# results_by_method[method][task] holds that task's metrics mapping.
results_by_method = defaultdict(dict)
for method in METHODS:
    assert method in metrics_file_variants, method
    editor_kind, variant = metrics_file_variants[method]

    results_by_method[method][TASK_BIOS_MED] = load_json(
        bios_results_dir
        / editor_kind
        / str(bios_remedi_layer)
        / f"error_cls_layer_{bios_entity_layer}{variant}_metrics.json"
    )

    # One fact-classification file provides both the contextual (Fact-Med)
    # and the decontextual (Fact-Prior) metrics.
    fact_results = load_json(
        fact_results_dir
        / editor_kind
        / str(fact_remedi_layer)
        / f"fact_cls_layer_{fact_entity_layer}{variant}_metrics.json"
    )
    results_by_method[method][TASK_FACT_MED] = fact_results["contextual"]
    results_by_method[method][TASK_FACT_PRIOR] = fact_results["decontextual"]

# (task, metric) pairs in table-column order: f1 then mcc for each task.
score_columns = tuple(
    (task, score)
    for task in (TASK_BIOS_MED, TASK_FACT_MED, TASK_FACT_PRIOR)
    for score in ("f1", "mcc")
)

rows = []
for method in METHODS:
    if method == METHOD_CONTROL_TASK:
        # Insert the "Control" section header, framed by rules, before the
        # first control row.
        rows.extend(
            [[r"\midrule"], [r"\textbf{Control}"] + [""] * 6, [r"\midrule"]]
        )
    rows.append(
        [method]
        + [results_by_method[method][task][score] for task, score in score_columns]
    )

In [None]:
# Display the raw rows before they are rendered as LaTeX.
rows

In [None]:
# Render the classification rows as LaTeX, one table line per row.
sep = "\n"
lines = []
for row in rows:
    # Floats become two-decimal math-mode numbers without a leading zero;
    # every other cell is emitted as-is.
    cells = [
        "$" + format(x, ".2f").lstrip("0") + "$" if isinstance(x, float) else x
        for x in row
    ]
    # Single-cell rows (the \midrule lines) take no row terminator.
    terminator = r" \\" if len(row) > 1 else ""
    lines.append(" & ".join(cells) + terminator)
table = sep.join(lines) + sep
print(table)

# Entailment

In [None]:
# Rebind `remedi_layer` to the editor layer used for the mcrae experiments.
remedi_layer = REMEDI_EDITOR_LAYER[MODEL]["mcrae"]
# NOTE(review): the experiment name hard-codes the "post_icml_" prefix instead
# of using RESULTS_PREFIX — confirm this is intended.
experiment_name = f"post_icml_eval_ent_mcrae_{MODEL}"
results_dir = RESULTS_ROOT / experiment_name

prefix_dir = results_dir / "prefix"
remedi_dir = results_dir / "linear" / str(remedi_layer)

# Aggregate metrics for each method.
prefix_metrics = load_json(prefix_dir / "entailment_metrics.json")
remedi_metrics = load_json(remedi_dir / "entailment_metrics.json")

# Per-sample results for each method.
prefix_results = load_json(prefix_dir / "entailment.json")
remedi_results = load_json(remedi_dir / "entailment.json")

In [None]:
from remedi import metrics

import numpy as np


def get_logp_delta_remedi(key):
    """Aggregate pre-to-post changes over ``remedi_results`` for one feature group.

    For every sample, each entry of ``sample[f"{key}_features"]`` contributes
    the probability change ``exp(logp_post) - exp(logp_pre)`` and the
    log-probability change as a percentage of ``abs(logp_pre)``. Both are
    averaged within the sample before aggregation across samples.

    Args:
        key: Feature-group prefix; ``f"{key}_features"`` is read from each sample.

    Returns:
        Dict with ``f"{key}_p_delta"`` and ``f"{key}_p_delta_pct"``, each the
        ``to_dict()`` of ``metrics.Metric.aggregate`` over the per-sample means.
    """
    mean_prob_deltas = []
    mean_logp_pct_deltas = []
    for sample in remedi_results["samples"]:
        pairs = [
            (feature["logp_pre"], feature["logp_post"])
            for feature in sample[f"{key}_features"]
        ]
        mean_prob_deltas.append(
            np.mean([np.exp(post) - np.exp(pre) for pre, post in pairs])
        )
        mean_logp_pct_deltas.append(
            np.mean([(post - pre) / abs(pre) * 100 for pre, post in pairs])
        )
    return {
        f"{key}_p_delta": metrics.Metric.aggregate(mean_prob_deltas).to_dict(),
        f"{key}_p_delta_pct": metrics.Metric.aggregate(mean_logp_pct_deltas).to_dict(),
    }


def get_logp_delta_prefix(key):
    """Aggregate changes between the REMEDI and prefix runs for one feature group.

    Samples and their ``f"{key}_features"`` entries are paired positionally
    across ``remedi_results`` and ``prefix_results``. The REMEDI run's
    ``logp_pre`` is the "before" value and the prefix run's ``logp_pre`` is the
    "after" value.
    NOTE(review): presumably the prefix run's ``logp_pre`` is measured with the
    prefix already in the prompt — confirm.

    Args:
        key: Feature-group prefix; ``f"{key}_features"`` is read from each sample.

    Returns:
        Dict with ``f"{key}_p_delta"`` and ``f"{key}_p_delta_pct"``, each the
        ``to_dict()`` of ``metrics.Metric.aggregate`` over the per-sample means.
    """
    mean_prob_deltas = []
    mean_logp_pct_deltas = []
    sample_pairs = zip(remedi_results["samples"], prefix_results["samples"])
    for x_rem, x_pre in sample_pairs:
        pairs = [
            (f_rem["logp_pre"], f_pre["logp_pre"])
            for f_rem, f_pre in zip(x_rem[f"{key}_features"], x_pre[f"{key}_features"])
        ]
        mean_prob_deltas.append(
            np.mean([np.exp(after) - np.exp(before) for before, after in pairs])
        )
        mean_logp_pct_deltas.append(
            np.mean([(after - before) / abs(before) * 100 for before, after in pairs])
        )
    return {
        f"{key}_p_delta": metrics.Metric.aggregate(mean_prob_deltas).to_dict(),
        f"{key}_p_delta_pct": metrics.Metric.aggregate(mean_logp_pct_deltas).to_dict(),
    }

# Need to compute probability deltas.
# Each metrics dict gains the "co", "orig" and "unrel" delta entries; all
# three are computed before the dict is updated.
for metrics_dict, compute_deltas in (
    (remedi_metrics, get_logp_delta_remedi),
    (prefix_metrics, get_logp_delta_prefix),
):
    computed = {}
    for key in ("co", "orig", "unrel"):
        computed.update(compute_deltas(key))
    metrics_dict.update(computed)

In [None]:
# Inspect which metric keys are available after the update above.
remedi_metrics.keys()

In [None]:
#  When   | Original Correlation | Entailed Correlation
#  Before | * *
#  Prefix | * *
#  REMEDI | * *
def format_p_delta(key, metrics):
    """Format the ``f"{key}_p_delta"`` entry of ``metrics`` as a LaTeX cell.

    Args:
        key: Feature-group prefix selecting ``metrics[f"{key}_p_delta"]``.
        metrics: Mapping whose selected entry has ``"mean"`` and ``"std"``.

    Returns:
        ``"0"`` when the absolute mean is below 0.0001; otherwise the mean and
        the std, both multiplied by 100 and shown with one decimal, as
        ``"$mean$ $(std)$"``.
    """
    stats = metrics[f"{key}_p_delta"]
    raw_mean = stats["mean"]
    # Means this small are printed as a plain zero; the std is not read.
    if abs(raw_mean) < 0.0001:
        return "0"
    return "${:.1f}$ $({:.1f})$".format(raw_mean * 100, stats["std"] * 100)

# Each row: label, then a (probability delta, correlation) pair for "co" and
# for "orig", then the "unrel" probability delta.
rows = [
    [
        # No edit applied: there are no deltas, only the pre-edit correlations.
        "No Edit",
        "--",
        remedi_metrics["co_corr_pre"],
        "--",
        remedi_metrics["orig_corr_pre"],
        "--",
    ],
    [
        # NOTE(review): this row reads the prefix run's *_corr_pre entries,
        # while the row below reads *_corr_post — confirm the asymmetry is
        # intended.
        "Prefix",
        format_p_delta("co", prefix_metrics),
        prefix_metrics["co_corr_pre"],
        format_p_delta("orig", prefix_metrics),
        prefix_metrics["orig_corr_pre"],
        format_p_delta("unrel", prefix_metrics),
    ],
    [
        r"\ourmethod",
        format_p_delta("co", remedi_metrics),
        remedi_metrics["co_corr_post"],
        format_p_delta("orig", remedi_metrics),
        remedi_metrics["orig_corr_post"],
        format_p_delta("unrel", remedi_metrics),
    ],
]

In [None]:
# Render the entailment rows as LaTeX; every row ends with the row terminator.
sep = r" \\" + "\n"
lines = []
for row in rows:
    cells = []
    for x in row:
        if isinstance(x, dict):
            # Metric dicts are shown as their two-decimal mean in math mode,
            # without a leading zero.
            cells.append("$" + (format(x["mean"], ".2f") + "$").lstrip("0"))
        else:
            cells.append(x)
    lines.append(" & ".join(cells))
table = sep.join(lines) + sep
print(table.rstrip())

In [None]:
# Scratch check of float formatting with zero decimals.
f"{43.1:.0f}"

In [None]:
# Scratch lookup of a single metric.
# NOTE(review): no cell in this notebook adds an "orig_delta" key, so it must
# come from the loaded metrics file — confirm it exists.
remedi_metrics["orig_delta"]