In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import gzip
import pickle

from IPython.display import display

# Apply the matplotlib style named ".mplstyle" (presumably a style file in the
# working directory -- confirm the notebook is started from that directory).
plt.style.use(".mplstyle")
# Show at most 25 rows when a DataFrame is rendered.
pd.set_option("display.max_rows", 25)
# NOTE(review): 0 is presumably meant to disable column-width truncation; pandas
# documents None as the "unlimited" value -- confirm this renders as intended.
pd.set_option("display.max_colwidth", 0)


In [None]:
# Root of the dataset directory (relative to the working directory).
BASE_DIR = "LAMA/data/"
# Directory holding the pickled metrics; later cells build file paths by plain
# string concatenation (METRICS_DIR + path), so the trailing slash is required.
METRICS_DIR = os.path.join(BASE_DIR, "metrics/")


In [None]:
# Show the working directory; BASE_DIR and the style file are relative to it.
os.getcwd()

In [None]:
# Load the detailed evaluation pickle of the "sweep_re" run (seed 0, "learned"
# subset) and collect the example dict of every sample.
# NOTE: pickle.load executes arbitrary code; only open trusted files.
with gzip.open(
    METRICS_DIR + "reranker/sweep_re/seed_0/learned/eval_detailed.pickle",
    "rb",
) as handle:
    pt_learned = pickle.load(handle)

pt_examples = [
    sample["example"]
    for sample in pt_learned["samples"]
]

In [None]:
# Load the detailed evaluation pickle of the "sweep_re_ft_fl" run (seed 0,
# "learned" subset) and collect the example dict of every sample.
# The path is built from METRICS_DIR (same location as the previously
# hard-coded "LAMA/data/metrics/") and opened explicitly in binary mode, so
# this cell stays consistent with the sibling loading cell.
# NOTE: pickle.load executes arbitrary code; only open trusted files.
with gzip.open(
    METRICS_DIR + "reranker/sweep_re_ft_fl/seed_0/learned/eval_detailed.pickle",
    "rb",
) as handle:
    ft_learned = pickle.load(handle)

ft_examples = [sample["example"] for sample in ft_learned["samples"]]

In [None]:
def trim(target):
    """Remove every ``"<extra_id_0> "`` sentinel from ``target`` and lowercase it."""
    sentinel = "<extra_id_0> "
    without_sentinel = "".join(target.split(sentinel))
    return without_sentinel.lower()


def find_target_distractors(distractors, target):
    """Return the distractors whose target text matches ``target``.

    Both sides are normalised with ``trim`` before comparison. Each distractor
    must provide a ``"targets_pretokenized"`` entry. Input order is preserved.
    """
    matches = []
    for candidate in distractors:
        if trim(candidate["targets_pretokenized"]) == trim(target):
            matches.append(candidate)
    return matches


def subset_statistics(subset):
    """Average per-sample retrieval-pool statistics over ``subset["samples"]``.

    Each sample must provide ``"example"`` (with ``targets_pretokenized``,
    ``predicate_id``, ``obj_uri`` and ``sub_uri``), ``"distractors"`` and
    ``"fact_abstracts"``.

    Returns:
        A list of four means, in this order:
        1. number of distractors,
        2. number of fact abstracts,
        3. number of distractors whose target matches the example's target,
        4. number of those target-matching distractors whose ``"facts"``
           contain the example's ``"predicate,object,subject"`` key.
        With no samples every entry is ``nan`` (``np.mean`` of an empty list).
    """
    distractor_counts = []
    fact_abstract_counts = []
    target_distractor_counts = []
    target_distractor_fact_counts = []

    for sample in subset["samples"]:
        example = sample["example"]
        target = example["targets_pretokenized"]
        # Fact key in "predicate_id,obj_uri,sub_uri" order.
        fact = ",".join(
            (example["predicate_id"], example["obj_uri"], example["sub_uri"])
        )

        distractors = sample["distractors"]
        distractor_counts.append(len(distractors))
        fact_abstract_counts.append(len(sample["fact_abstracts"]))

        target_distractors = find_target_distractors(distractors, target)
        target_distractor_counts.append(len(target_distractors))

        # Target-matching distractors that also mention the queried fact.
        # NOTE(review): `in` is a substring test if "facts" is a string and a
        # membership test if it is a list -- confirm which the data uses.
        target_distractor_fact_counts.append(
            sum(1 for abstract in target_distractors if fact in abstract["facts"])
        )

    return [
        np.mean(counts)
        for counts in (
            distractor_counts,
            fact_abstract_counts,
            target_distractor_counts,
            target_distractor_fact_counts,
        )
    ]


In [None]:
# Mean per-sample counts for pt_learned: [distractors, fact abstracts,
# target-matching distractors, target-matching distractors containing the fact].
subset_statistics(pt_learned)

In [None]:
# Mean per-sample counts for ft_learned: [distractors, fact abstracts,
# target-matching distractors, target-matching distractors containing the fact].
subset_statistics(ft_learned)

In [None]:
def _score_rows(prefix, metric, metric_result):
    """Expand one metric result into ``prefix + (metric, k, score)`` row tuples.

    A mapping ``{k: score}`` yields one row per ``k`` (stringified); any other
    value is treated as a single score and stored under ``k == "1"``.
    """
    if hasattr(metric_result, "items"):
        return [
            prefix + (metric, str(k), score)
            for k, score in metric_result.items()
        ]
    return [prefix + (metric, "1", metric_result)]


def metrics_to_df(results, metrics=("precision", "recall", "mrr")):
    """Flatten a nested reranker results dict into a long-format DataFrame.

    Args:
        results: dict read as
            ``results["evals"][baseline][eval_type][metric]`` for the
            ``"bm25plus"`` / ``"random"`` baselines and as
            ``results["evals"][norm_type][similarity][eval_type][layers][metric]``
            for ``norm_type`` in ``("local", "global")`` and ``similarity`` in
            ``("cosine", "dot")`` (the ``global``/``dot`` combination is
            skipped).
        metrics: metric names to extract. For the ``"full"`` eval type the
            three ``mrr_compare_fn_*`` metrics are extracted in addition.

    Returns:
        DataFrame with columns ``layers, norm_type, normalization, eval,
        metrics, k, score, layer_type``. Baseline rows are repeated for every
        (normalization, norm_type) pair so they survive filtering on those
        columns. Layer names are abbreviated (``gradients`` -> ``G`` ...) and
        encoder/decoder gradient block indices are shifted by one, leaving
        ``G.0`` for the shared-embedding gradients.
    """
    base_metrics = list(metrics)
    full_only_metrics = [
        "mrr_compare_fn_subject",
        "mrr_compare_fn_object",
        "mrr_compare_fn_relation",
    ]

    def metrics_for(eval_type):
        # The extra comparison metrics are only read for the "full" eval type.
        if eval_type == "collapse":
            return base_metrics
        return base_metrics + full_only_metrics

    data = []

    # Baselines: a single result per (method, eval_type), replicated across
    # every similarity / normalization-scope combination.
    for method in ("bm25plus", "random"):
        for eval_type in ("collapse", "full"):
            eval_results = results["evals"][method][eval_type]
            for metric in metrics_for(eval_type):
                if metric not in eval_results:
                    continue
                metric_result = eval_results[metric]
                for normalized in ("cosine", "dot"):
                    for method_type in ("local", "global"):
                        data.extend(
                            _score_rows(
                                (method, method_type, normalized, eval_type),
                                metric,
                                metric_result,
                            )
                        )

    # Model-based rerankers, one entry per layer configuration.
    for method_type in ("local", "global"):
        for eval_type in ("collapse", "full"):
            for normalized in ("cosine", "dot"):
                if method_type == "global" and normalized == "dot":
                    continue
                per_layer = results["evals"][method_type][normalized][eval_type]
                for method, method_results in per_layer.items():
                    for metric in metrics_for(eval_type):
                        if metric in method_results:
                            data.extend(
                                _score_rows(
                                    (method, method_type, normalized, eval_type),
                                    metric,
                                    method_results[metric],
                                )
                            )

    df = pd.DataFrame(
        data,
        columns=[
            "layers",
            "norm_type",
            "normalization",
            "eval",
            "metrics",
            "k",
            "score",
        ],
    )

    # Classify each configuration from its raw layer string before renaming.
    df["layer_type"] = "Embed"
    df.loc[df["layers"].str.contains("gradients"), "layer_type"] = "TracIn"
    df.loc[
        (df["layers"].str.contains("gradients"))
        & (df["layers"].str.contains("activations")),
        "layer_type",
    ] = "TracIn+Embed"
    df.loc[
        (df["layers"] == "random") | (df["layers"] == "bm25plus"), "layer_type"
    ] = "baselines"

    # Abbreviate layer names (regex substitution applied to every column).
    df = df.replace(
        {
            "gradients": "G",
            "activations": "A",
            "block.": "",
            "encoder": "E",
            "decoder": "D",
            "shared": "emb",
            "random": "Target-Picker",
        },
        regex=True,
    )

    # Shift gradient block indices by one (exact, whole-value matches only),
    # then give the embedding gradients index 0 and prettify baseline names.
    df["layers"] = (
        df["layers"]
        .replace(
            {f"G.E.{i},G.D.{i}": f"G.E.{i+1},G.D.{i+1}" for i in range(12)},
            regex=False,
        )
        .replace({f"G.E.{i}": f"G.E.{i+1}" for i in range(12)}, regex=False)
        .replace(
            {
                f"G.emb,G.E.{i},G.D.{i}": f"G.emb,G.E.{i+1},G.D.{i+1}"
                for i in range(12)
            },
            regex=False,
        )
        .replace(
            {f"G.emb,G.E.{i}": f"G.emb,G.E.{i+1}" for i in range(12)},
            regex=False,
        )
        .str.replace("G.emb", "G.0", regex=False)
        .str.replace("bm25plus", "BM25+", regex=False)
        .str.replace("Target-Picker", "Random-Target", regex=False)
    )

    return df


In [None]:
def plot_with_filter(df, 
                     filter=lambda x: x,
                     title="Title",
                     folder="plots/",
                     ylabel='',
                     save=False):
    """Draw a bar plot of ``score`` per ``layers`` value, coloured by ``layer_type``.

    Args:
        df: DataFrame with at least ``layers``, ``score`` and ``layer_type``.
        filter: callable applied to ``df`` to select the rows to plot
            (identity by default).
        title: figure title; also used in the output file name.
        folder: prefix prepended to ``title + "_plot.png"`` when saving.
        ylabel: y-axis label.
        save: when true, write the figure to disk before showing it.

    Bars show the mean score with a standard-deviation error bar. The figure
    is closed after being shown so repeated calls do not accumulate open
    figures.
    """
    fig = plt.figure(figsize=(16, 6))

    # NOTE(review): `ci='sd'` is the older seaborn spelling; newer releases
    # use `errorbar='sd'` -- confirm against the installed seaborn version.
    sns.barplot(data=filter(df),
                x='layers',
                y='score',
                hue='layer_type',
                estimator=np.mean,
                ci='sd')

    plt.xticks(rotation=90)
    plt.title(title)
    plt.xlabel("Layer Selection")
    plt.ylabel(ylabel)
    plt.legend(title='Method Type')

    if save:
        out_path = folder + title + "_plot.png"
        out_dir = os.path.dirname(out_path)
        # Create the target directory up front so savefig cannot fail on a
        # fresh checkout that has no "plots/" directory yet.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(out_path)

    plt.show()
    plt.close(fig)

In [None]:
def visualize_one_experiment(
    paths,
    suffix="",
    folder="plots/",
    save=False,
    visualize=False,
    k="3",
    norm_type="local",
    normalization="cosine",
):
    """Load per-seed result pickles into one DataFrame and optionally plot them.

    Args:
        paths: result pickle paths relative to ``METRICS_DIR``; the position
            of each path in the sequence is stored in the ``seed`` column.
        suffix: text appended to every plot title.
        folder: output prefix forwarded to ``plot_with_filter``.
        save: forwarded to ``plot_with_filter``.
        visualize: when true, draw precision@3, recall@10 and mrr bar plots
            for both the ``"full"`` and ``"collapse"`` eval types.
        k: currently unused; the ``k`` of each plot is fixed per metric.
        norm_type: ``norm_type`` value selected for the plots.
        normalization: ``normalization`` value selected for the plots.

    Returns:
        The concatenated, unfiltered DataFrame of all seeds.
    """
    dfs = []
    for seed, path in enumerate(paths):
        # NOTE: pickle.load executes arbitrary code; only open trusted files.
        with gzip.open(METRICS_DIR + path) as f:
            reranker_metrics = pickle.load(f)

        seed_df = metrics_to_df(reranker_metrics)
        seed_df["seed"] = seed
        dfs.append(seed_df)

    df = pd.concat(dfs, ignore_index=True)

    if visualize:
        # Layer configurations excluded from the plots. The pattern is a
        # literal layer-name fragment, so match it without regex semantics.
        ddf = df[~df["layers"].str.contains("A.E.0,A.D.0,", regex=False)]

        # (metric, k, label) for each figure, in plotting order.
        plot_specs = (
            ("precision", "3", "precision@3"),
            ("recall", "10", "recall@10"),
            ("mrr", "1", "mrr"),
        )

        for method in ("full", "collapse"):
            for metric, metric_k, label in plot_specs:

                def select(x, metric=metric, metric_k=metric_k, method=method):
                    return x[
                        (x["metrics"] == metric)
                        & (x["k"] == metric_k)
                        & (x["eval"] == method)
                        & (x["norm_type"] == norm_type)
                        & (x["normalization"] == normalization)
                    ]

                plot_with_filter(
                    ddf,
                    filter=select,
                    title=f"{label} ({method} + {suffix})",
                    folder=folder,
                    ylabel=label,
                    save=save,
                )
    return df


In [None]:
def get_abstracts_df(res, fact):
    """Tabulate the retrieved abstracts of one result together with their scores.

    Args:
        res: dict with ``"nn_abstracts"`` (a sequence of abstract dicts) and
            the matching scores under ``"nn_scores"`` or, when that key is
            absent, under ``res["nn"]["scores"]``.
        fact: sequence of strings; joined with ``","`` and searched for
            literally in each abstract's ``"facts"`` value.

    Returns:
        DataFrame with one row per abstract, numeric columns rounded to three
        decimals, a ``score`` column, a boolean ``label`` column telling
        whether the abstract contains the fact, and without the
        ``page_uri, masked_uri, masked_type, facts, example_uris`` columns.

    The abstract dicts in ``res`` are copied, not modified.
    """
    try:
        scores = res["nn_scores"]
    except KeyError:
        scores = res["nn"]["scores"]

    # Build fresh records so the caller's abstracts do not silently gain a
    # "score" key as a side effect of displaying them.
    records = [
        dict(abstract, score=scores[i])
        for i, abstract in enumerate(res["nn_abstracts"])
    ]

    df = pd.json_normalize(records).round(3)
    df["label"] = df["facts"].str.contains(",".join(fact), regex=False)

    df = df.drop(
        ["page_uri", "masked_uri", "masked_type", "facts", "example_uris"],
        axis=1,
    )
    return df


def get_nn_abstracts(res, baseline_res, config=""):
    """Print a summary of one example and tabulate its retrieved abstracts.

    Prints the config, the example's input/target text, its
    (predicate, object, subject) fact and the precision of both results, then
    returns ``(df_model, df_baseline)`` built with ``get_abstracts_df`` from
    ``res`` and ``baseline_res`` respectively. The example itself is read from
    ``baseline_res["example"]``.
    """
    print(f"Config: {config}")

    example = baseline_res["example"]
    query = example["inputs_pretokenized"]
    answer = example["targets_pretokenized"]
    print(f"Example: {query} => {answer}")

    fact = tuple(
        example[key].strip()
        for key in ("predicate_id", "obj_uri", "sub_uri")
    )
    print(f"Fact: {fact}")

    print("Model Precision", res["precision"])
    df_model = get_abstracts_df(res, fact)

    print("Baseline Precision", baseline_res["precision"])
    df_baseline = get_abstracts_df(baseline_res, fact)

    return df_model, df_baseline


def result_getter(path):
    """Load one results pickle and return a function for inspecting its samples.

    ``path`` is relative to ``METRICS_DIR``. The returned ``getter`` selects
    sample ``i`` of one reranker configuration and hands it, together with
    the same-index entry of the top-level ``"samples"`` list, to
    ``get_nn_abstracts``. The loaded dict is exposed as ``getter.metrics``.
    """
    # NOTE: pickle.load executes arbitrary code; only open trusted files.
    with gzip.open(METRICS_DIR + path) as f:
        reranker_metrics = pickle.load(f)

    def getter(
        i=3,
        sim="cosine",
        method="collapse",
        normalization="local",
        layers="activations.encoder.block.0,gradients.shared",
    ):
        # Key order matters only for the printed "Config:" line.
        config = dict(
            sim=sim,
            method=method,
            layers=layers,
            normalization=normalization,
        )
        per_layer = reranker_metrics["evals"][normalization][sim][method]
        model_sample = per_layer[layers]["samples"][i]
        baseline_sample = reranker_metrics["samples"][i]
        return get_nn_abstracts(model_sample, baseline_sample, config=config)

    getter.metrics = reranker_metrics
    return getter


In [None]:
def get_max(df: pd.DataFrame, 
            prefix, 
            no_prefix=None, 
            subset='learned', 
            metric='mrr', 
            eval_type='collapse', 
            normalization=None,
            k='1'):
    """Find the best-scoring layer configuration and format its scores.

    Args:
        df: aggregated scores with two-level columns, i.e. ``('score', 'mean')``
            and ``('score', 'std')`` plus the key columns ``layers``,
            ``norm_type``, ``normalization``, ``eval``, ``metrics``, ``k``,
            ``eos``, ``subset`` and ``accum``.
        prefix: only layer names starting with this string are candidates.
        no_prefix: if given, candidates whose layer name contains this pattern
            are dropped.
        subset, metric, eval_type, k: row filters for the candidate search.
        normalization: if given, restrict ``df`` to this normalization first.

    Returns:
        ``(nlargest, dflargest)``: the up-to-ten best candidates by mean
        score, and the transposed rows of the best configuration for
        ``metric`` with an added ``score_text`` entry of the form
        ``"<100*mean>\\stderr{<100*std>}"``.

    Raises:
        ValueError: if no row matches the candidate filters.
    """
    if normalization is not None:
        df = df[df['normalization'] == normalization]

    candidates = df[(df['layers'].str.startswith(prefix)) &
                    (df['subset'] == subset) &
                    (df['metrics'] == metric) &
                    (df['k'] == str(k)) &
                    (df['eval'] == eval_type)]

    if no_prefix:
        candidates = candidates[~candidates['layers'].str.contains(no_prefix)]

    nlargest = candidates.nlargest(10, ('score', 'mean'))
    if nlargest.empty:
        raise ValueError(
            f"no rows match prefix={prefix!r}, subset={subset!r}, "
            f"metric={metric!r}, k={k!r}, eval_type={eval_type!r}"
        )

    largest = nlargest.iloc[0]

    def best(column):
        # With two-level columns a key lookup on the row yields a one-element
        # Series; take its value by position instead of relying on integer
        # keys falling back to positions.
        value = largest[column]
        return value.iloc[0] if isinstance(value, pd.Series) else value

    # NOTE(review): `eval` and `normalization` are not part of this selection,
    # so the result can hold several rows for the chosen configuration --
    # confirm this is intended by the callers.
    dflargest = df[(df['layers'] == best('layers')) &
                   (df['norm_type'] == best('norm_type')) &
                   (df['eos'] == best('eos')) &
                   (df['accum'] == best('accum')) &
                   (df['k'] == str(k)) &
                   (df['subset'] == subset)]

    dflargest = dflargest.set_index('metrics').loc[[metric]]
    mean_text = (100 * dflargest['score']['mean']).apply(lambda x: f"{x:.2f}")
    std_text = (100 * dflargest['score']['std']).apply(lambda x: f"{x:.2f}")
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    dflargest['score_text'] = mean_text + r'\stderr{' + std_text + '}'
    dflargest = dflargest.transpose()
    return nlargest, dflargest


In [None]:
def print_best_results(path="reranker/sweep_v2/", subset="learned", accum="accum", eos="eos", seed_range=1, metric="mrr", normalization=None):
    """Summarise the best configuration of each method family for one sweep.

    Loads ``{path}/seed_{i}/{subset}/{eos}_{accum}/results_detailed.pickle``
    for ``i`` in ``range(seed_range)``, aggregates the scores over seeds
    (mean and std) and uses ``get_max`` to pick the best configuration for
    Embed (layers starting with "A" and not containing "G"), TracIn (starting
    with "G"), TracIn+Embed (starting with "A" and containing "G"), BM25+ and
    Random-Target.

    Args:
        path, subset, accum, eos, seed_range: locate the result files.
        metric: metric to maximise; ``"recall"`` is read at ``k == "10"``,
            every other metric at ``k == "1"``.
        normalization: optional normalization filter, applied only to the
            Embed and TracIn+Embed selections.

    Returns:
        DataFrame with one row per method family, including ``layer_type``
        and ``score_text``.

    Raises:
        FileNotFoundError: if a result file is missing (after printing the
            expected location).
    """
    suffix = f"{eos}+{subset}+{accum}"
    try:
        df = visualize_one_experiment(
            paths=[
                f"{path}/seed_{i}/{subset}/{eos}_{accum}/results_detailed.pickle"
                for i in range(seed_range)
            ],
            suffix=suffix,
            save=False,
            visualize=False,
        )
    except FileNotFoundError:
        print(
            "Couldn't find: "
            f"{path}/seed_0/{subset}/{eos}_{accum}/results_detailed.pickle"
        )
        # Re-raise: without the data there is nothing to aggregate, and the
        # original error names the missing file.
        raise

    df["eos"] = eos
    df["subset"] = subset
    df["accum"] = accum

    # Aggregate over seeds. String aggregator names keep pandas' own
    # mean/std (sample standard deviation) regardless of the pandas version.
    group_columns = [column for column in df.columns if column not in ("seed", "score")]
    scores = df.groupby(group_columns).agg({"score": ["mean", "std"]}).reset_index()
    scores["path"] = path

    k = "10" if metric == "recall" else "1"

    # Embed
    _, best_embed = get_max(scores, 'A', no_prefix='G', metric=metric, k=k, normalization=normalization)
    # TracIn
    _, best_tracin = get_max(scores, 'G', metric=metric, k=k)
    # TracIn + Embed
    _, best_tracin_embed = get_max(scores[scores.layers.str.contains('G')], 'A', metric=metric, k=k, normalization=normalization)
    # BM25+ baseline
    _, best_bm25 = get_max(scores, 'BM25+', metric=metric, k=k)
    # Random-Target baseline
    _, best_random = get_max(scores, 'Random-Target', metric=metric, k=k)

    # NOTE(review): only the first row of each best table is kept -- confirm
    # that this is the intended row when get_max returns several.
    best_tables = [best_embed, best_tracin, best_tracin_embed, best_bm25, best_random]
    dflargest = pd.concat(
        [table.transpose().iloc[0:1].reset_index() for table in best_tables],
        axis='index',
    )
    # Both baselines share one layer_type value in the scores table; give
    # each its own label so callers can index them separately.
    dflargest.loc[(dflargest.layers == 'BM25+'), 'layer_type'] = 'BM25+'
    dflargest.loc[(dflargest.layers == 'Random-Target'), 'layer_type'] = 'Random-Target'
    return dflargest

In [None]:
# Best score per method for each metric. One merged table is built per metric
# from the "sweep_re_ft_fl" (eos) and "sweep_v2_re_pt_fl" (no_eos) sweeps,
# with rows in a fixed presentation order.
table2_results = []
for metric in ("mrr", "recall"):
    df_ft_fl_mrr = print_best_results(
        path="reranker/sweep_re_ft_fl/",
        subset="learned",
        accum="accum",
        eos="eos",
        seed_range=3,
        metric=metric,
    )
    df_pt_fl_mrr = print_best_results(
        path="reranker/sweep_v2_re_pt_fl/",
        subset="learned",
        accum="accum",
        eos="no_eos",
        seed_range=3,
        metric=metric,
    )
    result = (
        df_ft_fl_mrr[['layer_type', 'score_text']]
        .merge(df_pt_fl_mrr[['layer_type', 'score_text']], on="layer_type")
        .set_index('layer_type')
        .loc[['Random-Target', 'BM25+', 'TracIn', 'Embed', 'TracIn+Embed']]
    )
    table2_results.append(result)
    print(metric)

# print("FT on PL (eos)")
# print_best_results(path="reranker/sweep_re_ft_pl/", subset="learned", accum="accum", eos="eos", seed_range=2)

# print("PT on PL (no_eos)")
# print_best_results(path="reranker/sweep_re/", subset="learned", accum="accum", eos="no_eos", seed_range=2)



In [None]:
# Place the per-metric tables side by side (mrr columns first, then recall).
table2 = pd.concat(table2_results, axis=1)
display(table2)

In [None]:
# Ablation results per metric. Every print_best_results call receives
# `metric=metric`; without it each call falls back to the default "mrr" and
# the "recall" entries would merely repeat the mrr numbers.
table3_results = {"mrr": {}, "recall": {}}
for metric in ("mrr", "recall"):
    # eos ablation: eos settings swapped relative to the table2 cell.
    df_ft_fl_mrr = print_best_results(path="reranker/sweep_re_ft_fl/", subset="learned", accum="accum", eos="no_eos", seed_range=3, metric=metric)
    df_pt_fl_mrr = print_best_results(path="reranker/sweep_v2_re_pt_fl/", subset="learned", accum="accum", eos="eos", seed_range=3, metric=metric)
    table3_results[metric]["eos"] = (
        df_ft_fl_mrr[['layer_type', 'score_text']]
        .merge(df_pt_fl_mrr[['layer_type', 'score_text']], on="layer_type")
        .set_index('layer_type')
        .loc[['TracIn', 'Embed', 'TracIn+Embed']]
    )

    # accum ablation: "no_accum" results.
    df_ft_fl_mrr = print_best_results(path="reranker/sweep_re_ft_fl/", subset="learned", accum="no_accum", eos="eos", seed_range=3, metric=metric)
    df_pt_fl_mrr = print_best_results(path="reranker/sweep_v2_re_pt_fl/", subset="learned", accum="no_accum", eos="no_eos", seed_range=3, metric=metric)
    table3_results[metric]["accum"] = (
        df_ft_fl_mrr[['layer_type', 'score_text']]
        .merge(df_pt_fl_mrr[['layer_type', 'score_text']], on="layer_type")
        .set_index('layer_type')
        .loc[['TracIn', 'Embed', 'TracIn+Embed']]
    )

    # normalization ablation: restrict Embed / TracIn+Embed to "dot".
    df_ft_fl_mrr = print_best_results(path="reranker/sweep_re_ft_fl/", subset="learned", accum="accum", eos="eos", normalization='dot', seed_range=3, metric=metric)
    df_pt_fl_mrr = print_best_results(path="reranker/sweep_v2_re_pt_fl/", subset="learned", accum="accum", eos="no_eos", normalization='dot', seed_range=3, metric=metric)
    table3_results[metric]["normalization"] = (
        df_ft_fl_mrr[['layer_type', 'score_text']]
        .merge(df_pt_fl_mrr[['layer_type', 'score_text']], on="layer_type")
        .set_index('layer_type')
        .loc[['TracIn', 'Embed', 'TracIn+Embed']]
    )

    # Single-checkpoint sweeps, one table per checkpoint number.
    ft_ckpt_results = []
    for ckpt_no in (5000, 10000, 30000, 80000):
        df_ft_fl_mrr = print_best_results(path=f"reranker/sweep_re_ft_fl_{ckpt_no}/", subset="learned", accum="accum", eos="eos", seed_range=3, metric=metric)
        df_ft_fl_mrr = df_ft_fl_mrr[['layer_type', 'score_text']].set_index('layer_type').loc[['TracIn', 'Embed', 'TracIn+Embed']]
        ft_ckpt_results.append(df_ft_fl_mrr)

    pt_ckpt_results = []
    for ckpt_no in (5100, 10200, 15300, 1000000):
        df_pt_fl_mrr = print_best_results(path=f"reranker/sweep_re_pt_fl_{ckpt_no}/", subset="learned", accum="accum", eos="no_eos", seed_range=3, metric=metric)
        df_pt_fl_mrr = df_pt_fl_mrr[['layer_type', 'score_text']].set_index('layer_type').loc[['TracIn', 'Embed', 'TracIn+Embed']]
        pt_ckpt_results.append(df_pt_fl_mrr)

    table3_results[metric]["single_ckpt"] = {"ft": ft_ckpt_results, "pt": pt_ckpt_results}

In [None]:
# Load the three seeds of the "sweep_re_ft_fl" eos_accum run, then draw the
# precision@3 / recall@10 / mrr plots and save them under the default
# "plots/" prefix (save=True, visualize=True).
df = visualize_one_experiment(
    paths=[
        f"reranker/sweep_re_ft_fl/seed_{i}/learned/eos_accum/results_detailed.pickle"
        for i in range(3)
    ],
    suffix="eos_accum",
    save=True,
    visualize=True,
)

In [None]:
# Temporarily point METRICS_DIR at the "Synth/synth_data_synth_07_27/"
# metrics directory for this one visualization. visualize_one_experiment
# reads the global at call time, so the override must be undone afterwards;
# the `finally` guarantees the restore even if loading or plotting fails.
# Otherwise every later cell would silently read from the wrong directory.
METRICS_DIR = os.path.join("Synth/synth_data_synth_07_27/", "metrics/")
try:
    df = visualize_one_experiment(
        paths=[
            f"reranker/sweep/seed_{i}/learned/eos_accum/results_detailed.pickle"
            for i in range(1)
        ],
        suffix="eos_accum",
        save=True,
        visualize=True,
    )
finally:
    METRICS_DIR = os.path.join(BASE_DIR, "metrics/")

In [None]:
# NOTE(review): this cell repeats the "sweep_re_ft_fl" eos_accum visualization
# call made earlier in the notebook. It re-binds `df` to those results after
# the synthetic-data cell overwrote it, and re-saves the same plot files.
df = visualize_one_experiment(
    paths=[
        f"reranker/sweep_re_ft_fl/seed_{i}/learned/eos_accum/results_detailed.pickle"
        for i in range(3)
    ],
    suffix="eos_accum",
    save=True,
    visualize=True,
)

In [None]:
# Best results with eos="no_eos" for three sweeps. Each table is passed to
# display() explicitly: a notebook cell only renders the value of its last
# expression, so bare calls would silently drop all but the final table.
for label, sweep_path in (
    ("FT on FL (no_eos)", "reranker/sweep_re_ft_fl/"),
    ("FT on PL (no_eos)", "reranker/sweep_re_ft_pl/"),
    ("PT on PL (no_eos)", "reranker/sweep_re/"),
):
    print(label)
    display(
        print_best_results(
            path=sweep_path,
            subset="learned",
            accum="accum",
            eos="no_eos",
            seed_range=3,
        )
    )

In [None]:
# Best results of the "sweep_re_ft_fl_<checkpoint>" sweeps. The label names
# the checkpoint so the four outputs can be told apart, and each table is
# passed to display() because a cell only renders its last expression.
for ckpt_no in (5000, 10000, 30000, 80000):
    print(f"FT on FL (eos), checkpoint {ckpt_no}")
    display(
        print_best_results(
            path=f"reranker/sweep_re_ft_fl_{ckpt_no}/",
            subset="learned",
            accum="accum",
            eos="eos",
            seed_range=3,
        )
    )

In [None]:
# NOTE(review): this re-defines `result_getter` with the same body as the
# definition earlier in the notebook; one of the two cells can be removed.
def result_getter(path):
    """Open a results pickle under ``METRICS_DIR`` and build a sample inspector.

    The returned ``getter(i, sim, method, normalization, layers)`` calls
    ``get_nn_abstracts`` on sample ``i`` of the selected configuration and on
    ``reranker_metrics["samples"][i]``. The loaded dict is attached to it as
    ``getter.metrics``.
    """
    # NOTE: pickle.load executes arbitrary code; only open trusted files.
    with gzip.open(METRICS_DIR + path) as f:
        reranker_metrics = pickle.load(f)

    def getter(
        i=3,
        sim="cosine",
        method="collapse",
        normalization="local",
        layers="activations.encoder.block.0,gradients.shared",
    ):
        selection = reranker_metrics["evals"][normalization][sim][method][layers]
        return get_nn_abstracts(
            selection["samples"][i],
            reranker_metrics["samples"][i],
            config={
                "sim": sim,
                "method": method,
                "layers": layers,
                "normalization": normalization,
            },
        )

    getter.metrics = reranker_metrics
    return getter

In [None]:
# Sample inspector for seed 0 of the "sweep_re_ft_fl" eos_accum results.
getter_ft_fl = result_getter('reranker/sweep_re_ft_fl/seed_0/learned/eos_accum/results_detailed.pickle')

In [None]:
# Inspect sample `idx`: retrieved abstracts for the shared-embedding gradient
# configuration (plus the baseline table) and for the first-block activation
# configuration. Only the activation table is displayed.
idx = 3
tracin_res, baseline_res = getter_ft_fl(i=idx, layers='gradients.shared')
# Take the model table by index instead of unpacking into `_`: binding `_`
# shadows IPython's "last output" variable for the rest of the session.
embed_res = getter_ft_fl(i=idx, layers='activations.encoder.block.0,activations.decoder.block.0')[0]
display(embed_res)

In [None]:
# Sample inspector for seed 0 of the "sweep_re_ft_pl" eos_accum results.
getter_ft_pl = result_getter('reranker/sweep_re_ft_pl/seed_0/learned/eos_accum/results_detailed.pickle')

In [None]:
# Inspect sample `idx`: retrieved abstracts for the shared-embedding gradient
# configuration (plus the baseline table) and for the first-block activation
# configuration. Only the activation table is displayed.
idx = 5
tracin_res, baseline_res = getter_ft_pl(i=idx, layers='gradients.shared')
# Take the model table by index instead of unpacking into `_`: binding `_`
# shadows IPython's "last output" variable for the rest of the session.
embed_res = getter_ft_pl(i=idx, layers='activations.encoder.block.0,activations.decoder.block.0')[0]
display(embed_res)