In [None]:
from functools import partial
import itertools
import json
from pathlib import Path
import re
import sys
sys.path.append("../src")

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from tqdm import tqdm, tqdm_notebook

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Global seaborn look: white grid, "paper" context, enlarged fonts and thicker lines.
sns.set(style="whitegrid", context="paper", font_scale=3.5, rc={"lines.linewidth": 2.5})
from IPython.display import set_matplotlib_formats
# Inline figures are emitted as PNG; the SVG alternative is kept below for reference.
set_matplotlib_formats('png')
#set_matplotlib_formats('svg')

%load_ext autoreload
%autoreload 2
import util

## Data preparation

In [None]:
# Root of all generated artifacts, plus the two sub-trees this notebook reads.
output_path = Path("..") / "output"
bert_encoding_path = output_path.joinpath("encodings")
sprobe_results_path = output_path.joinpath("structural-probe")

In [None]:
# One checkpoint id per entry of the encodings directory
# (unpacked below as (model, run, step) triples).
checkpoints = list(map(util.get_encoding_ckpt_id, bert_encoding_path.iterdir()))

In [None]:
# Model name of every checkpoint; a name repeats once per checkpoint of that model.
models = [name for name, _run, _step in checkpoints]

# The baseline is the reference point for performance deltas, so it must be present.
baseline_model = "baseline"
if baseline_model not in models:
    raise ValueError("Missing baseline model. This is necessary to compute performance deltas in the analysis of fine-tuning models. Stop.")

# Names prefixed with "LM_" count as custom models; everything else except the
# baseline counts as a standard model.
standard_models = [name for name in models
                   if name != baseline_model and not name.startswith("LM_")]
custom_models = [name for name in models
                 if name.startswith("LM_") and name != baseline_model]

# Distinct run ids and checkpoint steps, in ascending order.
runs = sorted({run for _name, run, _step in checkpoints})
checkpoint_steps = sorted({step for _name, _run, step in checkpoints})

# Models which should appear in the final report figures
report_models = ["SQuAD", "QQP", "MNLI", "SST", "LM", "LM_scrambled", "LM_scrambled_para", "LM_pos", "glove"]

# Model subsets to render in different report figures; subsets with no
# members are dropped.
report_model_sets = [
    (set_name, members)
    for set_name, members in (
        ("all", set(report_models)),
        ("standard", set(report_models).intersection(standard_models)),
        ("custom", set(report_models).intersection(custom_models)),
    )
    if members
]

In [None]:
# When True, the final report figures are rendered and written below figure_path.
RENDER_FINAL = True
figure_path = Path("../reports/figures")
# parents=True also creates ../reports on a fresh checkout; without it mkdir
# raises FileNotFoundError when the parent directory does not exist yet.
figure_path.mkdir(parents=True, exist_ok=True)

# Fixed color per report model, assigned in alphabetical order of the model names.
report_hues = dict(zip(sorted(report_models), sns.color_palette()))

## Collect results

In [None]:
def _read_scalar_result(result_dir, pattern):
    """Read a single float from the first file below `result_dir` matching `pattern`.

    Returns None when no file matches, the file cannot be read, or its content
    is not a float. Only these expected failures are absorbed; anything else
    (including KeyboardInterrupt) propagates instead of being silently dropped.
    """
    try:
        result_file = next(result_dir.glob(pattern), None)
        if result_file is None:
            return None
        return float(result_file.read_text().strip())
    except (OSError, ValueError):
        return None


# Maps (model, run, step) -> Series with the "uuas" and "spearman" dev scores.
eval_results = {}
for eval_dir in tqdm_notebook(list(sprobe_results_path.iterdir())):
    if not eval_dir.is_dir():
        continue
    model, run, step = util.get_encoding_ckpt_id(eval_dir)

    # A checkpoint is only recorded when both metrics are available.
    uuas = _read_scalar_result(eval_dir, "**/dev.uuas")
    if uuas is None:
        continue
    spearman = _read_scalar_result(eval_dir, "**/dev.spearmanr-*-mean")
    if spearman is None:
        continue

    eval_results[model, run, step] = pd.Series({"uuas": uuas, "spearman": spearman})

### Add non-BERT results

In [None]:
# Names of non-BERT models that have structural-probe results; starts out empty.
nonbert_models = list()

In [None]:
# GloVe
# NOTE: this cell is disabled. Before re-enabling it, be aware that
#   * `sprobe_glove_path` is not defined in the cells shown in this notebook, and
#   * the key written at the end has four elements, whereas the BERT results
#     above are keyed by (model, run, step); the aggregation step names exactly
#     four index levels (model, run, step, metric), so the keys must be reconciled.
# for glove_dir in tqdm_notebook(list(sprobe_glove_path.glob("*"))):
#     if not glove_dir.is_dir(): continue
#     model = glove_dir.name
    
#     try:
#         uuas_file = list(glove_dir.glob("**/dev.uuas"))[0]
#         with uuas_file.open("r") as f:
#             uuas = float(f.read().strip())
#     except: continue
        
#     try:
#         spearman_file = list(glove_dir.glob("**/dev.spearmanr-*-mean"))[0]
#         with spearman_file.open("r") as f:
#             spearman = float(f.read().strip())
#     except: continue
        
#     nonbert_models.append(model)
#     eval_results[model, 1, 250, 0] = pd.Series({"uuas": uuas, "spearman": spearman})

### Aggregate

In [None]:
# Stack the per-checkpoint Series into one long frame indexed by
# (model, run, step, metric); the scores end up in the single column 0.
# NOTE: this rebinds eval_results from a dict to a DataFrame, so the cell
# cannot be re-run without re-running the collection cell first.
eval_results = pd.concat(eval_results, names=["model", "run", "step", "metric"]).to_frame()

In [None]:
# Peek at the last 20 rows of the aggregated results.
eval_results.iloc[-20:]

In [None]:
# Only use spaCy results: (model name, legend label) pairs, restricted to
# models that were actually registered in nonbert_models.
nonbert_models_to_graph = [
    (model_name, legend_label)
    for model_name, legend_label in [("spaCy-en_vectors_web_lg", "GloVe")]
    if model_name in nonbert_models
]

## Graph

In [None]:
# Long-format plotting data without the baseline and the non-BERT models.
graph_data = (
    eval_results
    .reset_index()
    .loc[lambda frame: ~frame["model"].isin([*nonbert_models, baseline_model])]
)

In [None]:
# One panel per metric. The column order is pinned so that pairing each panel
# with its metric below does not depend on the order in which the metrics
# happen to appear in graph_data.
metric_order = ["uuas", "spearman"]
g = sns.FacetGrid(data=graph_data, col="metric", col_order=metric_order,
                  height=7, sharex=True, sharey=True)
g.map(sns.lineplot, "step", 0, "model")

# Horizontal reference line per non-BERT model. eval_results is indexed by
# (model, run, step, metric), so the score is selected by model and metric
# rather than by a hard-coded run/step key; if several rows match, their
# mean is drawn.
for metric_ax, metric in zip(g.axes[0], metric_order):
    for nonbert_model, label in nonbert_models_to_graph:
        scores = eval_results.xs((nonbert_model, metric), level=("model", "metric"))[0]
        metric_ax.axhline(scores.mean(), linestyle='--', label=label)

g.add_legend()
g

In [None]:
# Grid with one row per model and one column per metric, each panel showing
# the score over training steps.
g = sns.FacetGrid(data=graph_data, row="model", col="metric",
                  height=7, sharex=True, sharey=True)
g.map(sns.lineplot, "step", 0)
g.add_legend()

In [None]:
%matplotlib agg

if RENDER_FINAL:
    dir = figure_path / "structural_probe"
    dir.mkdir(exist_ok=True)
    
    for metric, label in [("uuas", "UUAS"), ("spearman", "Spearman correlation")]:
        fig = plt.figure(figsize=(15, 9))
        ax = sns.lineplot(data=graph_data[(graph_data.metric == metric)], x="step", y=0,
                          hue="model", palette=report_hues)
        for nonbert_model, nonbert_label in nonbert_models_to_graph:
            ax.axhline(eval_results.loc[nonbert_model, 1, 0, metric][0],
                       linestyle='--', label=nonbert_label, linewidth=3)
            
        plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
        plt.xlim((0, checkpoint_steps[-1]))
        plt.ylabel(label)
        plt.xlabel("Training step")
        plt.tight_layout()
        plt.savefig(dir / ("%s.pdf" % metric))
        plt.close()
    
%matplotlib inline