## Jupyter notebook for generating BERT figures

In [None]:
import csv

import numpy as np
from scipy.stats import kendalltau, pearsonr
import seaborn as sns

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter


mpl.rcParams["figure.dpi"] = 600
%config InlineBackend.figure_format = 'retina'

In [None]:
# The first CSV row supplies the column labels (first field dropped); an empty
# file yields an empty label list. The numeric table is parsed separately by
# np.genfromtxt from the same file.
with open("BERT_results_activation.csv", "r") as csv_file:
    titles = next(csv.reader(csv_file, delimiter=","), [])[1:]
csv_array = np.genfromtxt("BERT_results_activation.csv", delimiter=",")

In [None]:
# Show the full label list, then drop the same column positions from both the
# data (first row and first column already sliced off) and the labels.
# NOTE(review): `titles` is shrunk in place while `results` is rebuilt from
# `csv_array`, so re-running this cell without re-running the loader cell
# leaves the two out of sync.
print(titles)
dropped_columns = [1, 3, 5, 7, 9, 10, 12, 14]
results = np.delete(csv_array[1:, 1:], dropped_columns, axis=1)
titles[:] = [
    label for position, label in enumerate(titles) if position not in dropped_columns
]

In [None]:
%matplotlib inline
mpl.rcParams["figure.figsize"] = [20, 10]
mpl.rcParams.update({"font.size": 16})
mpl.rc("xtick", labelsize=13)
mpl.rc("ytick", labelsize=13)
fig, axs = plt.subplots(2, 4)
fig.tight_layout(h_pad=4)

for i in range(1, results.shape[1]):
    if i == 1:
        cmap = sns.color_palette("BuGn", as_cmap=True)
    if i == 3:
        cmap = sns.color_palette("GnBu", as_cmap=True)
    if i == 5:
        cmap = sns.color_palette("OrRd", as_cmap=True)
    if i == 8:
        cmap = sns.color_palette("RdPu", as_cmap=True)
    if i == 13:
        cmap = sns.color_palette("RdPu", as_cmap=True)
    if i >= 5:
        subplot.set_xlabel("GLUE Score")
    
    subplot = axs[int((i - 1) / 4), (i - 1) % 4]
    subplot.yaxis.set_major_formatter(ScalarFormatter())
    subplot.set_title(
        "\n"
        + titles[i]
        + "\n τ: {:.3f}    ρ: {:.3f}".format(
            abs(kendalltau(results[:, 0], results[:, i])[0]),
            abs(pearsonr(results[:, 0], results[:, i])[0]),
        )
    )

    # subplot.set_title(
    #     "\n"
    #     + titles[i] + " Non-Normalized"
    #     + "\n τ: {:.3f}    ρ: {:.3f}".format(
    #         abs(kendalltau(results[:, 0], results[:, i])[0]),
    #         abs(pearsonr(results[:, 0], results[:, i])[0]),
    #     )
    # )
    # if i >= 6:
    #     subplot.set_title(
    #         titles[i] + "\nNon-Normalized"
    #         + "\n τ: {:.3f}    ρ: {:.3f}".format(
    #             abs(kendalltau(results[:, 0], results[:, i])[0]),
    #             abs(pearsonr(results[:, 0], results[:, i])[0]),
    #         )
    #     )

    subplot.scatter(results[:, 0], results[:, i], c=results[:, 0], cmap=cmap)

fig.savefig("BERT_results.png", dpi=300, bbox_inches="tight")
fig.show()

In [None]:
# Parse the parameter-count table and keep everything except the first row and
# first column. Column labels are not needed for this figure, so `titles` is
# reset to an empty list.
parameter_array = np.genfromtxt(fname="BERT_results.csv", delimiter=",")
titles = []
parameter_results = parameter_array[1:, 1:]

In [None]:
%matplotlib inline
mpl.rcParams["figure.figsize"] = [5, 5]
mpl.rc("xtick", labelsize=13)
mpl.rc("ytick", labelsize=13)

fig, ax = plt.subplots()
cmap = sns.color_palette("OrRd", as_cmap=True)
ax.set_xlabel("Loss")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.set_title(
    "Number of Parameters"
    + "\n τ: {:.3f}    ρ: {:.3f}".format(
        abs(kendalltau(parameter_results[:, 0], parameter_results[:, 7])[0]),
        abs(pearsonr(parameter_results[:, 0], parameter_results[:, 7])[0]),
    )
)

ax.scatter(
    parameter_results[:, 0],
    parameter_results[:, 7],
    c=parameter_results[:, 0],
    cmap=cmap,
)

fig.savefig("BERT_params.png", dpi=300, bbox_inches="tight")

fig.show()

In [None]:
# Column labels come from the first CSV row (first field dropped); an empty
# file yields an empty label list. The numeric table is parsed separately by
# np.genfromtxt from the same file.
with open("BERT_batch_ablation.csv", "r") as csv_file:
    titles = next(csv.reader(csv_file, delimiter=","), [])[1:]
csv_array = np.genfromtxt("BERT_batch_ablation.csv", delimiter=",")

In [None]:
# Show the full label list, then drop the same column positions from both the
# data (first row and first column already sliced off) and the labels.
# NOTE(review): `titles` is shrunk in place while `results` is rebuilt from
# `csv_array`, so re-running this cell without re-running the loader cell
# leaves the two out of sync.
print(titles)
dropped_columns = [1, 3, 5, 7, 9, 10, 12, 14]
results = np.delete(csv_array[1:, 1:], dropped_columns, axis=1)
titles[:] = [
    label for position, label in enumerate(titles) if position not in dropped_columns
]

In [None]:
%matplotlib inline
mpl.rcParams["figure.figsize"] = [20, 10]
mpl.rcParams.update({"font.size": 16})
mpl.rc("xtick", labelsize=13)
mpl.rc("ytick", labelsize=13)
fig, axs = plt.subplots(2, 4)
fig.tight_layout(h_pad=4)

for i in range(1, results.shape[1]):
    if i == 1:
        cmap = sns.color_palette("BuGn", as_cmap=True)
    if i == 3:
        cmap = sns.color_palette("GnBu", as_cmap=True)
    if i == 5:
        cmap = sns.color_palette("OrRd", as_cmap=True)
    if i == 8:
        cmap = sns.color_palette("RdPu", as_cmap=True)
    if i == 13:
        cmap = sns.color_palette("RdPu", as_cmap=True)
    subplot = axs[int((i - 1) / 4), (i - 1) % 4]
    subplot.yaxis.set_major_formatter(ScalarFormatter())
    subplot.set_title("\n" + titles[i].split(" Normalized")[0])
    bp = subplot.boxplot(
        np.transpose(results[:, i].reshape((10, 10))), patch_artist=True, notch=True
    )
    for whisker in bp["whiskers"]:
        whisker.set(color="black", linewidth=1, linestyle=":")

    for patch in bp["boxes"]:
        patch.set_facecolor(cmap(0.75))

    for median in bp["medians"]:
        median.set(color="black", linewidth=1)

    for flier in bp["fliers"]:
        flier.set(marker="o", color="black")

fig.savefig("BERT_batch_ablation.png", dpi=300, bbox_inches="tight")
fig.show()

In [None]:
# Column labels come from the first CSV row (first field dropped); an empty
# file yields an empty label list. The numeric table is parsed separately by
# np.genfromtxt from the same file.
with open("BERT_initialization_ablation.csv", "r") as csv_file:
    titles = next(csv.reader(csv_file, delimiter=","), [])[1:]
csv_array = np.genfromtxt("BERT_initialization_ablation.csv", delimiter=",")

In [None]:
# Show the full label list, then drop the same column positions from both the
# data (first row and first column already sliced off) and the labels.
# NOTE(review): `titles` is shrunk in place while `results` is rebuilt from
# `csv_array`, so re-running this cell without re-running the loader cell
# leaves the two out of sync.
print(titles)
dropped_columns = [1, 3, 5, 7, 9, 10, 12, 14]
results = np.delete(csv_array[1:, 1:], dropped_columns, axis=1)
titles[:] = [
    label for position, label in enumerate(titles) if position not in dropped_columns
]

In [None]:
%matplotlib inline
mpl.rcParams["figure.figsize"] = [20, 10]
mpl.rcParams.update({"font.size": 16})
mpl.rc("xtick", labelsize=13)
mpl.rc("ytick", labelsize=13)
fig, axs = plt.subplots(2, 4)
fig.tight_layout(h_pad=4)

for i in range(1, results.shape[1]):
    if i == 1:
        cmap = sns.color_palette("BuGn", as_cmap=True)
    if i == 3:
        cmap = sns.color_palette("GnBu", as_cmap=True)
    if i == 5:
        cmap = sns.color_palette("OrRd", as_cmap=True)
    if i == 8:
        cmap = sns.color_palette("RdPu", as_cmap=True)
    if i == 13:
        cmap = sns.color_palette("RdPu", as_cmap=True)
        
    subplot = axs[int((i - 1) / 4), (i - 1) % 4]
    subplot.yaxis.set_major_formatter(ScalarFormatter())
    subplot.set_title("\n" + titles[i].split(" Normalized")[0])

    bp = subplot.boxplot(
        np.transpose(results[:, i].reshape((10, 10))), patch_artist=True, notch=True
    )
    for whisker in bp["whiskers"]:
        whisker.set(color="black", linewidth=1, linestyle=":")

    for patch in bp["boxes"]:
        patch.set_facecolor(cmap(0.75))

    for median in bp["medians"]:
        median.set(color="black", linewidth=1)

    for flier in bp["fliers"]:
        flier.set(marker="o", color="black")

fig.savefig("BERT_initialization_ablation.png", dpi=300, bbox_inches="tight")
fig.show()

In [None]:
# Parse the pretraining-ablation table; the first parsed row is discarded
# (presumably the header line -- confirm against the CSV), all columns kept.
csv_array = np.genfromtxt(fname="BERT_train_ablation.csv", delimiter=",")
results = csv_array[1:, :]

In [None]:
%matplotlib inline
mpl.rc("xtick", labelsize=13)
mpl.rc("ytick", labelsize=13)
mpl.rcParams.update({"font.size": 14})
fig, ax = plt.subplots()
cmap = sns.color_palette("GnBu_r", as_cmap=True)
ax.set_xlabel("Number of Pretraining Steps")
ax.set_ylabel("GLUE Score")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.set_title("Pretraining Ablation Study")

for i in range(10):
    plt.plot(
        results[0:21, 1],
        results[:, 10].reshape((10, 21))[i],
        linestyle=":",
        linewidth=1,
    )
plt.plot(
    results[0:21, 1], np.average(results[:, 10].reshape((10, 21)), axis=0), linewidth=2
)

fig.savefig("BERT_train_ablation.svg", dpi=300, bbox_inches="tight")

fig.show()