In [1]:
import wandb
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch
import json
from pathlib import Path
from typing import Dict
from textwrap import wrap

from lmc.logging.plot_utils import *
from lmc.logging.report_utils import *
from lmc.logging import *
from lmc.logging.wandb_utils import get_timeseries_metrics_from_wandb
from importlib import reload
from lmc.utils import Step
import re

%load_ext autoreload
%autoreload 2

# NLP - Finetuning


## GLUE


In [84]:
import wandb

# Open a W&B public-API client and flush its local cache so the run list
# fetched below reflects the current state of the project.
api = wandb.Api()
api.flush()

project = "gsaltintas/LMCFinetuning-NLP"

# Runs are excluded when they crashed or carry any of the tags below.
excluded_states = ["crashed"]
excluded_tags = ["ignore", "ignor", "hidden", "delete", "wrong-scaling"]
filters = {
    "state": {"$nin": excluded_states},
    "tags": {"$nin": excluded_tags},
}

grad_runs = api.runs(project, filters=filters)

# Optional narrowing, disabled by default: keep only the runs that do (first
# line) or do not (second line) set `dont_perturb_module_patterns`.
# grad_runs = [run for run in grad_runs if len(run.config.get("dont_perturb_module_patterns", [])) > 0]
# grad_runs = [run for run in grad_runs if len(run.config.get("dont_perturb_module_patterns", [])) == 0]

# Number of runs that matched the filters.
len(grad_runs)


25

In [85]:
from lmc.logging.wandb_registry import WandbMetricsRegistry
from lmc.logging.wandb_utils import get_merged_df

# Merge the selected runs into a single dataframe. With return_registry=True
# a second value is returned; it is used for metric lookups further down.
main_df, registry = get_merged_df(
    grad_runs,
    return_registry=True,
    scale_barriers=False,
    performance_aware=False,
    find_missing=False,
)

# Separate registry built from the largest `n_models` value in the data.
wandb_keys = WandbMetricsRegistry(main_df["n_models"].max())

# Keep only rows with a usable model-1 training cross-entropy: reject the
# string placeholders "NaN" and "null" as well as genuinely missing values.
train_ce = main_df["model1.train.cross_entropy"]
na_filters = train_ce.ne("NaN") & train_ce.ne("null") & train_ce.notna()

merged_df = main_df[na_filters]

# Number of rows that survived the filter.
len(merged_df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df["trainer.opt.warmup_ratio"].fillna(0, inplace=True)
  merged_df.fillna("null", inplace=True)


22

In [89]:
from lmc.logging.report_utils import mean_std, print_to_markdown

# Columns that identify a row of the summary table.
latex_ind = ["data.dataset", "trainer.training_steps"]

# Metrics reported in the table, looked up in the registry by key.
table_metrics = [
    registry.get_metric(metric_key)
    for metric_key in (
        "train_cross_entropy_1",
        "test_cross_entropy_1",
        "test_accuracy_1",
        "train_pearson_correlation_1",
        "train_f1_1",
    )
]

# Each metric's `flat_name` is used as the dataframe column to aggregate and
# its `ylabel` as the header shown in the rendered table.
name_to_label = {metric.flat_name: metric.ylabel for metric in table_metrics}
value_columns = [metric.flat_name for metric in table_metrics]

# Aggregate every metric per (dataset, training steps) group with `mean_std`.
pv = merged_df.pivot_table(
    index=latex_ind,
    values=value_columns,
    aggfunc=[mean_std],
)

# Swap the flat column names in the second header level for display labels.
pv.columns = pd.MultiIndex.from_tuples(
    [(agg_name, name_to_label[flat_name]) for agg_name, flat_name in pv.columns]
)

# Flatten for export: drop the aggregator header level, turn the index back
# into ordinary columns, and list datasets in descending name order.
latex_pv = (
    pv.droplevel(level=0, axis=1)
    .reset_index()
    .sort_values("data.dataset", ascending=False)
)

print_to_latex(latex_pv, threshold=1e7, style=1)


\begin{tabular}{lrlrlrl}
\toprule
data.dataset & trainer.training_steps & $\mathrm{Acc}^{1}_{\mathrm{Te}}$ & $\mathrm{CE}^{1}_{\mathrm{Te}}$ & $\mathrm{CE}^{1}_{\mathrm{Tr}}$ & F1 & Pearson Correlation \\
\midrule
wnli & 2500 & 0.21 ± 0.00 & 5.21 ± 0.00 & 0.03 ± 0.00 & * & * \\
stsb & 2500 & * & 0.58 ± * & 0.53 ± * & * & 0.87 ± * \\
sst2 & 2500 & 0.91 ± 0.00 & 0.23 ± 0.01 & 0.14 ± 0.00 & * & * \\
rte & 2500 & 0.64 ± 0.00 & 0.76 ± 0.00 & 0.36 ± 0.00 & * & * \\
qqp & 2500 & 0.87 ± * & 0.31 ± * & 0.31 ± * & 0.82 ± * & * \\
qnli & 2500 & 0.89 ± 0.00 & 0.28 ± 0.00 & 0.30 ± 0.00 & * & * \\
mrpc & 2500 & 0.83 ± 0.00 & 0.43 ± 0.00 & 0.22 ± 0.00 & 0.94 ± 0.00 & * \\
mnli & 2500 & 0.78 ± 0.00 & 0.55 ± 0.00 & 0.57 ± 0.00 & * & * \\
mnli & 4000 & 0.78 ± * & 0.54 ± * & 0.55 ± * & * & * \\
mnli & 2ep & 0.80 ± * & 0.50 ± * & 0.55 ± * & * & * \\
cola & 2500 & 0.81 ± 0.00 & 0.46 ± 0.00 & 0.36 ± 0.00 & * & * \\
cola & 5ep & 0.77 ± 0.00 & 0.55 ± 0.00 & 0.47 ± 0.00 & * & * \\
\bottomrule
\end{tabular}



In [90]:
# Display the pivot table as built: rows are indexed by `latex_ind`, columns
# keep two header levels (aggregator name, then metric display label).
pv

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_std,mean_std,mean_std,mean_std,mean_std
Unnamed: 0_level_1,Unnamed: 1_level_1,$\mathrm{Acc}^{1}_{\mathrm{Te}}$,$\mathrm{CE}^{1}_{\mathrm{Te}}$,$\mathrm{CE}^{1}_{\mathrm{Tr}}$,F1,Pearson Correlation
data.dataset,trainer.training_steps,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
cola,2500,0.81 ± 0.00,0.46 ± 0.00,0.36 ± 0.00,,
cola,5ep,0.77 ± 0.00,0.55 ± 0.00,0.47 ± 0.00,,
mnli,2500,0.78 ± 0.00,0.55 ± 0.00,0.57 ± 0.00,,
mnli,4000,0.78 ± nan,0.54 ± nan,0.55 ± nan,,
mnli,2ep,0.80 ± nan,0.50 ± nan,0.55 ± nan,,
mrpc,2500,0.83 ± 0.00,0.43 ± 0.00,0.22 ± 0.00,0.94 ± 0.00,
qnli,2500,0.89 ± 0.00,0.28 ± 0.00,0.30 ± 0.00,,
qqp,2500,0.87 ± nan,0.31 ± nan,0.31 ± nan,0.82 ± nan,
rte,2500,0.64 ± 0.00,0.76 ± 0.00,0.36 ± 0.00,,
sst2,2500,0.91 ± 0.00,0.23 ± 0.01,0.14 ± 0.00,,


In [91]:
# Render the flattened summary table as a Markdown table. The helper prints
# the table and also returns it as a string; the trailing semicolon stops the
# notebook from echoing that returned string as a second, escaped copy.
print_to_markdown(latex_pv);

| data.dataset   | trainer.training_steps   | $\mathrm{Acc}^{1}_{\mathrm{Te}}$   | $\mathrm{CE}^{1}_{\mathrm{Te}}$   | $\mathrm{CE}^{1}_{\mathrm{Tr}}$   | F1          | Pearson Correlation   |
|:---------------|:-------------------------|:-----------------------------------|:----------------------------------|:----------------------------------|:------------|:----------------------|
| wnli           | 2500                     | 0.21 ± 0.00                        | 5.21 ± 0.00                       | 0.03 ± 0.00                       | *         | *                   |
| stsb           | 2500                     | *                                | 0.58 ± *                        | 0.53 ± *                        | *         | 0.87 ± *            |
| sst2           | 2500                     | 0.91 ± 0.00                        | 0.23 ± 0.01                       | 0.14 ± 0.00                       | *         | *                   |
| rte            | 2500                     | 0.64 ± 

  format_float = lambda x: f"{x:.{float_precision}f}" if isinstance(x, float) else x


'| data.dataset   | trainer.training_steps   | $\\mathrm{Acc}^{1}_{\\mathrm{Te}}$   | $\\mathrm{CE}^{1}_{\\mathrm{Te}}$   | $\\mathrm{CE}^{1}_{\\mathrm{Tr}}$   | F1          | Pearson Correlation   |\n|:---------------|:-------------------------|:-----------------------------------|:----------------------------------|:----------------------------------|:------------|:----------------------|\n| wnli           | 2500                     | 0.21 ± 0.00                        | 5.21 ± 0.00                       | 0.03 ± 0.00                       | *         | *                   |\n| stsb           | 2500                     | *                                | 0.58 ± *                        | 0.53 ± *                        | *         | 0.87 ± *            |\n| sst2           | 2500                     | 0.91 ± 0.00                        | 0.23 ± 0.01                       | 0.14 ± 0.00                       | *         | *                   |\n| rte            | 2500                  

In [69]:
# Display the flattened export table: aggregator header level dropped, index
# reset to ordinary columns, rows sorted by `data.dataset` in descending order.
latex_pv

Unnamed: 0,data.dataset,trainer.training_steps,$\mathrm{Acc}^{1}_{\mathrm{Te}}$,$\mathrm{CE}^{1}_{\mathrm{Te}}$,$\mathrm{CE}^{1}_{\mathrm{Tr}}$
11,wnli,2500,0.21 ± 0.00,5.21 ± 0.00,0.03 ± 0.00
10,stsb,2500,0.09 ± 0.00,0.58 ± 0.00,0.53 ± 0.00
9,sst2,2500,0.91 ± 0.00,0.23 ± 0.01,0.14 ± 0.00
8,rte,2500,0.64 ± 0.00,0.76 ± 0.00,0.36 ± 0.00
7,qqp,2500,0.87 ± nan,0.31 ± nan,0.31 ± nan
6,qnli,2500,0.89 ± 0.00,0.28 ± 0.00,0.30 ± 0.00
5,mrpc,2500,0.83 ± 0.00,0.43 ± 0.00,0.22 ± 0.00
2,mnli,2500,0.78 ± 0.00,0.55 ± 0.00,0.57 ± 0.00
3,mnli,4000,0.78 ± nan,0.54 ± nan,0.55 ± nan
4,mnli,2ep,0.80 ± nan,0.50 ± nan,0.55 ± nan
