# Calibration

In [None]:
# IPython magics: enable the autoreload extension in mode 2 so that edited
# modules are re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import logging

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc

from analysis import load_configs, load_experimental_results, get_stats
from factorization.config import IMAGE_DIR, USETEX
from factorization.models.mlp import Model

# Show INFO-level log records emitted while the notebook runs.
logging.basicConfig(level=logging.INFO)


# Global matplotlib style: 8pt serif font; text is rendered through LaTeX only
# when the project configuration sets USETEX.
rc("font", family="serif", size=8)
rc("text", usetex=USETEX)
if USETEX:
    # Load the LaTeX `times` package in the preamble used for text rendering.
    rc("text.latex", preamble=r"\usepackage{times}")


def get_names(name):
    match name:
        case "dim":
            return "calibration/emb", ["emb_dim"]
        case "ffn":
            return "calibration/ffn", ["ffn_dim"]
        case "ffn_bis":
            return "calibration/ffn_bis", ["ffn_dim"]
        case "layer":
            return "calibration/layer", ["nb_layers"]
        case "layer_bis":
            return "calibration/layer_bis", ["nb_layers"]
        case "lr":
            return "calibration/lr", ["learning_rate"]
        case "scheduler":
            return "calibration/lr_schedule", ["learning_rate"]

In [None]:
# When True, each plotting cell draws a compact figure and a separate legend
# figure and writes both as PDFs under IMAGE_DIR; when False, it draws one
# larger two-panel figure (curves left, legend right) and saves nothing.
save_fig = True

## Embedding dimension

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis.
name = "dim"
xaxis = "flops"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
kwargs = {
    "input_factors": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    # "input_factors": [8, 8, 8, 8],
    "output_factors": [8, 8, 8, 8],
    # "output_factors": [4096],
    "learning_rate": 1e-2,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Add a `flops` column: the value of `Model.get_flops` for each row's
# architecture columns, scaled by the row's epoch and input size.
# NOTE(review): presumably `get_flops` is a per-sample cost, making the product
# the cumulative training compute -- confirm against `Model.get_flops`.
res['flops'] = Model.get_flops(res['emb_dim'], res['ffn_dim'], res['nb_layers'], res['output_size'])
res['flops'] *= res['epoch'] * res['input_size']

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the embedding-dimension study, one curve per entry of
# `all_mean`, labelled with the value of the first study factor.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append(rf'$d=${val[keys[0]].values[0]}')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlim(1e10, 1e13)
ax.set_xlabel("Flop", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    # NOTE(review): only this section prefixes the legend file with `cal0_`.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'cal0_{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

## FFN dimension

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis.
name = "ffn"
xaxis = "flops"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
kwargs = {
    # "input_factors": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    "input_factors": [8, 8, 8, 8],
    # "output_factors": [8, 8, 8, 8],
    "output_factors": [4096],
    "learning_rate": 1e-2,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Add a `flops` column: the value of `Model.get_flops` for each row's
# architecture columns, scaled by the row's epoch and input size.
# NOTE(review): presumably `get_flops` is a per-sample cost, making the product
# the cumulative training compute -- confirm against `Model.get_flops`.
res['flops'] = Model.get_flops(res['emb_dim'], res['ffn_dim'], res['nb_layers'], res['output_size'])
res['flops'] *= res['epoch'] * res['input_size']

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the FFN-width study, one curve per entry of
# `all_mean`, labelled with the first study factor divided by 64.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    # NOTE(review): the 64 looks like the embedding dimension `d` of these
    # runs, hard-coded -- confirm against the study configs.
    legend.append(rf'$h={val[keys[0]].values[0]//64}d$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Flop", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    # NOTE(review): this file name has a double underscore after `name`,
    # unlike the other sections -- confirm whether that is intended.
    fig.savefig(
        IMAGE_DIR / f'{name}__{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

Same study with smaller input and output spaces.

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis.
name = "ffn_bis"
xaxis = "flops"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
kwargs = {
    # "input_factors": [2, 2, 2, 3, 3, 5],
    "input_factors": [3, 4, 5, 6],
    # "output_factors": [2, 2, 3, 3],
    "output_factors": [36],
    "learning_rate": 1e-2,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Add a `flops` column: the value of `Model.get_flops` for each row's
# architecture columns, scaled by the row's epoch and input size.
# NOTE(review): presumably `get_flops` is a per-sample cost, making the product
# the cumulative training compute -- confirm against `Model.get_flops`.
res['flops'] = Model.get_flops(res['emb_dim'], res['ffn_dim'], res['nb_layers'], res['output_size'])
res['flops'] *= res['epoch'] * res['input_size']

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the FFN-width study on the smaller spaces, one curve
# per entry of `all_mean`, labelled with the first study factor divided by 64.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    # NOTE(review): the 64 looks like the embedding dimension `d` of these
    # runs, hard-coded -- confirm against the study configs.
    legend.append(rf'$h={val[keys[0]].values[0]//64}d$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Flop", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

## Number of layers

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis.
name = "layer"
xaxis = "flops"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
kwargs = {
    # "input_factors": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    "input_factors": [8, 8, 8, 8],
    # "output_factors": [8, 8, 8, 8],
    "output_factors": [4096],
    "learning_rate": 1e-2,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Add a `flops` column: the value of `Model.get_flops` for each row's
# architecture columns, scaled by the row's epoch and input size.
# NOTE(review): presumably `get_flops` is a per-sample cost, making the product
# the cumulative training compute -- confirm against `Model.get_flops`.
res['flops'] = Model.get_flops(res['emb_dim'], res['ffn_dim'], res['nb_layers'], res['output_size'])
res['flops'] *= res['epoch'] * res['input_size']

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the depth study, one curve per entry of `all_mean`,
# labelled with the value of the first study factor.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append(rf'L={val[keys[0]].values[0]}')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Flop", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

Same study with smaller input and output spaces.

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis.
name = "layer_bis"
xaxis = "flops"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
kwargs = {
    # "input_factors": [2, 2, 2, 3, 3, 5],
    "input_factors": [3, 4, 5, 6],
    # "output_factors": [2, 2, 3, 3],
    "output_factors": [36],
    "learning_rate": 1e-2,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Add a `flops` column: the value of `Model.get_flops` for each row's
# architecture columns, scaled by the row's epoch and input size.
# NOTE(review): presumably `get_flops` is a per-sample cost, making the product
# the cumulative training compute -- confirm against `Model.get_flops`.
res['flops'] = Model.get_flops(res['emb_dim'], res['ffn_dim'], res['nb_layers'], res['output_size'])
res['flops'] *= res['epoch'] * res['input_size']

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the depth study on the smaller spaces, one curve per
# entry of `all_mean`, labelled with the value of the first study factor.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append(rf'L={val[keys[0]].values[0]}')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Flop", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

## Learning rate

In [None]:
# Study identifier understood by `get_names`, and the column handed to
# `get_stats` as the x-axis (epochs here, not flops).
name = "lr"
xaxis = "epoch"

# Forwarded as keyword arguments to `load_experimental_results` and
# `get_stats`; the lengths of the two factor lists also appear in the name of
# the saved figure. No `learning_rate` entry: it is the factor under study.
# presumably these values select which runs are kept -- TODO confirm in `analysis`
# The scheduler section further down reuses this dictionary as-is.
kwargs = {
    # "input_factors": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    "input_factors": [8, 8, 8, 8],
    # "output_factors": [8, 8, 8, 8],
    "output_factors": [4096],
    "nb_epochs": [1000],
    # "seed": 0,
}

# Config location for `load_configs` and factor names for `get_stats`.
file_path, study_factors = get_names(name)

In [None]:
# Load the configurations found under `file_path`, then the experimental
# results for them; `kwargs` is forwarded to the results loader.
# NOTE(review): presumably `kwargs` filters the runs to keep -- confirm in `analysis`.
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)

In [None]:
# Aggregate the results per setting of `study_factors`, with `xaxis` as the
# x-axis column. The scheduler plot further down reads `all_mean`, `all_std`
# and `keys` again, so they must still hold these values when it runs.
# presumably `all_mean` / `all_std` hold one table of means / standard
# deviations per setting and `keys` the factor column names -- TODO confirm
all_mean, all_std, keys = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Test-loss curves for the learning-rate study, one curve per entry of
# `all_mean`, labelled with the value of the first study factor.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append(rf'$\eta=${val[keys[0]].values[0]}')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Epoch", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')

## Homemade Scheduler

In [None]:
# Load and aggregate the custom learning-rate schedule study.
# `kwargs` is not redefined in this cell: it is whatever the last executed
# configuration cell set -- in notebook order, the learning-rate section.
name = "scheduler"
xaxis = "epoch"
file_path, study_factors = get_names(name)
all_configs = load_configs(file_path)
res = load_experimental_results(all_configs, **kwargs)
# Stored under `*_bis` names so that `all_mean`, `all_std` and `keys` from the
# learning-rate section stay available for the comparison plot.
all_mean_bis, all_std_bis, keys_bis = get_stats(res, study_factors, xaxis=xaxis, **kwargs)

In [None]:
# Comparison plot: the curves in `all_mean` (expected to still hold the
# learning-rate study) followed by the custom-schedule curves in
# `all_mean_bis`.
y_name = "test_loss"
all_plots, legend = [], []

# Compact single panel when exporting; otherwise a wide figure whose second
# panel is reserved for the legend.
if save_fig:
    fig, ax = plt.subplots(figsize=(1.75, 1.5))
else:
    fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
    ax = axes[0]

# Each series is drawn against its own index.
# presumably that index is the `xaxis` column chosen in `get_stats` -- TODO confirm
for val, val_std in zip(all_mean, all_std):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append(rf'$\eta=${val[keys[0]].values[0]}')

# Every custom-schedule curve gets the same 'custom' label.
for val, val_std in zip(all_mean_bis, all_std_bis):
    (a,) = ax.plot(val[y_name], linewidth=1.5)
    all_plots.append(a)
    legend.append('custom')

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Epoch", fontsize=8)
ax.set_ylabel(r"Test loss ${\cal L}_{\text{unobs.}}$", fontsize=8)
ax.tick_params(axis='both', labelsize=6)
ax.grid()

if save_fig:
    fig.savefig(
        IMAGE_DIR / f'{name}_{len(kwargs["input_factors"])}_{len(kwargs["output_factors"])}.pdf',
        bbox_inches='tight',
    )
    # The legend is exported on its own, in a narrow axis-free figure.
    fig, ax = plt.subplots(figsize=(.25, 1.5))
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=6)
    ax.axis('off')
    fig.savefig(IMAGE_DIR / f'{name}_leg.pdf', bbox_inches='tight')
else:
    # Interactive view: show the legend in the second panel.
    ax = axes[1]
    leg = ax.legend(all_plots, legend, loc='center', ncol=1, fontsize=12)
    ax.axis('off')