In [1]:
import matplotlib.pyplot as plt

from models import TransformerModelLooped
from curriculum import CurriculumSimple
from train import train_without_config, validate_model

# Problem dimensionality; passed to the model constructors, the curricula and
# the training calls in the cells below, and used to normalise validation loss.
n_dims = 10

In [4]:
# Run "b5_b15": build the looped transformer, then place it on the GPU.
model_loop_b5_b15 = TransformerModelLooped(
    n_dims=n_dims, n_positions=101, n_embd=128, n_layer=1, n_head=4, pred_type="regression"
)
model_loop_b5_b15 = model_loop_b5_b15.cuda()

# NOTE(review): CurriculumSimple takes positional arguments only here; they
# presumably are (n_dims, n_points, n_loops, dims schedule, points schedule,
# loops schedule) -- confirm against curriculum.py.
cirriculum_b5_b15 = CurriculumSimple(
    n_dims,
    31,
    5,
    [5000, n_dims, 0],
    [5000, 31, 0],
    [1000, 15, 1],
)

# seed=None: no explicit seed is handed to the trainer.
metrics_l1_b5_b15_d10 = train_without_config(
    model_loop_b5_b15,
    cirriculum_b5_b15,
    model_n_dims=n_dims,
    log_every_steps=10,
    train_steps=15000,
    family="gpt2_loop",
    do_wandb_log=False,
    seed=None,
    task_name="noisy_linear_regression",
)

number of parameters: 0.20M


  0%|          | 0/15000 [00:00<?, ?it/s]

0


loss 3.6659064292907715: 100%|██████████| 15000/15000 [17:47<00:00, 14.05it/s]


In [3]:
# Run "b5": build the looped transformer, then place it on the GPU.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt at
# 699/15000 steps -- re-run to completion before relying on model_loop_b5.
model_loop_b5 = TransformerModelLooped(
    n_dims=n_dims, n_positions=101, n_embd=128, n_layer=1, n_head=4, pred_type="regression"
)
model_loop_b5 = model_loop_b5.cuda()

# NOTE(review): CurriculumSimple takes positional arguments only here; they
# presumably are (n_dims, n_points, n_loops, dims schedule, points schedule,
# loops schedule) -- confirm against curriculum.py.
cirriculum_b5 = CurriculumSimple(
    n_dims,
    31,
    5,
    [5000, n_dims, 0],
    [5000, 31, 0],
    [1000, 5, 0],
)

# seed=None: no explicit seed is handed to the trainer.
metrics_l1_b5_d10 = train_without_config(
    model_loop_b5,
    cirriculum_b5,
    model_n_dims=n_dims,
    log_every_steps=10,
    train_steps=15000,
    family="gpt2_loop",
    do_wandb_log=False,
    seed=None,
    task_name="noisy_linear_regression",
)

number of parameters: 0.20M


  0%|          | 0/15000 [00:00<?, ?it/s]

0


loss 10.217330932617188:   5%|▍         | 699/15000 [00:24<08:14, 28.91it/s]


KeyboardInterrupt: 

In [20]:
# Run "b10": build the looped transformer, then place it on the GPU.
model_loop_b10 = TransformerModelLooped(
    n_dims=n_dims, n_positions=101, n_embd=128, n_layer=1, n_head=4, pred_type="regression"
)
model_loop_b10 = model_loop_b10.cuda()

# NOTE(review): CurriculumSimple takes positional arguments only here; they
# presumably are (n_dims, n_points, n_loops, dims schedule, points schedule,
# loops schedule) -- confirm against curriculum.py.
cirriculum_b10 = CurriculumSimple(
    n_dims,
    31,
    10,
    [5000, n_dims, 0],
    [5000, 31, 0],
    [5000, 10, 0],
)

# seed=None: no explicit seed is handed to the trainer.
metrics_l1_b10_d10 = train_without_config(
    model_loop_b10,
    cirriculum_b10,
    model_n_dims=n_dims,
    log_every_steps=10,
    train_steps=15000,
    family="gpt2_loop",
    do_wandb_log=False,
    seed=None,
    task_name="noisy_linear_regression",
)

In [24]:
# Run "b15": build the looped transformer, then place it on the GPU.
model_loop_b15 = TransformerModelLooped(
    n_dims=n_dims, n_positions=101, n_embd=128, n_layer=1, n_head=4, pred_type="regression"
)
model_loop_b15 = model_loop_b15.cuda()

# NOTE(review): CurriculumSimple takes positional arguments only here; they
# presumably are (n_dims, n_points, n_loops, dims schedule, points schedule,
# loops schedule) -- confirm against curriculum.py.
cirriculum_b15 = CurriculumSimple(
    n_dims,
    31,
    15,
    [5000, n_dims, 0],
    [5000, 31, 0],
    [5000, 15, 0],
)

# seed=None: no explicit seed is handed to the trainer.
metrics_l1_b15_d10 = train_without_config(
    model_loop_b15,
    cirriculum_b15,
    model_n_dims=n_dims,
    log_every_steps=10,
    train_steps=15000,
    family="gpt2_loop",
    do_wandb_log=False,
    seed=None,
    task_name="noisy_linear_regression",
)

In [5]:
# Run "b5_b10": build the looped transformer, then place it on the GPU.
model_loop_b5_b10 = TransformerModelLooped(
    n_dims=n_dims, n_positions=101, n_embd=128, n_layer=1, n_head=4, pred_type="regression"
)
model_loop_b5_b10 = model_loop_b5_b10.cuda()

# NOTE(review): CurriculumSimple takes positional arguments only here; they
# presumably are (n_dims, n_points, n_loops, dims schedule, points schedule,
# loops schedule) -- confirm against curriculum.py.
cirriculum_b5_b10 = CurriculumSimple(
    n_dims,
    31,
    5,
    [5000, n_dims, 0],
    [5000, 31, 0],
    [2000, 10, 1],
)

# seed=None: no explicit seed is handed to the trainer.
metrics_l1_b5_b10_d10 = train_without_config(
    model_loop_b5_b10,
    cirriculum_b5_b10,
    model_n_dims=n_dims,
    log_every_steps=10,
    train_steps=15000,
    family="gpt2_loop",
    do_wandb_log=False,
    seed=None,
    task_name="noisy_linear_regression",
)

number of parameters: 0.20M


loss 10.921560287475586:   0%|          | 3/15000 [00:00<10:30, 23.79it/s]

0


loss 3.8144404888153076: 100%|██████████| 15000/15000 [13:11<00:00, 18.96it/s]


In [30]:
def calculate_by_loop(model, max_loops=149, n_points=31):
    """Evaluate ``model`` once per loop count and return the normalised losses.

    ``validate_model`` is called for every loop count from 1 to ``max_loops``
    (inclusive), and each returned loss is divided by the notebook-level
    ``n_dims``.

    The dimensionality forwarded to ``validate_model`` is read from that same
    notebook-level ``n_dims`` rather than a separate literal, so the evaluated
    dimension and the normalisation constant cannot drift apart.

    Args:
        model: Model forwarded unchanged to ``validate_model``.
        max_loops: Largest ``n_loops`` value evaluated. Defaults to 149.
        n_points: Forwarded to ``validate_model`` as ``n_points``. Defaults
            to 31.

    Returns:
        A ``(loop_steps, val_by_loop)`` tuple of two equally long lists: the
        loop counts that were evaluated and the matching ``loss / n_dims``
        values, in the same order.
    """
    loop_steps = list(range(1, max_loops + 1))
    val_by_loop = [
        validate_model(
            model,
            n_dims_truncated=n_dims,
            n_loops=n_loops,
            model_n_dims=n_dims,
            n_points=n_points,
            family="gpt2_loop",
            task_name="noisy_linear_regression",
        ) / n_dims
        for n_loops in loop_steps
    ]
    return loop_steps, val_by_loop

# Sweep the number of loops at validation time for every trained model.
# The call order is kept as-is: validate_model may consume random state, so
# reordering could change the recorded numbers -- TODO confirm.
# NOTE(review): the saved output of the model_loop_b5 training cell ends in
# KeyboardInterrupt at 699/15000 steps; confirm that model was trained to
# completion before comparing its curve with the others.
steps_b10, values_b10 = calculate_by_loop(model_loop_b10)
steps_b5, values_b5 = calculate_by_loop(model_loop_b5)
steps_b5_b15, values_b5_b15 = calculate_by_loop(model_loop_b5_b15)
steps_b15, values_b15 = calculate_by_loop(model_loop_b15)
steps_b5_b10, values_b5_b10 = calculate_by_loop(model_loop_b5_b10)

In [None]:
# Plot normalised validation loss against the number of loops used at
# validation time, one curve per training run.
fig, ax = plt.subplots()

# Each curve carries its own label, so the legend stays correct even if the
# plotting order is changed later.
ax.plot(steps_b5, values_b5, label="b=5")
ax.plot(steps_b10, values_b10, label="b=10")
ax.plot(steps_b15, values_b15, label="b=15")
ax.plot(steps_b5_b10, values_b5_b10, label="b=5-10")
ax.plot(steps_b5_b15, values_b5_b15, label="b=5-15")

ax.grid()
ax.set_title("T=20")
# Axis labels so the saved figure can be read on its own.
ax.set_xlabel("n_loops used at validation")
ax.set_ylabel("validation loss / n_dims")
ax.legend()

# The target directory must already exist; savefig does not create it.
fig.savefig('../images/check_for_scheduling_convergence_properties_noisy_linear_regression.png')