In [1]:
import torch,inspect
import argparse
from pathlib import Path
from functools import partial
import numpy as np
from argparse import Namespace
from dev import namespace_tools
# nested namespace argument containing all elements associated with the training setup

notebook_run = Namespace(
    # scalar hyper-parameters of the run
    simple_hp=Namespace(
        batch_size=32,
        d_model=64,
        # np.inf is the default; the training loop compares an epoch gap against this
        # value, and no finite gap exceeds np.inf
        early_stop_thresh=np.inf,
        nb_epochs=1000,
        warm_up_epochs=20,
    ),
    # settings that limit the size of the dataset
    dset_truncation=Namespace(
        limit_length=15,
        use_splitting=False,
        max_length_from_file=False,
    ),
    # settings for the optimization algorithm; both entries are partials with their
    # keyword arguments pre-filled (presumably completed with the model parameters /
    # the optimizer by the setup notebook -- TODO confirm)
    opt_params=Namespace(
        unlinked_optimizer=partial(torch.optim.NAdam, lr=0.01),
        unlinked_scheduler=partial(
            torch.optim.lr_scheduler.ReduceLROnPlateau,
            mode='min',
            factor=0.9,
            patience=20,
        ),
    ),
    # settings that control reloading of a previous training state
    train_state_control=Namespace(
        load_from_backup=True,
        restore_optimizer=True,
    ),
    # paths, given relative to `root`
    paths=namespace_tools.Paths(
        path_dataset="data/french_english_dataset/fra.txt",
        path_language_info="models/language_info.pth",
        path_dataset_splitting="dataset_splitting",
        path_model_and_dependencies="models/sequence_translator_transformer_new.pth",
        root="../..",
    ),
)

In [2]:
# Wrap the nested configuration namespace in a NameSpaceAggregation; later cells rely on
# its `diffuse()` and `state_dict()` methods.
# NOTE(review): this rebinds `notebook_run`, so re-running the cell without re-running
# the configuration cell wraps an already wrapped object -- confirm that this is harmless.
notebook_run = namespace_tools.NameSpaceAggregation(notebook_run)
# Presumably publishes the sub-namespaces (simple_hp, paths, ...) as notebook globals;
# later cells read `simple_hp` and `paths` directly -- TODO confirm against namespace_tools.
notebook_run.diffuse(globals())

In [3]:
from ploomber_engine.ipython import PloomberClient
from ploomber import DAG
from pathlib import Path
from ploomber.products import File

# Create a ploomber-engine client for the training-setup notebook, using the
# directory two levels up ("../../") as its working directory.
client = PloomberClient.from_path(Path("./training_setup.ipynb"),cwd=Path("../../"))
from argparse import Namespace

from translation_machine.models import transformer_mod
from translation_machine import sentence_mod

# Called without arguments, diffuse() returns a value that is handed to the setup
# notebook as its initial namespace (presumably a flat dict of the run configuration).
initial_namespace_as_dict = notebook_run.diffuse()
# Executes the setup notebook and collects the variables it defines
# (presumably as a dict, since it is iterated with .items() below).
train_setup = client.get_namespace(initial_namespace_as_dict)
# Copy every variable of the setup notebook into this notebook's globals; the later
# cells use names such as model, optimizer and scheduler that are not defined here.
# Note that the loop variables `key` and `val` remain defined as globals afterwards.
for key,val in train_setup.items():
        globals()[key] = val

Executing cell: 13: 100%|███████████████████████| 21/21 [00:02<00:00,  7.59it/s]


In [4]:
# Revert to train mode.
# NOTE(review): presumably the setup notebook leaves the model in eval mode -- confirm.
model.train()
# Bare expression so that the cell displays the resulting mode flag.
model.training

True

In [5]:
from translation_machine import model_trainer_mod
# Build the trainer from the model, optimizer, data loaders and baseline loss.
# None of these names is defined in this notebook; presumably they are injected into
# the globals from the setup notebook's namespace -- TODO confirm.
model_trainer = model_trainer_mod.ModelTrainer(
    model,
    optimizer,
    train_data_loader,
    val_data_loader,
    baseline_loss,
)

In [6]:
from argparse import Namespace
from pathlib import Path
class Paths(Namespace):
    """Namespace of filesystem paths resolved against a common root directory.

    Every keyword argument other than ``root`` is joined onto ``root`` and stored
    as a ``pathlib.Path`` attribute of the same name. ``root`` itself is stored
    unchanged, exactly as it was passed in.

    Raises:
        AssertionError: if ``root`` is not given, or if at least one resolved path
            does not exist. The message lists every missing entry.
    """

    def __init__(self, **kwargs):
        # Explicit raises instead of `assert` statements: the checks must survive
        # `python -O`, and the exception type stays the same for existing callers.
        if "root" not in kwargs:
            raise AssertionError("root must be given with the paths")
        self.root = kwargs.pop("root")

        root_dir = Path(self.root)
        resolved = {name: root_dir.joinpath(relative) for name, relative in kwargs.items()}

        # Report all missing paths at once so the failure says what to fix.
        missing = [f"{name}={path}" for name, path in resolved.items() if not path.exists()]
        if missing:
            raise AssertionError("paths do not exist: " + ", ".join(missing))

        super().__init__(**resolved)

# Resolve the dataset / model locations against the root two levels up; Paths joins each
# entry onto `root` and requires the resulting path to exist.
# NOTE(review): the same values are already declared under `notebook_run.paths` in the
# configuration cell; this assignment rebinds the global `paths`.
paths = Paths(**{
    "root": "../..",
    "path_dataset": "data/french_english_dataset/fra.txt",
    "path_language_info": "models/language_info.pth",
    "path_dataset_splitting": "dataset_splitting",
    "path_model_and_dependencies": "models/sequence_translator_transformer_new.pth",
})

In [7]:
from tqdm import tqdm
import matplotlib.pyplot as plt


def _save_loss_curve(losses, path="loss_curve"):
    """Plot the train (blue) and val (green) loss histories and save the figure to `path`.

    A fresh figure is created and closed on every call, so repeated checkpoints do
    not keep stacking artists on one implicit pyplot figure.
    """
    fig, ax = plt.subplots()
    ax.plot(losses["train"], "b*")
    ax.plot(losses["val"], "g*")
    ax.set_title("losses")
    fig.savefig(path)
    plt.close(fig)


# Train for at most `simple_hp.nb_epochs` epochs and checkpoint whenever the mean
# validation loss improves. `scheduler`, `optimizer`, `losses`, `metrics` and
# `model_inputs` are not defined in this cell; they are presumably injected from the
# setup notebook's namespace -- TODO confirm.

# NOTE(review): the best validation loss restarts from +inf, so the first epoch with a
# finite loss always overwrites the checkpoint file, even if a restored backup was
# better. Confirm whether it should be seeded from the restored `losses["val"]`.
best_loss_val_mean = np.inf
# Tracked in loop-index units: the early-stop test below compares it with `epoch`,
# whereas `scheduler.last_epoch` keeps counting across restored runs.
best_epoch = 0

# The configuration of this run is appended to the history once, not at every
# checkpoint; otherwise each save would add one more copy of the same configuration.
previous_runs = back_up["notebook_runs"] if "back_up" in globals() else tuple()
notebook_runs_history = previous_runs + (notebook_run.state_dict(),)

for epoch in tqdm(range(simple_hp.nb_epochs)):
    print(f"training for epoch {epoch}")
    print(f"for epoch {epoch} learning rate is {optimizer.param_groups[0]['lr']}" )
    print("training_step")
    loss_train,nb_words_per_batch_train,metric_train = model_trainer.train_on_epoch()
    print("validation_step")
    loss_val,nb_words_per_batch_val,metric_val = model_trainer.validate_on_epoch()

    # Summed loss divided by the summed per-batch counts returned by the trainer.
    sum_loss_train = torch.tensor(loss_train).sum()
    sum_loss_val = torch.tensor(loss_val).sum()
    mean_train_loss = sum_loss_train/sum(nb_words_per_batch_train)
    mean_val_loss = sum_loss_val/sum(nb_words_per_batch_val)

    # The scheduler is driven by the validation loss.
    scheduler.step(mean_val_loss)

    print(f"for epoch {epoch} mean loss on train {mean_train_loss}")
    print(f"for epoch {epoch} mean loss on val {mean_val_loss}")

    losses["train"].append(mean_train_loss)
    losses["val"].append(mean_val_loss)
    metrics["train"].append(metric_train)
    metrics["val"].append(metric_val)

    if mean_val_loss < best_loss_val_mean:
        best_epoch = epoch
        best_loss_val_mean = mean_val_loss

        model_training_state = {
            "model_params": model_trainer.model.state_dict(),
            "model_inputs": model_inputs,
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
        }
        results = {"losses": losses, "metrics": metrics}

        back_up = {
            "notebook_runs": notebook_runs_history,
            "results": results,
            "model_training_state": model_training_state,
        }
        torch.save(back_up,paths.path_model_and_dependencies)
        print(f"saving for epoch {epoch}")

        _save_loss_curve(losses)
    elif epoch - best_epoch > simple_hp.early_stop_thresh  and epoch > simple_hp.warm_up_epochs:
        # No improvement for more than `early_stop_thresh` epochs after the warm-up.
        print("Early stopped training at epoch %d" % epoch)
        break  # terminate the training loop

    # Drop the per-epoch results before the next iteration.
    del loss_train,nb_words_per_batch_train,metric_train

    del loss_val,nb_words_per_batch_val,metric_val


  0%|                                                                            | 0/1000 [00:00<?, ?it/s]

training for epoch 0
for epoch 0 learning rate is 0.0025418658283290017
training_step
0 1.95886597266564
validation_step
0 1.9844744755671575
for epoch 0 mean loss on train 1.958866000175476
for epoch 0 mean loss on val 1.9844744205474854
saving for epoch 0


  0%|                                                                    | 1/1000 [00:01<26:27,  1.59s/it]

training for epoch 1
for epoch 1 learning rate is 0.0025418658283290017
training_step
0 2.068482178908128
validation_step
0 1.9683157113882213
for epoch 1 mean loss on train 2.0684821605682373
for epoch 1 mean loss on val 1.9683157205581665
saving for epoch 1


  0%|▏                                                                   | 2/1000 [00:02<15:07,  1.10it/s]

training for epoch 2
for epoch 2 learning rate is 0.0025418658283290017
training_step
0 1.920576389019306
validation_step
0 2.064667481642503
for epoch 2 mean loss on train 1.9205763339996338
for epoch 2 mean loss on val 2.0646674633026123


  0%|▏                                                                   | 3/1000 [00:02<09:21,  1.78it/s]

training for epoch 3
for epoch 3 learning rate is 0.0025418658283290017
training_step
0 2.0370087256798377
validation_step
0 2.0157061356764574
for epoch 3 mean loss on train 2.037008762359619
for epoch 3 mean loss on val 2.0157060623168945


  0%|▎                                                                   | 4/1000 [00:02<06:28,  2.56it/s]

training for epoch 4
for epoch 4 learning rate is 0.0025418658283290017
training_step
0 2.0142291142390323
validation_step
0 2.0674494229830227
for epoch 4 mean loss on train 2.0142290592193604
for epoch 4 mean loss on val 2.0674493312835693


  0%|▎                                                                   | 5/1000 [00:02<04:53,  3.39it/s]

training for epoch 5
for epoch 5 learning rate is 0.0025418658283290017
training_step
0 2.074574250441331
validation_step
0 1.9569169558011568
for epoch 5 mean loss on train 2.0745742321014404
for epoch 5 mean loss on val 1.9569169282913208
saving for epoch 5


  1%|▍                                                                   | 6/1000 [00:02<05:30,  3.01it/s]

training for epoch 6
for epoch 6 learning rate is 0.0025418658283290017
training_step
0 2.034579643836388
validation_step
0 2.171418850238507
for epoch 6 mean loss on train 2.0345797538757324
for epoch 6 mean loss on val 2.1714189052581787


  1%|▍                                                                   | 7/1000 [00:02<04:29,  3.69it/s]

training for epoch 7
for epoch 7 learning rate is 0.0025418658283290017
training_step
0 2.003692920391376
validation_step
0 1.9677688892071064
for epoch 7 mean loss on train 2.003692865371704
for epoch 7 mean loss on val 1.967768907546997


  1%|▌                                                                   | 8/1000 [00:03<03:40,  4.50it/s]

training for epoch 8
for epoch 8 learning rate is 0.0025418658283290017
training_step
0 2.011869870699369
validation_step
0 1.976263926579402
for epoch 8 mean loss on train 2.0118699073791504
for epoch 8 mean loss on val 1.9762638807296753


  1%|▌                                                                   | 9/1000 [00:03<03:14,  5.10it/s]

training for epoch 9
for epoch 9 learning rate is 0.0025418658283290017
training_step
0 2.0859762338491588
validation_step
0 2.185197243323693
for epoch 9 mean loss on train 2.0859761238098145
for epoch 9 mean loss on val 2.185197353363037


  1%|▋                                                                  | 10/1000 [00:03<02:54,  5.68it/s]

training for epoch 10
for epoch 10 learning rate is 0.0025418658283290017
training_step
0 2.0158006227933445
validation_step
0 1.9985254727877104
for epoch 10 mean loss on train 2.015800714492798
for epoch 10 mean loss on val 1.9985255002975464


  1%|▋                                                                  | 11/1000 [00:03<02:32,  6.47it/s]

training for epoch 11
for epoch 11 learning rate is 0.0025418658283290017
training_step
0 2.0613189110389123
validation_step
0 2.092140344473032
for epoch 11 mean loss on train 2.061318874359131
for epoch 11 mean loss on val 2.0921404361724854


  1%|▊                                                                  | 12/1000 [00:03<02:23,  6.88it/s]

training for epoch 12
for epoch 12 learning rate is 0.0025418658283290017
training_step
0 2.1546090932992787
validation_step
0 2.010467529296875
for epoch 12 mean loss on train 2.154609203338623
for epoch 12 mean loss on val 2.010467529296875


  1%|▊                                                                  | 13/1000 [00:03<02:27,  6.70it/s]

training for epoch 13
for epoch 13 learning rate is 0.0025418658283290017
training_step
0 2.0062235318697414
validation_step
0 2.072729991032527
for epoch 13 mean loss on train 2.006223440170288
for epoch 13 mean loss on val 2.07273006439209


  1%|▉                                                                  | 14/1000 [00:03<02:29,  6.62it/s]

training for epoch 14
for epoch 14 learning rate is 0.0025418658283290017
training_step
0 2.0115055671105018
validation_step
0 1.962000039907602
for epoch 14 mean loss on train 2.011505603790283
for epoch 14 mean loss on val 1.9620000123977661


  2%|█                                                                  | 15/1000 [00:04<02:20,  7.03it/s]

training for epoch 15
for epoch 15 learning rate is 0.0025418658283290017
training_step
0 1.9978470435509315
validation_step
0 1.9993963975172777
for epoch 15 mean loss on train 1.997847080230713
for epoch 15 mean loss on val 1.9993964433670044


  2%|█                                                                  | 16/1000 [00:04<02:11,  7.48it/s]

training for epoch 16
for epoch 16 learning rate is 0.0025418658283290017
training_step
0 2.051831318781926
validation_step
0 1.9662127861609826
for epoch 16 mean loss on train 2.0518312454223633
for epoch 16 mean loss on val 1.9662127494812012


  2%|█▏                                                                 | 17/1000 [00:04<02:07,  7.71it/s]

training for epoch 17
for epoch 17 learning rate is 0.0025418658283290017
training_step
0 1.9822945227989783
validation_step
0 2.026107201209435
for epoch 17 mean loss on train 1.9822945594787598
for epoch 17 mean loss on val 2.0261073112487793


  2%|█▏                                                                 | 18/1000 [00:04<02:05,  7.85it/s]

training for epoch 18
for epoch 18 learning rate is 0.0025418658283290017
training_step
0 2.2235260009765625
validation_step
0 1.9947310227614183
for epoch 18 mean loss on train 2.2235260009765625
for epoch 18 mean loss on val 1.994731068611145


  2%|█▎                                                                 | 19/1000 [00:04<02:08,  7.62it/s]

training for epoch 19
for epoch 19 learning rate is 0.0025418658283290017
training_step
0 2.03832641014686
validation_step
0 2.0820259681114783
for epoch 19 mean loss on train 2.0383265018463135
for epoch 19 mean loss on val 2.0820260047912598


  2%|█▎                                                                 | 20/1000 [00:04<02:09,  7.54it/s]

training for epoch 20
for epoch 20 learning rate is 0.0025418658283290017
training_step
0 2.082683856670673
validation_step
0 2.1373045994685245
for epoch 20 mean loss on train 2.082683801651001
for epoch 20 mean loss on val 2.1373045444488525


  2%|█▍                                                                 | 21/1000 [00:04<02:12,  7.37it/s]

training for epoch 21
for epoch 21 learning rate is 0.0022876792454961017
training_step
0 2.114712348351112
validation_step
0 2.022283700796274
for epoch 21 mean loss on train 2.1147122383117676
for epoch 21 mean loss on val 2.0222837924957275


  2%|█▍                                                                 | 22/1000 [00:04<02:14,  7.30it/s]

training for epoch 22
for epoch 22 learning rate is 0.0022876792454961017
training_step
0 1.9975619682898889
validation_step
0 2.0431990990271935
for epoch 22 mean loss on train 1.9975619316101074
for epoch 22 mean loss on val 2.043199062347412


  2%|█▌                                                                 | 23/1000 [00:05<02:12,  7.36it/s]

training for epoch 23
for epoch 23 learning rate is 0.0022876792454961017
training_step
0 2.1134888575627255
validation_step
0 2.154080024132362
for epoch 23 mean loss on train 2.1134889125823975
for epoch 23 mean loss on val 2.1540799140930176


  2%|█▌                                                                 | 24/1000 [00:05<02:15,  7.18it/s]

training for epoch 24
for epoch 24 learning rate is 0.0022876792454961017
training_step
0 2.000245754535382
validation_step
0 2.042814548198993
for epoch 24 mean loss on train 2.0002458095550537
for epoch 24 mean loss on val 2.0428144931793213


  2%|█▋                                                                 | 25/1000 [00:05<02:22,  6.83it/s]

training for epoch 25
for epoch 25 learning rate is 0.0022876792454961017
training_step
0 2.0485380612886868
validation_step
0 1.9794446505033052
for epoch 25 mean loss on train 2.0485379695892334
for epoch 25 mean loss on val 1.9794446229934692


  3%|█▋                                                                 | 26/1000 [00:05<02:14,  7.22it/s]

training for epoch 26
for epoch 26 learning rate is 0.0022876792454961017
training_step
0 2.113091542170598
validation_step
0 2.023430897639348
for epoch 26 mean loss on train 2.113091468811035
for epoch 26 mean loss on val 2.023430824279785


  3%|█▊                                                                 | 27/1000 [00:05<02:09,  7.54it/s]

training for epoch 27
for epoch 27 learning rate is 0.0022876792454961017
training_step
0 1.9720369485708384
validation_step
0 2.0379698826716495
for epoch 27 mean loss on train 1.9720369577407837
for epoch 27 mean loss on val 2.0379698276519775


  3%|█▉                                                                 | 28/1000 [00:05<02:08,  7.54it/s]

training for epoch 28
for epoch 28 learning rate is 0.0022876792454961017
training_step
0 2.132640105027419
validation_step
0 2.0110931396484375
for epoch 28 mean loss on train 2.1326401233673096
for epoch 28 mean loss on val 2.0110931396484375


  3%|█▉                                                                 | 29/1000 [00:05<02:07,  7.60it/s]

training for epoch 29
for epoch 29 learning rate is 0.0022876792454961017
training_step
0 2.038312765268179
validation_step
0 2.0629923893855167
for epoch 29 mean loss on train 2.0383126735687256
for epoch 29 mean loss on val 2.0629923343658447


  3%|██                                                                 | 30/1000 [00:06<02:05,  7.73it/s]

training for epoch 30
for epoch 30 learning rate is 0.0022876792454961017
training_step
0 2.037657517653245
validation_step
0 2.1411622854379506
for epoch 30 mean loss on train 2.0376574993133545
for epoch 30 mean loss on val 2.141162395477295


  3%|██                                                                 | 31/1000 [00:06<02:08,  7.56it/s]

training for epoch 31
for epoch 31 learning rate is 0.0022876792454961017
training_step
0 2.0221328735351562
validation_step
0 2.002874521108774
for epoch 31 mean loss on train 2.0221328735351562
for epoch 31 mean loss on val 2.0028746128082275


  3%|██▏                                                                | 32/1000 [00:06<02:07,  7.61it/s]

training for epoch 32
for epoch 32 learning rate is 0.0022876792454961017
training_step
0 2.0437133495624247
validation_step
0 1.986236572265625
for epoch 32 mean loss on train 2.043713331222534
for epoch 32 mean loss on val 1.986236572265625


  3%|██▏                                                                | 33/1000 [00:06<02:10,  7.43it/s]

training for epoch 33
for epoch 33 learning rate is 0.0022876792454961017
training_step
0 2.0282432849590597
validation_step
0 2.0096272688645582
for epoch 33 mean loss on train 2.02824330329895
for epoch 33 mean loss on val 2.009627342224121


  3%|██▎                                                                | 34/1000 [00:06<02:11,  7.34it/s]

training for epoch 34
for epoch 34 learning rate is 0.0022876792454961017
training_step
0 1.9805465111365685
validation_step
0 2.0837015005258412
for epoch 34 mean loss on train 1.980546474456787
for epoch 34 mean loss on val 2.0837016105651855


  4%|██▎                                                                | 35/1000 [00:06<02:08,  7.51it/s]

training for epoch 35
for epoch 35 learning rate is 0.0022876792454961017
training_step
0 2.1199728158804088
validation_step
0 2.032030839186448
for epoch 35 mean loss on train 2.1199727058410645
for epoch 35 mean loss on val 2.0320308208465576


  4%|██▍                                                                | 36/1000 [00:06<02:06,  7.59it/s]

training for epoch 36
for epoch 36 learning rate is 0.0022876792454961017
training_step
0 2.096059799194336
validation_step
0 2.114949739896334
for epoch 36 mean loss on train 2.096059799194336
for epoch 36 mean loss on val 2.1149497032165527


  4%|██▍                                                                | 37/1000 [00:06<02:06,  7.62it/s]

training for epoch 37
for epoch 37 learning rate is 0.0022876792454961017
training_step
0 2.006402822641226
validation_step
0 2.006776662973257
for epoch 37 mean loss on train 2.0064027309417725
for epoch 37 mean loss on val 2.0067765712738037


  4%|██▌                                                                | 38/1000 [00:07<02:04,  7.73it/s]

training for epoch 38
for epoch 38 learning rate is 0.0022876792454961017
training_step
0 2.0392015897310696
validation_step
0 1.9899774698110728
for epoch 38 mean loss on train 2.039201498031616
for epoch 38 mean loss on val 1.989977478981018


  4%|██▌                                                                | 39/1000 [00:07<02:05,  7.69it/s]

training for epoch 39
for epoch 39 learning rate is 0.0022876792454961017
training_step
0 2.0486055520864634
validation_step
0 2.0917745736929088
for epoch 39 mean loss on train 2.048605442047119
for epoch 39 mean loss on val 2.0917744636535645


  4%|██▋                                                                | 40/1000 [00:07<02:03,  7.75it/s]

training for epoch 40
for epoch 40 learning rate is 0.0022876792454961017
training_step
0 2.059080857496995
validation_step
0 1.996946774996244
for epoch 40 mean loss on train 2.0590808391571045
for epoch 40 mean loss on val 1.9969468116760254


  4%|██▋                                                                | 41/1000 [00:07<02:01,  7.89it/s]

training for epoch 41
for epoch 41 learning rate is 0.0022876792454961017
training_step
0 2.0460163996769833
validation_step
0 2.0336039616511417
for epoch 41 mean loss on train 2.0460164546966553
for epoch 41 mean loss on val 2.0336039066314697


  4%|██▊                                                                | 42/1000 [00:07<02:02,  7.85it/s]

training for epoch 42
for epoch 42 learning rate is 0.0020589113209464917
training_step
0 2.019054266122671
validation_step
0 1.9322079878586988
for epoch 42 mean loss on train 2.0190541744232178
for epoch 42 mean loss on val 1.9322079420089722
saving for epoch 42


  4%|██▉                                                                | 43/1000 [00:08<04:17,  3.72it/s]

training for epoch 43
for epoch 43 learning rate is 0.0020589113209464917
training_step
0 2.00229732806866
validation_step
0 1.9935989379882812
for epoch 43 mean loss on train 2.0022974014282227
for epoch 43 mean loss on val 1.9935989379882812


  4%|██▉                                                                | 44/1000 [00:08<03:41,  4.32it/s]

training for epoch 44
for epoch 44 learning rate is 0.0020589113209464917
training_step
0 2.1624525510347805
validation_step
0 2.0763055361234226
for epoch 44 mean loss on train 2.162452459335327
for epoch 44 mean loss on val 2.076305627822876


  4%|███                                                                | 45/1000 [00:08<03:12,  4.97it/s]

training for epoch 45
for epoch 45 learning rate is 0.0020589113209464917
training_step
0 2.055950164794922
validation_step
0 2.027963638305664
for epoch 45 mean loss on train 2.055950164794922
for epoch 45 mean loss on val 2.027963638305664


  5%|███                                                                | 46/1000 [00:08<02:52,  5.54it/s]

training for epoch 46
for epoch 46 learning rate is 0.0020589113209464917
training_step
0 2.019229448758639
validation_step
0 2.023885286771334
for epoch 46 mean loss on train 2.0192294120788574
for epoch 46 mean loss on val 2.0238852500915527


  5%|███▏                                                               | 47/1000 [00:08<02:43,  5.82it/s]

training for epoch 47
for epoch 47 learning rate is 0.0020589113209464917
training_step
0 1.992382783156175
validation_step
0 2.0378601367657003
for epoch 47 mean loss on train 1.9923827648162842
for epoch 47 mean loss on val 2.037860155105591


  5%|███▏                                                               | 48/1000 [00:08<02:37,  6.05it/s]

training for epoch 48
for epoch 48 learning rate is 0.0020589113209464917
training_step
0 1.9769020080566406
validation_step
0 1.9984352405254657
for epoch 48 mean loss on train 1.9769020080566406
for epoch 48 mean loss on val 1.9984352588653564


  5%|███▎                                                               | 49/1000 [00:09<02:32,  6.24it/s]

training for epoch 49
for epoch 49 learning rate is 0.0020589113209464917
training_step
0 1.9773194239689753
validation_step
0 2.0201639028695912
for epoch 49 mean loss on train 1.9773194789886475
for epoch 49 mean loss on val 2.0201640129089355


  5%|███▎                                                               | 50/1000 [00:09<02:24,  6.57it/s]

training for epoch 50
for epoch 50 learning rate is 0.0020589113209464917
training_step
0 2.0537408682016225
validation_step
0 2.0322123307448168
for epoch 50 mean loss on train 2.053740978240967
for epoch 50 mean loss on val 2.032212257385254


  5%|███▍                                                               | 51/1000 [00:09<02:17,  6.88it/s]

training for epoch 51
for epoch 51 learning rate is 0.0020589113209464917
training_step
0 2.018678518442007
validation_step
0 2.008832784799429
for epoch 51 mean loss on train 2.0186784267425537
for epoch 51 mean loss on val 2.0088326930999756


  5%|███▍                                                               | 52/1000 [00:09<02:13,  7.12it/s]

training for epoch 52
for epoch 52 learning rate is 0.0020589113209464917
training_step
0 2.0657203380878153
validation_step
0 2.0055926396296573
for epoch 52 mean loss on train 2.065720319747925
for epoch 52 mean loss on val 2.0055925846099854


  5%|███▌                                                               | 53/1000 [00:09<02:09,  7.32it/s]

training for epoch 53
for epoch 53 learning rate is 0.0020589113209464917
training_step
0 2.1453703366793118
validation_step
0 2.0549322275015025
for epoch 53 mean loss on train 2.1453702449798584
for epoch 53 mean loss on val 2.054932117462158


  5%|███▌                                                               | 54/1000 [00:09<02:08,  7.37it/s]

training for epoch 54
for epoch 54 learning rate is 0.0020589113209464917
training_step
0 2.0282052847055287
validation_step
0 2.052511068490835
for epoch 54 mean loss on train 2.028205394744873
for epoch 54 mean loss on val 2.052510976791382


  6%|███▋                                                               | 55/1000 [00:09<02:07,  7.40it/s]

training for epoch 55
for epoch 55 learning rate is 0.0020589113209464917
training_step
0 2.0040667607234073
validation_step
0 2.1226643782395582
for epoch 55 mean loss on train 2.0040667057037354
for epoch 55 mean loss on val 2.122664451599121


  6%|███▊                                                               | 56/1000 [00:09<02:04,  7.57it/s]

training for epoch 56
for epoch 56 learning rate is 0.0020589113209464917
training_step
0 1.9910931220421424
validation_step
0 2.225985600398137
for epoch 56 mean loss on train 1.9910931587219238
for epoch 56 mean loss on val 2.225985527038574


  6%|███▊                                                               | 57/1000 [00:10<02:11,  7.20it/s]

training for epoch 57
for epoch 57 learning rate is 0.0020589113209464917
training_step
0 2.023511299720177
validation_step
0 2.057558353130634
for epoch 57 mean loss on train 2.0235114097595215
for epoch 57 mean loss on val 2.057558298110962


  6%|███▉                                                               | 58/1000 [00:10<02:14,  7.02it/s]

training for epoch 58
for epoch 58 learning rate is 0.0020589113209464917
training_step
0 1.9969627673809345
validation_step
0 1.954806401179387
for epoch 58 mean loss on train 1.9969627857208252
for epoch 58 mean loss on val 1.9548064470291138


  6%|███▉                                                               | 59/1000 [00:10<02:17,  6.84it/s]

training for epoch 59
for epoch 59 learning rate is 0.0020589113209464917
training_step
0 2.0120490147517276
validation_step
0 1.9745316138634315
for epoch 59 mean loss on train 2.0120489597320557
for epoch 59 mean loss on val 1.974531650543213


  6%|████                                                               | 60/1000 [00:10<02:10,  7.20it/s]

training for epoch 60
for epoch 60 learning rate is 0.0020589113209464917
training_step
0 1.9677135760967548
validation_step
0 2.0440632746769833
for epoch 60 mean loss on train 1.9677135944366455
for epoch 60 mean loss on val 2.0440633296966553


  6%|████                                                               | 61/1000 [00:10<02:06,  7.44it/s]

training for epoch 61
for epoch 61 learning rate is 0.0020589113209464917
training_step
0 2.0106465266301083
validation_step
0 2.018472378070538
for epoch 61 mean loss on train 2.0106465816497803
for epoch 61 mean loss on val 2.01847243309021


  6%|████▏                                                              | 62/1000 [00:10<02:03,  7.60it/s]

training for epoch 62
for epoch 62 learning rate is 0.0020589113209464917
training_step
0 2.0836029052734375
validation_step
0 2.0238285064697266
for epoch 62 mean loss on train 2.0836029052734375
for epoch 62 mean loss on val 2.0238285064697266


  6%|████▏                                                              | 63/1000 [00:10<02:00,  7.80it/s]

training for epoch 63
for epoch 63 learning rate is 0.0018530201888518425
training_step
0 2.0798090421236477
validation_step
0 2.057681597196139
for epoch 63 mean loss on train 2.0798089504241943
for epoch 63 mean loss on val 2.0576815605163574


  6%|████▎                                                              | 64/1000 [00:11<01:59,  7.82it/s]

training for epoch 64
for epoch 64 learning rate is 0.0018530201888518425
training_step
0 2.0524941957913914
validation_step
0 2.0487738389235277
for epoch 64 mean loss on train 2.0524942874908447
for epoch 64 mean loss on val 2.048773765563965


  6%|████▎                                                              | 65/1000 [00:11<02:00,  7.75it/s]

training for epoch 65
for epoch 65 learning rate is 0.0018530201888518425
training_step
0 2.0297853029691257
validation_step
0 2.0549674400916467
for epoch 65 mean loss on train 2.029785394668579
for epoch 65 mean loss on val 2.0549674034118652


  7%|████▍                                                              | 66/1000 [00:11<01:56,  8.01it/s]

training for epoch 66
for epoch 66 learning rate is 0.0018530201888518425
training_step
0 2.001066354604868
validation_step
0 2.1424466646634617
for epoch 66 mean loss on train 2.0010664463043213
for epoch 66 mean loss on val 2.142446756362915


  7%|████▍                                                              | 67/1000 [00:11<01:58,  7.87it/s]

training for epoch 67
for epoch 67 learning rate is 0.0018530201888518425
training_step
0 2.097883811363807
validation_step
0 2.0363925053523135
for epoch 67 mean loss on train 2.097883701324463
for epoch 67 mean loss on val 2.0363924503326416


  7%|████▌                                                              | 68/1000 [00:11<02:00,  7.74it/s]

training for epoch 68
for epoch 68 learning rate is 0.0018530201888518425
training_step
0 2.140949689424955
validation_step
0 2.110346867487981
for epoch 68 mean loss on train 2.1409497261047363
for epoch 68 mean loss on val 2.110346794128418


  7%|████▌                                                              | 69/1000 [00:11<02:03,  7.54it/s]

training for epoch 69
for epoch 69 learning rate is 0.0018530201888518425
training_step
0 2.17625984778771
validation_step
0 2.0137417133037863
for epoch 69 mean loss on train 2.176259756088257
for epoch 69 mean loss on val 2.0137417316436768


  7%|████▋                                                              | 70/1000 [00:11<02:05,  7.44it/s]

training for epoch 70
for epoch 70 learning rate is 0.0018530201888518425
training_step
0 2.1045717092660756
validation_step
0 2.175665048452524
for epoch 70 mean loss on train 2.10457181930542
for epoch 70 mean loss on val 2.1756651401519775


  7%|████▊                                                              | 71/1000 [00:11<02:08,  7.24it/s]

training for epoch 71
for epoch 71 learning rate is 0.0018530201888518425
training_step
0 2.1203214205228367
validation_step
0 2.0530151954064
for epoch 71 mean loss on train 2.12032151222229
for epoch 71 mean loss on val 2.0530152320861816


  7%|████▊                                                              | 72/1000 [00:12<02:03,  7.53it/s]

training for epoch 72
for epoch 72 learning rate is 0.0018530201888518425
training_step
0 2.0013517233041616
validation_step
0 2.0628798558161807
for epoch 72 mean loss on train 2.001351833343506
for epoch 72 mean loss on val 2.062879800796509


  7%|████▉                                                              | 73/1000 [00:12<02:01,  7.61it/s]

training for epoch 73
for epoch 73 learning rate is 0.0018530201888518425
training_step
0 2.1444376431978664
validation_step
0 2.029025591336764
for epoch 73 mean loss on train 2.144437551498413
for epoch 73 mean loss on val 2.0290255546569824


  7%|████▉                                                              | 74/1000 [00:12<01:59,  7.73it/s]

training for epoch 74
for epoch 74 learning rate is 0.0018530201888518425
training_step
0 2.015980940598708
validation_step
0 2.037371562077449
for epoch 74 mean loss on train 2.0159809589385986
for epoch 74 mean loss on val 2.0373716354370117


  8%|█████                                                              | 75/1000 [00:12<01:56,  7.95it/s]

training for epoch 75
for epoch 75 learning rate is 0.0018530201888518425
training_step
0 2.0081807650052586
validation_step
0 1.986540280855619
for epoch 75 mean loss on train 2.008180856704712
for epoch 75 mean loss on val 1.9865403175354004


  8%|█████                                                              | 76/1000 [00:12<01:58,  7.77it/s]

training for epoch 76
for epoch 76 learning rate is 0.0018530201888518425
training_step
0 2.01904663672814
validation_step
0 2.1024222740760217
for epoch 76 mean loss on train 2.0190465450286865
for epoch 76 mean loss on val 2.1024222373962402


  8%|█████▏                                                             | 77/1000 [00:12<01:57,  7.88it/s]

training for epoch 77
for epoch 77 learning rate is 0.0018530201888518425
training_step
0 2.06046515244704
validation_step
0 2.009778829721304
for epoch 77 mean loss on train 2.060465097427368
for epoch 77 mean loss on val 2.0097787380218506


  8%|█████▏                                                             | 78/1000 [00:12<01:54,  8.08it/s]

training for epoch 78
for epoch 78 learning rate is 0.0018530201888518425
training_step
0 1.9979692605825572
validation_step
0 2.0952694232647238
for epoch 78 mean loss on train 1.9979692697525024
for epoch 78 mean loss on val 2.0952694416046143


  8%|█████▎                                                             | 79/1000 [00:12<01:54,  8.05it/s]

training for epoch 79
for epoch 79 learning rate is 0.0018530201888518425
training_step
0 2.037074455848107
validation_step
0 2.03550045306866
for epoch 79 mean loss on train 2.037074565887451
for epoch 79 mean loss on val 2.0355005264282227


  8%|█████▎                                                             | 80/1000 [00:13<01:57,  7.84it/s]

training for epoch 80
for epoch 80 learning rate is 0.0018530201888518425
training_step
0 2.0618164355938253
validation_step
0 1.9873123168945312
for epoch 80 mean loss on train 2.061816453933716
for epoch 80 mean loss on val 1.9873123168945312


  8%|█████▍                                                             | 81/1000 [00:13<01:54,  8.03it/s]

training for epoch 81
for epoch 81 learning rate is 0.0018530201888518425
training_step
0 2.085477535541241
validation_step
0 1.997514431293194
for epoch 81 mean loss on train 2.085477590560913
for epoch 81 mean loss on val 1.9975144863128662


  8%|█████▍                                                             | 82/1000 [00:13<01:55,  7.98it/s]

training for epoch 82
for epoch 82 learning rate is 0.0018530201888518425
training_step
0 2.0182188474214993
validation_step
0 2.046001434326172
for epoch 82 mean loss on train 2.018218755722046
for epoch 82 mean loss on val 2.046001434326172


  8%|█████▌                                                             | 83/1000 [00:13<01:55,  7.96it/s]

training for epoch 83
for epoch 83 learning rate is 0.0018530201888518425
training_step
0 2.0582872537466197
validation_step
0 2.012245324941782
for epoch 83 mean loss on train 2.0582871437072754
for epoch 83 mean loss on val 2.0122454166412354


  8%|█████▋                                                             | 84/1000 [00:13<01:53,  8.05it/s]

training for epoch 84
for epoch 84 learning rate is 0.0016677181699666583
training_step
0 2.005559040949895
validation_step
0 2.037843997661884
for epoch 84 mean loss on train 2.005558967590332
for epoch 84 mean loss on val 2.037843942642212


  8%|█████▋                                                             | 85/1000 [00:13<02:01,  7.55it/s]

training for epoch 85
for epoch 85 learning rate is 0.0016677181699666583
training_step
0 2.0727762075570912
validation_step
0 2.094779087946965
for epoch 85 mean loss on train 2.0727763175964355
for epoch 85 mean loss on val 2.0947790145874023


  9%|█████▊                                                             | 86/1000 [00:13<02:04,  7.35it/s]

training for epoch 86
for epoch 86 learning rate is 0.0016677181699666583
training_step
0 2.1196283193734975
validation_step
0 2.0915439312274637
for epoch 86 mean loss on train 2.119628429412842
for epoch 86 mean loss on val 2.0915439128875732


  9%|█████▊                                                             | 87/1000 [00:14<02:07,  7.18it/s]

training for epoch 87
for epoch 87 learning rate is 0.0016677181699666583
training_step
0 2.0470035259540262
validation_step
0 2.0615884340726414
for epoch 87 mean loss on train 2.0470035076141357
for epoch 87 mean loss on val 2.0615885257720947


  9%|█████▉                                                             | 88/1000 [00:14<02:02,  7.43it/s]

training for epoch 88
for epoch 88 learning rate is 0.0016677181699666583
training_step
0 2.0024537306565504
validation_step
0 2.0929505274846005
for epoch 88 mean loss on train 2.0024538040161133
for epoch 88 mean loss on val 2.0929505825042725


  9%|█████▉                                                             | 89/1000 [00:14<02:00,  7.53it/s]

training for epoch 89
for epoch 89 learning rate is 0.0016677181699666583
training_step
0 2.032713376558744
validation_step
0 2.1319040151742787
for epoch 89 mean loss on train 2.0327134132385254
for epoch 89 mean loss on val 2.131904125213623


  9%|██████                                                             | 90/1000 [00:14<02:00,  7.52it/s]

training for epoch 90
for epoch 90 learning rate is 0.0016677181699666583
training_step
0 2.062127480140099
validation_step
0 1.988659638624925
for epoch 90 mean loss on train 2.0621275901794434
for epoch 90 mean loss on val 1.9886596202850342


  9%|██████                                                             | 91/1000 [00:14<01:58,  7.69it/s]

training for epoch 91
for epoch 91 learning rate is 0.0016677181699666583
training_step
0 2.060524720412034
validation_step
0 1.9976938687838042
for epoch 91 mean loss on train 2.0605247020721436
for epoch 91 mean loss on val 1.9976938962936401


  9%|██████▏                                                            | 92/1000 [00:14<01:56,  7.78it/s]

training for epoch 92
for epoch 92 learning rate is 0.0016677181699666583
training_step
0 1.9803020770733173
validation_step
0 1.9548548184908354
for epoch 92 mean loss on train 1.980302095413208
for epoch 92 mean loss on val 1.9548548460006714


  9%|██████▏                                                            | 93/1000 [00:14<01:56,  7.76it/s]

training for epoch 93
for epoch 93 learning rate is 0.0016677181699666583
training_step
0 2.0199159475473256
validation_step
0 2.0327522571270285
for epoch 93 mean loss on train 2.01991605758667
for epoch 93 mean loss on val 2.032752275466919


  9%|██████▎                                                            | 94/1000 [00:14<01:57,  7.74it/s]

training for epoch 94
for epoch 94 learning rate is 0.0016677181699666583
training_step
0 2.045249792245718
validation_step
0 2.02591925400954
for epoch 94 mean loss on train 2.0452497005462646
for epoch 94 mean loss on val 2.025919198989868


 10%|██████▎                                                            | 95/1000 [00:15<01:55,  7.87it/s]

training for epoch 95
for epoch 95 learning rate is 0.0016677181699666583
training_step
0 2.08559564443735
validation_step
0 2.0402642763577976
for epoch 95 mean loss on train 2.0855956077575684
for epoch 95 mean loss on val 2.040264368057251


 10%|██████▍                                                            | 96/1000 [00:15<01:59,  7.56it/s]

training for epoch 96
for epoch 96 learning rate is 0.0016677181699666583
training_step
0 1.9868554335374098
validation_step
0 2.084546309251052
for epoch 96 mean loss on train 1.986855387687683
for epoch 96 mean loss on val 2.0845463275909424


 10%|██████▍                                                            | 97/1000 [00:15<02:06,  7.14it/s]

training for epoch 97
for epoch 97 learning rate is 0.0016677181699666583
training_step
0 1.986883750328651
validation_step
0 2.130173609806941
for epoch 97 mean loss on train 1.9868837594985962
for epoch 97 mean loss on val 2.130173683166504


 10%|██████▌                                                            | 98/1000 [00:15<02:03,  7.30it/s]

training for epoch 98
for epoch 98 learning rate is 0.0016677181699666583
training_step
0 2.0441554142878604
validation_step
0 2.0626902947059045
for epoch 98 mean loss on train 2.0441553592681885
for epoch 98 mean loss on val 2.062690258026123


 10%|██████▋                                                            | 99/1000 [00:15<01:58,  7.62it/s]

training for epoch 99
for epoch 99 learning rate is 0.0016677181699666583
training_step
0 2.1032819014329176
validation_step
0 2.0798592200646033
for epoch 99 mean loss on train 2.1032819747924805
for epoch 99 mean loss on val 2.0798592567443848


 10%|██████▌                                                           | 100/1000 [00:15<01:55,  7.78it/s]

training for epoch 100
for epoch 100 learning rate is 0.0016677181699666583
training_step
0 2.0267022939828725
validation_step
0 2.0242123237022986
for epoch 100 mean loss on train 2.026702404022217
for epoch 100 mean loss on val 2.02421236038208


 10%|██████▋                                                           | 101/1000 [00:15<01:55,  7.81it/s]

training for epoch 101
for epoch 101 learning rate is 0.0016677181699666583
training_step
0 2.028666716355544
validation_step
0 2.0563363295335035
for epoch 101 mean loss on train 2.0286667346954346
for epoch 101 mean loss on val 2.0563364028930664


 10%|██████▋                                                           | 102/1000 [00:15<01:55,  7.78it/s]

training for epoch 102
for epoch 102 learning rate is 0.0016677181699666583
training_step
0 2.100314947275015
validation_step
0 2.0359451587383566
for epoch 102 mean loss on train 2.1003148555755615
for epoch 102 mean loss on val 2.035945177078247


 10%|██████▊                                                           | 103/1000 [00:16<01:57,  7.63it/s]

training for epoch 103
for epoch 103 learning rate is 0.0016677181699666583
training_step
0 1.9938455728384166
validation_step
0 2.0437275813176083
for epoch 103 mean loss on train 1.9938455820083618
for epoch 103 mean loss on val 2.0437276363372803


 10%|██████▊                                                           | 104/1000 [00:16<01:54,  7.81it/s]

training for epoch 104
for epoch 104 learning rate is 0.0016677181699666583
training_step
0 2.053317436805138
validation_step
0 2.0758061042198768
for epoch 104 mean loss on train 2.0533175468444824
for epoch 104 mean loss on val 2.075806140899658


 10%|██████▉                                                           | 105/1000 [00:16<01:57,  7.63it/s]

training for epoch 105
for epoch 105 learning rate is 0.0015009463529699924
training_step
0 1.996671383197491
validation_step
0 2.0329374166635366
for epoch 105 mean loss on train 1.996671438217163
for epoch 105 mean loss on val 2.032937526702881


 11%|██████▉                                                           | 106/1000 [00:16<01:57,  7.59it/s]

training for epoch 106
for epoch 106 learning rate is 0.0015009463529699924
training_step
0 1.9759098933293269
validation_step
0 2.015653756948618
for epoch 106 mean loss on train 1.975909948348999
for epoch 106 mean loss on val 2.0156538486480713


 11%|███████                                                           | 107/1000 [00:16<01:55,  7.70it/s]

training for epoch 107
for epoch 107 learning rate is 0.0015009463529699924
training_step
0 1.9823590792142427
validation_step
0 2.0457713787372294
for epoch 107 mean loss on train 1.9823590517044067
for epoch 107 mean loss on val 2.045771360397339


 11%|███████▏                                                          | 108/1000 [00:16<01:55,  7.75it/s]

training for epoch 108
for epoch 108 learning rate is 0.0015009463529699924
training_step
0 1.9976180149958684
validation_step
0 2.1678653130164514
for epoch 108 mean loss on train 1.9976179599761963
for epoch 108 mean loss on val 2.16786527633667


 11%|███████▏                                                          | 109/1000 [00:16<01:53,  7.86it/s]

training for epoch 109
for epoch 109 learning rate is 0.0015009463529699924
training_step
0 2.0728224240816555
validation_step
0 1.9990472059983473
for epoch 109 mean loss on train 2.072822332382202
for epoch 109 mean loss on val 1.9990471601486206


 11%|███████▎                                                          | 110/1000 [00:17<01:52,  7.90it/s]

training for epoch 110
for epoch 110 learning rate is 0.0015009463529699924
training_step
0 2.056732911330003
validation_step
0 2.079043608445388
for epoch 110 mean loss on train 2.0567328929901123
for epoch 110 mean loss on val 2.0790436267852783


 11%|███████▎                                                          | 111/1000 [00:17<01:55,  7.70it/s]

training for epoch 111
for epoch 111 learning rate is 0.0015009463529699924
training_step
0 2.053036616398738
validation_step
0 2.03077272268442
for epoch 111 mean loss on train 2.053036689758301
for epoch 111 mean loss on val 2.0307726860046387


 11%|███████▍                                                          | 112/1000 [00:17<01:55,  7.68it/s]

training for epoch 112
for epoch 112 learning rate is 0.0015009463529699924
training_step
0 1.9834533104529748
validation_step
0 2.0996338770939755
for epoch 112 mean loss on train 1.9834532737731934
for epoch 112 mean loss on val 2.0996339321136475


 11%|███████▍                                                          | 113/1000 [00:17<02:00,  7.38it/s]

training for epoch 113
for epoch 113 learning rate is 0.0015009463529699924
training_step
0 2.0444164276123047
validation_step
0 2.0068060067983775
for epoch 113 mean loss on train 2.0444164276123047
for epoch 113 mean loss on val 2.006805896759033


 11%|███████▌                                                          | 114/1000 [00:17<02:00,  7.36it/s]

training for epoch 114
for epoch 114 learning rate is 0.0015009463529699924
training_step
0 2.0638016920823317
validation_step
0 2.116771551278921
for epoch 114 mean loss on train 2.0638017654418945
for epoch 114 mean loss on val 2.1167714595794678


 12%|███████▌                                                          | 115/1000 [00:17<01:53,  7.78it/s]

training for epoch 115
for epoch 115 learning rate is 0.0015009463529699924
training_step
0 2.006563039926382
validation_step
0 1.9900239797738881
for epoch 115 mean loss on train 2.0065629482269287
for epoch 115 mean loss on val 1.9900239706039429


 12%|███████▋                                                          | 116/1000 [00:17<01:48,  8.12it/s]

training for epoch 116
for epoch 116 learning rate is 0.0015009463529699924
training_step
0 2.0084973848783054
validation_step
0 2.0385956397423377
for epoch 116 mean loss on train 2.008497476577759
for epoch 116 mean loss on val 2.038595676422119


 12%|███████▋                                                          | 117/1000 [00:17<01:48,  8.15it/s]

training for epoch 117
for epoch 117 learning rate is 0.0015009463529699924
training_step
0 2.012180915245643
validation_step
0 1.981308130117563
for epoch 117 mean loss on train 2.012180805206299
for epoch 117 mean loss on val 1.981308102607727


 12%|███████▊                                                          | 118/1000 [00:18<01:53,  7.75it/s]

training for epoch 118
for epoch 118 learning rate is 0.0015009463529699924
training_step
0 2.081611780019907
validation_step
0 2.033035865196815
for epoch 118 mean loss on train 2.0816118717193604
for epoch 118 mean loss on val 2.0330357551574707


 12%|███████▊                                                          | 119/1000 [00:18<01:55,  7.64it/s]

training for epoch 119
for epoch 119 learning rate is 0.0015009463529699924
training_step
0 1.957139675433819
validation_step
0 2.09668702345628
for epoch 119 mean loss on train 1.9571397304534912
for epoch 119 mean loss on val 2.096687078475952


 12%|███████▉                                                          | 120/1000 [00:18<01:54,  7.66it/s]

training for epoch 120
for epoch 120 learning rate is 0.0015009463529699924
training_step
0 2.041174081655649
validation_step
0 2.0606988760141225
for epoch 120 mean loss on train 2.0411741733551025
for epoch 120 mean loss on val 2.060698986053467


 12%|███████▉                                                          | 121/1000 [00:18<01:52,  7.80it/s]

training for epoch 121
for epoch 121 learning rate is 0.0015009463529699924
training_step
0 1.9990259317251353
validation_step
0 2.1343043400691104
for epoch 121 mean loss on train 1.9990259408950806
for epoch 121 mean loss on val 2.1343042850494385


 12%|████████                                                          | 122/1000 [00:18<01:55,  7.58it/s]

training for epoch 122
for epoch 122 learning rate is 0.0015009463529699924
training_step
0 2.0600428948035607
validation_step
0 2.0687418717604418
for epoch 122 mean loss on train 2.0600428581237793
for epoch 122 mean loss on val 2.068741798400879


 12%|████████                                                          | 123/1000 [00:18<02:02,  7.16it/s]

training for epoch 123
for epoch 123 learning rate is 0.0015009463529699924
training_step
0 2.029843110304612
validation_step
0 2.0392630650446963
for epoch 123 mean loss on train 2.0298430919647217
for epoch 123 mean loss on val 2.0392630100250244


 12%|████████▏                                                         | 124/1000 [00:18<02:14,  6.52it/s]

training for epoch 124
for epoch 124 learning rate is 0.0015009463529699924
training_step
0 2.072060364943284
validation_step
0 2.0825259868915262
for epoch 124 mean loss on train 2.0720603466033936
for epoch 124 mean loss on val 2.0825259685516357


 12%|████████▎                                                         | 125/1000 [00:19<02:10,  6.71it/s]

training for epoch 125
for epoch 125 learning rate is 0.0015009463529699924
training_step
0 2.0148614736703725
validation_step
0 1.992026108961839
for epoch 125 mean loss on train 2.014861583709717
for epoch 125 mean loss on val 1.9920260906219482


 13%|████████▎                                                         | 126/1000 [00:19<02:01,  7.22it/s]

training for epoch 126
for epoch 126 learning rate is 0.0013508517176729932
training_step
0 1.9866529611440806
validation_step
0 2.0393188183124247
for epoch 126 mean loss on train 1.9866529703140259
for epoch 126 mean loss on val 2.039318799972534


 13%|████████▍                                                         | 127/1000 [00:19<01:59,  7.30it/s]

training for epoch 127
for epoch 127 learning rate is 0.0013508517176729932
training_step
0 2.064904579749474
validation_step
0 2.021800701434796
for epoch 127 mean loss on train 2.0649046897888184
for epoch 127 mean loss on val 2.0218007564544678


 13%|████████▍                                                         | 128/1000 [00:19<01:59,  7.31it/s]

training for epoch 128
for epoch 128 learning rate is 0.0013508517176729932
training_step
0 2.156383514404297
validation_step
0 1.9973493722768931
for epoch 128 mean loss on train 2.156383514404297
for epoch 128 mean loss on val 1.9973493814468384


 13%|████████▌                                                         | 129/1000 [00:19<01:55,  7.56it/s]

training for epoch 129
for epoch 129 learning rate is 0.0013508517176729932
training_step
0 1.9665546417236328
validation_step
0 2.0635142693152795
for epoch 129 mean loss on train 1.9665546417236328
for epoch 129 mean loss on val 2.063514232635498


 13%|████████▌                                                         | 130/1000 [00:19<01:54,  7.59it/s]

training for epoch 130
for epoch 130 learning rate is 0.0013508517176729932
training_step
0 1.9957136007455678
validation_step
0 1.9992982424222505
for epoch 130 mean loss on train 1.9957135915756226
for epoch 130 mean loss on val 1.9992982149124146


 13%|████████▋                                                         | 131/1000 [00:19<01:54,  7.60it/s]

training for epoch 131
for epoch 131 learning rate is 0.0013508517176729932
training_step
0 2.0428780775803785
validation_step
0 2.099425535935622
for epoch 131 mean loss on train 2.0428781509399414
for epoch 131 mean loss on val 2.0994255542755127


 13%|████████▋                                                         | 132/1000 [00:19<01:52,  7.70it/s]

training for epoch 132
for epoch 132 learning rate is 0.0013508517176729932
training_step
0 2.130329278799204
validation_step
0 1.9828318082369292
for epoch 132 mean loss on train 2.1303293704986572
for epoch 132 mean loss on val 1.9828318357467651


 13%|████████▊                                                         | 133/1000 [00:20<01:50,  7.82it/s]

training for epoch 133
for epoch 133 learning rate is 0.0013508517176729932
training_step
0 2.122293472290039
validation_step
0 1.9847294734074519
for epoch 133 mean loss on train 2.122293472290039
for epoch 133 mean loss on val 1.984729528427124


 13%|████████▊                                                         | 134/1000 [00:20<01:57,  7.36it/s]

training for epoch 134
for epoch 134 learning rate is 0.0013508517176729932
training_step
0 2.047322053175706
validation_step
0 2.015027413001427
for epoch 134 mean loss on train 2.0473220348358154
for epoch 134 mean loss on val 2.0150275230407715


 14%|████████▉                                                         | 135/1000 [00:20<02:05,  6.90it/s]

training for epoch 135
for epoch 135 learning rate is 0.0013508517176729932
training_step
0 2.009406016423152
validation_step
0 2.087292010967548
for epoch 135 mean loss on train 2.009406089782715
for epoch 135 mean loss on val 2.087291955947876


 14%|████████▉                                                         | 136/1000 [00:20<01:58,  7.30it/s]

training for epoch 136
for epoch 136 learning rate is 0.0013508517176729932
training_step
0 2.058563525860126
validation_step
0 2.0711533473088193
for epoch 136 mean loss on train 2.058563470840454
for epoch 136 mean loss on val 2.071153402328491


 14%|█████████                                                         | 137/1000 [00:20<01:57,  7.35it/s]

training for epoch 137
for epoch 137 learning rate is 0.0013508517176729932
training_step
0 2.0797287867619443
validation_step
0 2.009257536668044
for epoch 137 mean loss on train 2.079728841781616
for epoch 137 mean loss on val 2.0092575550079346


 14%|█████████                                                         | 138/1000 [00:20<01:59,  7.24it/s]

training for epoch 138
for epoch 138 learning rate is 0.0013508517176729932
training_step
0 2.0332750173715444
validation_step
0 2.1599772526667667
for epoch 138 mean loss on train 2.0332751274108887
for epoch 138 mean loss on val 2.1599771976470947


 14%|█████████▏                                                        | 139/1000 [00:20<01:59,  7.21it/s]

training for epoch 139
for epoch 139 learning rate is 0.0013508517176729932
training_step
0 1.9980424734262319
validation_step
0 2.0022286635178785
for epoch 139 mean loss on train 1.9980424642562866
for epoch 139 mean loss on val 2.0022287368774414


 14%|█████████▏                                                        | 140/1000 [00:21<01:57,  7.29it/s]

training for epoch 140
for epoch 140 learning rate is 0.0013508517176729932
training_step
0 2.0101583920992336
validation_step
0 2.132333755493164
for epoch 140 mean loss on train 2.0101583003997803
for epoch 140 mean loss on val 2.132333755493164


 14%|█████████▎                                                        | 141/1000 [00:21<01:55,  7.44it/s]

training for epoch 141
for epoch 141 learning rate is 0.0013508517176729932
training_step
0 1.9975833892822266
validation_step
0 2.185589130108173
for epoch 141 mean loss on train 1.9975833892822266
for epoch 141 mean loss on val 2.185589075088501


 14%|█████████▎                                                        | 142/1000 [00:21<01:54,  7.50it/s]

training for epoch 142
for epoch 142 learning rate is 0.0013508517176729932
training_step
0 2.111427747286283
validation_step
0 2.062594927274264
for epoch 142 mean loss on train 2.1114277839660645
for epoch 142 mean loss on val 2.0625948905944824


 14%|█████████▍                                                        | 143/1000 [00:21<01:53,  7.55it/s]

training for epoch 143
for epoch 143 learning rate is 0.0013508517176729932
training_step
0 2.055854944082407
validation_step
0 1.9981231689453125
for epoch 143 mean loss on train 2.0558550357818604
for epoch 143 mean loss on val 1.9981231689453125


 14%|█████████▌                                                        | 144/1000 [00:21<01:52,  7.60it/s]

training for epoch 144
for epoch 144 learning rate is 0.0013508517176729932
training_step
0 1.9876717787522535
validation_step
0 2.092027810903696
for epoch 144 mean loss on train 1.9876717329025269
for epoch 144 mean loss on val 2.0920279026031494


 14%|█████████▌                                                        | 145/1000 [00:21<01:52,  7.61it/s]

training for epoch 145
for epoch 145 learning rate is 0.0013508517176729932
training_step
0 1.9613037109375
validation_step
0 2.064104226919321
for epoch 145 mean loss on train 1.9613037109375
for epoch 145 mean loss on val 2.0641043186187744


 15%|█████████▋                                                        | 146/1000 [00:21<01:52,  7.56it/s]

training for epoch 146
for epoch 146 learning rate is 0.0013508517176729932
training_step
0 1.964083744929387
validation_step
0 2.054403598491962
for epoch 146 mean loss on train 1.9640837907791138
for epoch 146 mean loss on val 2.05440354347229


 15%|█████████▋                                                        | 147/1000 [00:21<01:51,  7.65it/s]

training for epoch 147
for epoch 147 learning rate is 0.001215766545905694
training_step
0 1.9780305715707631
validation_step
0 1.9636886303241436
for epoch 147 mean loss on train 1.9780305624008179
for epoch 147 mean loss on val 1.963688611984253


 15%|█████████▊                                                        | 148/1000 [00:22<01:51,  7.66it/s]

training for epoch 148
for epoch 148 learning rate is 0.001215766545905694
training_step
0 2.0740743783804088
validation_step
0 2.023392603947566
for epoch 148 mean loss on train 2.0740742683410645
for epoch 148 mean loss on val 2.023392677307129


 15%|█████████▊                                                        | 149/1000 [00:22<01:49,  7.75it/s]

training for epoch 149
for epoch 149 learning rate is 0.001215766545905694
training_step
0 2.0495367783766527
validation_step
0 2.1763469989483175
for epoch 149 mean loss on train 2.04953670501709
for epoch 149 mean loss on val 2.176347017288208


 15%|█████████▉                                                        | 150/1000 [00:22<01:46,  7.98it/s]

training for epoch 150
for epoch 150 learning rate is 0.001215766545905694
training_step
0 2.05880619929387
validation_step
0 2.0115878765399637
for epoch 150 mean loss on train 2.0588061809539795
for epoch 150 mean loss on val 2.0115878582000732


 15%|█████████▉                                                        | 151/1000 [00:22<01:46,  8.01it/s]

training for epoch 151
for epoch 151 learning rate is 0.001215766545905694
training_step
0 2.0274402911846456
validation_step
0 2.0483848865215597
for epoch 151 mean loss on train 2.027440309524536
for epoch 151 mean loss on val 2.04838490486145


 15%|██████████                                                        | 152/1000 [00:22<01:51,  7.58it/s]

training for epoch 152
for epoch 152 learning rate is 0.001215766545905694
training_step
0 2.026842557466947
validation_step
0 2.165797600379357
for epoch 152 mean loss on train 2.0268425941467285
for epoch 152 mean loss on val 2.165797710418701


 15%|██████████                                                        | 153/1000 [00:22<01:50,  7.66it/s]

training for epoch 153
for epoch 153 learning rate is 0.001215766545905694
training_step
0 2.066090803879958
validation_step
0 2.053613516000601
for epoch 153 mean loss on train 2.0660908222198486
for epoch 153 mean loss on val 2.0536134243011475


 15%|██████████▏                                                       | 154/1000 [00:22<01:48,  7.80it/s]

training for epoch 154
for epoch 154 learning rate is 0.001215766545905694
training_step
0 2.082542272714468
validation_step
0 2.0718929584209738
for epoch 154 mean loss on train 2.0825421810150146
for epoch 154 mean loss on val 2.0718929767608643


 16%|██████████▏                                                       | 155/1000 [00:22<01:48,  7.81it/s]

training for epoch 155
for epoch 155 learning rate is 0.001215766545905694
training_step
0 1.9599229372464693
validation_step
0 2.0139302473801832
for epoch 155 mean loss on train 1.9599229097366333
for epoch 155 mean loss on val 2.013930320739746


 16%|██████████▎                                                       | 156/1000 [00:23<01:49,  7.72it/s]

training for epoch 156
for epoch 156 learning rate is 0.001215766545905694
training_step
0 2.0171536665696363
validation_step
0 2.0263065925011268
for epoch 156 mean loss on train 2.017153739929199
for epoch 156 mean loss on val 2.026306629180908


 16%|██████████▎                                                       | 157/1000 [00:23<01:46,  7.90it/s]

training for epoch 157
for epoch 157 learning rate is 0.001215766545905694
training_step
0 2.0086790231557994
validation_step
0 2.0418460552509012
for epoch 157 mean loss on train 2.008678913116455
for epoch 157 mean loss on val 2.0418460369110107


 16%|██████████▍                                                       | 158/1000 [00:23<01:46,  7.93it/s]

training for epoch 158
for epoch 158 learning rate is 0.001215766545905694
training_step
0 2.083044639000526
validation_step
0 1.9761644510122447
for epoch 158 mean loss on train 2.0830445289611816
for epoch 158 mean loss on val 1.97616446018219


 16%|██████████▍                                                       | 159/1000 [00:23<01:45,  7.95it/s]

training for epoch 159
for epoch 159 learning rate is 0.001215766545905694
training_step
0 2.0412772252009463
validation_step
0 2.0931147061861477
for epoch 159 mean loss on train 2.0412771701812744
for epoch 159 mean loss on val 2.0931146144866943


 16%|██████████▌                                                       | 160/1000 [00:23<01:46,  7.88it/s]

training for epoch 160
for epoch 160 learning rate is 0.001215766545905694
training_step
0 2.168452629676232
validation_step
0 1.9713040865384615
for epoch 160 mean loss on train 2.168452739715576
for epoch 160 mean loss on val 1.9713040590286255


 16%|██████████▋                                                       | 161/1000 [00:23<01:52,  7.44it/s]

training for epoch 161
for epoch 161 learning rate is 0.001215766545905694
training_step
0 2.1127650921161356
validation_step
0 2.0967688927283654
for epoch 161 mean loss on train 2.112765073776245
for epoch 161 mean loss on val 2.096768856048584


 16%|██████████▋                                                       | 162/1000 [00:23<01:57,  7.12it/s]

training for epoch 162
for epoch 162 learning rate is 0.001215766545905694
training_step
0 2.008046663724459
validation_step
0 2.093364715576172
for epoch 162 mean loss on train 2.0080466270446777
for epoch 162 mean loss on val 2.093364715576172


 16%|██████████▊                                                       | 163/1000 [00:24<02:03,  6.78it/s]

training for epoch 163
for epoch 163 learning rate is 0.001215766545905694
training_step
0 2.060609377347506
validation_step
0 1.958216300377479
for epoch 163 mean loss on train 2.0606093406677246
for epoch 163 mean loss on val 1.9582163095474243


 16%|██████████▊                                                       | 164/1000 [00:24<01:57,  7.14it/s]

training for epoch 164
for epoch 164 learning rate is 0.001215766545905694
training_step
0 2.1892224825345554
validation_step
0 2.099369635948768
for epoch 164 mean loss on train 2.189222574234009
for epoch 164 mean loss on val 2.099369525909424


 16%|██████████▉                                                       | 165/1000 [00:24<01:54,  7.29it/s]

training for epoch 165
for epoch 165 learning rate is 0.001215766545905694
training_step
0 2.18240840618427
validation_step
0 2.0047212747427134
for epoch 165 mean loss on train 2.182408332824707
for epoch 165 mean loss on val 2.004721164703369


 17%|██████████▉                                                       | 166/1000 [00:24<01:51,  7.51it/s]

training for epoch 166
for epoch 166 learning rate is 0.001215766545905694
training_step
0 1.9845440204326923
validation_step
0 1.9981501652644231
for epoch 166 mean loss on train 1.984544038772583
for epoch 166 mean loss on val 1.998150110244751


 17%|███████████                                                       | 167/1000 [00:24<01:48,  7.67it/s]

training for epoch 167
for epoch 167 learning rate is 0.001215766545905694
training_step
0 2.225655482365535
validation_step
0 2.064137678879958
for epoch 167 mean loss on train 2.2256555557250977
for epoch 167 mean loss on val 2.0641376972198486


 17%|███████████                                                       | 168/1000 [00:24<01:47,  7.71it/s]

training for epoch 168
for epoch 168 learning rate is 0.0010941898913151245
training_step
0 1.9650991879976714
validation_step
0 2.068984105036809
for epoch 168 mean loss on train 1.9650992155075073
for epoch 168 mean loss on val 2.068984031677246


 17%|███████████▏                                                      | 169/1000 [00:24<01:49,  7.59it/s]

training for epoch 169
for epoch 169 learning rate is 0.0010941898913151245
training_step
0 2.048071054311899
validation_step
0 1.9612558805025542
for epoch 169 mean loss on train 2.0480711460113525
for epoch 169 mean loss on val 1.9612559080123901


 17%|███████████▏                                                      | 170/1000 [00:24<01:48,  7.64it/s]

training for epoch 170
for epoch 170 learning rate is 0.0010941898913151245
training_step
0 2.1071742131159854
validation_step
0 1.9474295102632964
for epoch 170 mean loss on train 2.1071741580963135
for epoch 170 mean loss on val 1.9474295377731323


 17%|███████████▎                                                      | 171/1000 [00:25<01:50,  7.53it/s]

training for epoch 171
for epoch 171 learning rate is 0.0010941898913151245
training_step
0 1.9564825204702525
validation_step
0 2.0481036259577823
for epoch 171 mean loss on train 1.9564825296401978
for epoch 171 mean loss on val 2.0481035709381104


 17%|███████████▎                                                      | 172/1000 [00:25<01:55,  7.18it/s]

training for epoch 172
for epoch 172 learning rate is 0.0010941898913151245
training_step
0 2.0523000863882213
validation_step
0 2.0529073568490834
for epoch 172 mean loss on train 2.052299976348877
for epoch 172 mean loss on val 2.0529074668884277


 17%|███████████▍                                                      | 173/1000 [00:25<01:56,  7.09it/s]

training for epoch 173
for epoch 173 learning rate is 0.0010941898913151245
training_step
0 2.0337000626784105
validation_step
0 2.0750760298508863
for epoch 173 mean loss on train 2.0336999893188477
for epoch 173 mean loss on val 2.075076103210449


 17%|███████████▍                                                      | 174/1000 [00:25<01:49,  7.51it/s]

training for epoch 174
for epoch 174 learning rate is 0.0010941898913151245
training_step
0 2.0556333982027493
validation_step
0 2.0402500446026144
for epoch 174 mean loss on train 2.055633306503296
for epoch 174 mean loss on val 2.040250062942505


 18%|███████████▌                                                      | 175/1000 [00:25<01:48,  7.62it/s]

training for epoch 175
for epoch 175 learning rate is 0.0010941898913151245
training_step
0 2.0581292372483473
validation_step
0 1.9573789743276744
for epoch 175 mean loss on train 2.05812931060791
for epoch 175 mean loss on val 1.9573789834976196


 18%|███████████▌                                                      | 176/1000 [00:25<01:49,  7.55it/s]

training for epoch 176
for epoch 176 learning rate is 0.0010941898913151245
training_step
0 2.0760172330416164
validation_step
0 2.0401169703556943
for epoch 176 mean loss on train 2.076017141342163
for epoch 176 mean loss on val 2.040117025375366


 18%|███████████▋                                                      | 177/1000 [00:25<01:47,  7.64it/s]

training for epoch 177
for epoch 177 learning rate is 0.0010941898913151245
training_step
0 1.9940076974722056
validation_step
0 2.01242916400616
for epoch 177 mean loss on train 1.9940077066421509
for epoch 177 mean loss on val 2.0124292373657227


 18%|███████████▋                                                      | 178/1000 [00:26<01:47,  7.65it/s]

training for epoch 178
for epoch 178 learning rate is 0.0010941898913151245
training_step
0 2.0626100393442006
validation_step
0 2.094283177302434
for epoch 178 mean loss on train 2.062610149383545
for epoch 178 mean loss on val 2.094283103942871


 18%|███████████▊                                                      | 179/1000 [00:26<01:47,  7.66it/s]

training for epoch 179
for epoch 179 learning rate is 0.0010941898913151245
training_step
0 2.0825880490816555
validation_step
0 2.0117657001201925
for epoch 179 mean loss on train 2.082587957382202
for epoch 179 mean loss on val 2.011765718460083


 18%|███████████▉                                                      | 180/1000 [00:26<01:46,  7.73it/s]

training for epoch 180
for epoch 180 learning rate is 0.0010941898913151245
training_step
0 2.0420276935283956
validation_step
0 2.1032999478853664
for epoch 180 mean loss on train 2.042027711868286
for epoch 180 mean loss on val 2.103299856185913


 18%|███████████▉                                                      | 181/1000 [00:26<01:48,  7.58it/s]

training for epoch 181
for epoch 181 learning rate is 0.0010941898913151245
training_step
0 2.0319981208214393
validation_step
0 2.101007314828726
for epoch 181 mean loss on train 2.0319981575012207
for epoch 181 mean loss on val 2.1010072231292725


 18%|████████████                                                      | 182/1000 [00:26<01:50,  7.43it/s]

training for epoch 182
for epoch 182 learning rate is 0.0010941898913151245
training_step
0 2.085223858173077
validation_step
0 2.037742468026968
for epoch 182 mean loss on train 2.085223913192749
for epoch 182 mean loss on val 2.0377423763275146


 18%|████████████                                                      | 183/1000 [00:26<01:50,  7.39it/s]

training for epoch 183
for epoch 183 learning rate is 0.0010941898913151245
training_step
0 2.0087340428278995
validation_step
0 2.110873442429763
for epoch 183 mean loss on train 2.0087339878082275
for epoch 183 mean loss on val 2.1108734607696533


 18%|████████████▏                                                     | 184/1000 [00:26<01:51,  7.34it/s]

training for epoch 184
for epoch 184 learning rate is 0.0010941898913151245
training_step
0 2.026849306546725
validation_step
0 1.970331778893104
for epoch 184 mean loss on train 2.0268492698669434
for epoch 184 mean loss on val 1.9703317880630493


 18%|████████████▏                                                     | 185/1000 [00:26<01:48,  7.50it/s]

training for epoch 185
for epoch 185 learning rate is 0.0010941898913151245
training_step
0 2.012250166672927
validation_step
0 2.175434405987079
for epoch 185 mean loss on train 2.0122501850128174
for epoch 185 mean loss on val 2.1754343509674072


 19%|████████████▎                                                     | 186/1000 [00:27<01:45,  7.69it/s]

training for epoch 186
for epoch 186 learning rate is 0.0010941898913151245
training_step
0 1.9902625450721154
validation_step
0 2.0626214834359975
for epoch 186 mean loss on train 1.990262508392334
for epoch 186 mean loss on val 2.062621593475342


 19%|████████████▎                                                     | 187/1000 [00:27<01:42,  7.93it/s]

training for epoch 187
for epoch 187 learning rate is 0.0010941898913151245
training_step
0 2.0454383263221154
validation_step
0 1.9885845184326172
for epoch 187 mean loss on train 2.045438289642334
for epoch 187 mean loss on val 1.9885845184326172


 19%|████████████▍                                                     | 188/1000 [00:27<01:42,  7.94it/s]

training for epoch 188
for epoch 188 learning rate is 0.0010941898913151245
training_step
0 2.029219994178185
validation_step
0 2.0214912707989035
for epoch 188 mean loss on train 2.0292201042175293
for epoch 188 mean loss on val 2.021491289138794


 19%|████████████▍                                                     | 189/1000 [00:27<01:42,  7.89it/s]

training for epoch 189
for epoch 189 learning rate is 0.0009847709021836122
training_step
0 2.0060106424184947
validation_step
0 2.008130440345177
for epoch 189 mean loss on train 2.0060105323791504
for epoch 189 mean loss on val 2.0081305503845215


 19%|████████████▌                                                     | 190/1000 [00:27<01:40,  8.02it/s]

training for epoch 190
for epoch 190 learning rate is 0.0009847709021836122
training_step
0 2.0774961618276744
validation_step
0 2.1100152822641225
for epoch 190 mean loss on train 2.07749605178833
for epoch 190 mean loss on val 2.110015392303467


 19%|████████████▌                                                     | 191/1000 [00:27<01:42,  7.89it/s]

training for epoch 191
for epoch 191 learning rate is 0.0009847709021836122
training_step
0 2.029875095073993
validation_step
0 2.1237207559438853
for epoch 191 mean loss on train 2.0298750400543213
for epoch 191 mean loss on val 2.123720645904541


 19%|████████████▋                                                     | 192/1000 [00:27<01:43,  7.79it/s]

training for epoch 192
for epoch 192 learning rate is 0.0009847709021836122
training_step
0 2.1345610985389123
validation_step
0 2.1508090679462137
for epoch 192 mean loss on train 2.134561061859131
for epoch 192 mean loss on val 2.1508090496063232


 19%|████████████▋                                                     | 193/1000 [00:28<01:45,  7.67it/s]

training for epoch 193
for epoch 193 learning rate is 0.0009847709021836122
training_step
0 1.9725867051344652
validation_step
0 2.024820767916166
for epoch 193 mean loss on train 1.972586750984192
for epoch 193 mean loss on val 2.0248208045959473


 19%|████████████▊                                                     | 194/1000 [00:28<01:44,  7.74it/s]

training for epoch 194
for epoch 194 learning rate is 0.0009847709021836122
training_step
0 1.9807459024282603
validation_step
0 1.9861615987924428
for epoch 194 mean loss on train 1.9807459115982056
for epoch 194 mean loss on val 1.9861615896224976


 20%|████████████▊                                                     | 195/1000 [00:28<01:41,  7.95it/s]

training for epoch 195
for epoch 195 learning rate is 0.0009847709021836122
training_step
0 2.0552366696871243
validation_step
0 2.0665658804086537
for epoch 195 mean loss on train 2.055236577987671
for epoch 195 mean loss on val 2.066565990447998


 20%|████████████▉                                                     | 196/1000 [00:28<01:40,  8.01it/s]

training for epoch 196
for epoch 196 learning rate is 0.0009847709021836122
training_step
0 2.001818290123573
validation_step
0 2.0824332604041467
for epoch 196 mean loss on train 2.0018181800842285
for epoch 196 mean loss on val 2.0824332237243652


 20%|█████████████                                                     | 197/1000 [00:28<01:39,  8.08it/s]

training for epoch 197
for epoch 197 learning rate is 0.0009847709021836122
training_step
0 2.084869678203876
validation_step
0 2.018574934739333
for epoch 197 mean loss on train 2.084869623184204
for epoch 197 mean loss on val 2.0185749530792236


 20%|█████████████                                                     | 198/1000 [00:28<01:40,  8.02it/s]

training for epoch 198
for epoch 198 learning rate is 0.0009847709021836122
training_step
0 2.0247044196495643
validation_step
0 2.0438308715820312
for epoch 198 mean loss on train 2.0247044563293457
for epoch 198 mean loss on val 2.0438308715820312


 20%|█████████████▏                                                    | 199/1000 [00:28<01:48,  7.41it/s]

training for epoch 199
for epoch 199 learning rate is 0.0009847709021836122
training_step
0 1.9964033273550181
validation_step
0 1.9860641772930439
for epoch 199 mean loss on train 1.9964033365249634
for epoch 199 mean loss on val 1.9860641956329346


 20%|█████████████▏                                                    | 200/1000 [00:28<01:53,  7.04it/s]

training for epoch 200
for epoch 200 learning rate is 0.0009847709021836122
training_step
0 2.085528153639573
validation_step
0 1.9593887329101562
for epoch 200 mean loss on train 2.0855281352996826
for epoch 200 mean loss on val 1.9593887329101562


 20%|█████████████▎                                                    | 201/1000 [00:29<01:56,  6.88it/s]

training for epoch 201
for epoch 201 learning rate is 0.0009847709021836122
training_step
0 1.9912884051983173
validation_step
0 2.049894626323993
for epoch 201 mean loss on train 1.991288423538208
for epoch 201 mean loss on val 2.0498945713043213


 20%|█████████████▎                                                    | 202/1000 [00:29<01:53,  7.05it/s]

training for epoch 202
for epoch 202 learning rate is 0.0009847709021836122
training_step
0 2.0257963033822866
validation_step
0 2.2190936161921573
for epoch 202 mean loss on train 2.025796413421631
for epoch 202 mean loss on val 2.2190935611724854


 20%|█████████████▍                                                    | 203/1000 [00:29<01:47,  7.39it/s]

training for epoch 203
for epoch 203 learning rate is 0.0009847709021836122
training_step
0 2.0527610778808594
validation_step
0 2.0442796853872447
for epoch 203 mean loss on train 2.0527610778808594
for epoch 203 mean loss on val 2.0442795753479004


 20%|█████████████▍                                                    | 204/1000 [00:29<01:46,  7.48it/s]

training for epoch 204
for epoch 204 learning rate is 0.0009847709021836122
training_step
0 2.042885560255784
validation_step
0 2.0437779059776893
for epoch 204 mean loss on train 2.0428855419158936
for epoch 204 mean loss on val 2.0437779426574707


 20%|█████████████▌                                                    | 205/1000 [00:29<01:50,  7.21it/s]

training for epoch 205
for epoch 205 learning rate is 0.0009847709021836122
training_step
0 2.148845672607422
validation_step
0 2.039762203509991
for epoch 205 mean loss on train 2.148845672607422
for epoch 205 mean loss on val 2.039762258529663


 21%|█████████████▌                                                    | 206/1000 [00:29<01:45,  7.51it/s]

training for epoch 206
for epoch 206 learning rate is 0.0009847709021836122
training_step
0 2.0337327810434194
validation_step
0 2.0129690903883715
for epoch 206 mean loss on train 2.0337328910827637
for epoch 206 mean loss on val 2.0129690170288086


 21%|█████████████▋                                                    | 207/1000 [00:29<01:43,  7.64it/s]

training for epoch 207
for epoch 207 learning rate is 0.0009847709021836122
training_step
0 2.010388887845553
validation_step
0 2.0253966404841495
for epoch 207 mean loss on train 2.0103888511657715
for epoch 207 mean loss on val 2.0253965854644775


 21%|█████████████▋                                                    | 208/1000 [00:30<01:43,  7.69it/s]

training for epoch 208
for epoch 208 learning rate is 0.0009847709021836122
training_step
0 1.9531820737398589
validation_step
0 2.0569600325364332
for epoch 208 mean loss on train 1.9531821012496948
for epoch 208 mean loss on val 2.056960105895996


 21%|█████████████▊                                                    | 209/1000 [00:30<01:46,  7.40it/s]

training for epoch 209
for epoch 209 learning rate is 0.0009847709021836122
training_step
0 2.1648468604454627
validation_step
0 2.058041352492112
for epoch 209 mean loss on train 2.164846897125244
for epoch 209 mean loss on val 2.0580413341522217


 21%|█████████████▊                                                    | 210/1000 [00:30<01:47,  7.34it/s]

training for epoch 210
for epoch 210 learning rate is 0.0008862938119652509
training_step
0 2.0231602008526144
validation_step
0 2.055008521446815
for epoch 210 mean loss on train 2.023160219192505
for epoch 210 mean loss on val 2.0550084114074707


 21%|█████████████▉                                                    | 211/1000 [00:30<01:46,  7.38it/s]

training for epoch 211
for epoch 211 learning rate is 0.0008862938119652509
training_step
0 2.023115744957557
validation_step
0 1.9989915994497447
for epoch 211 mean loss on train 2.023115634918213
for epoch 211 mean loss on val 1.99899160861969


 21%|█████████████▉                                                    | 212/1000 [00:30<01:45,  7.47it/s]

training for epoch 212
for epoch 212 learning rate is 0.0008862938119652509
training_step
0 2.052953133216271
validation_step
0 2.0385893308199368
for epoch 212 mean loss on train 2.0529532432556152
for epoch 212 mean loss on val 2.0385892391204834


 21%|██████████████                                                    | 213/1000 [00:30<01:46,  7.38it/s]

training for epoch 213
for epoch 213 learning rate is 0.0008862938119652509
training_step
0 2.03610112116887
validation_step
0 2.067965727586013
for epoch 213 mean loss on train 2.0361011028289795
for epoch 213 mean loss on val 2.0679657459259033


 21%|██████████████                                                    | 214/1000 [00:30<01:50,  7.13it/s]

training for epoch 214
for epoch 214 learning rate is 0.0008862938119652509
training_step
0 2.093870896559495
validation_step
0 1.9679976243239183
for epoch 214 mean loss on train 2.0938708782196045
for epoch 214 mean loss on val 1.967997670173645


 22%|██████████████▏                                                   | 215/1000 [00:30<01:43,  7.60it/s]

training for epoch 215
for epoch 215 learning rate is 0.0008862938119652509
training_step
0 2.0635870420015774
validation_step
0 2.040103032038762
for epoch 215 mean loss on train 2.063586950302124
for epoch 215 mean loss on val 2.040102958679199


 22%|██████████████▎                                                   | 216/1000 [00:31<01:41,  7.69it/s]

training for epoch 216
for epoch 216 learning rate is 0.0008862938119652509
training_step
0 2.1084056267371545
validation_step
0 2.050602986262395
for epoch 216 mean loss on train 2.108405590057373
for epoch 216 mean loss on val 2.050602912902832


 22%|██████████████▎                                                   | 217/1000 [00:31<01:43,  7.60it/s]

training for epoch 217
for epoch 217 learning rate is 0.0008862938119652509
training_step
0 2.0261306762695312
validation_step
0 2.027227988609901
for epoch 217 mean loss on train 2.0261306762695312
for epoch 217 mean loss on val 2.0272278785705566


 22%|██████████████▍                                                   | 218/1000 [00:31<01:42,  7.65it/s]

training for epoch 218
for epoch 218 learning rate is 0.0008862938119652509
training_step
0 2.015480775099534
validation_step
0 2.1069601499117336
for epoch 218 mean loss on train 2.0154807567596436
for epoch 218 mean loss on val 2.1069600582122803


 22%|██████████████▍                                                   | 219/1000 [00:31<01:42,  7.61it/s]

training for epoch 219
for epoch 219 learning rate is 0.0008862938119652509
training_step
0 2.0543751349815955
validation_step
0 2.001078825730544
for epoch 219 mean loss on train 2.054375171661377
for epoch 219 mean loss on val 2.0010788440704346


 22%|██████████████▌                                                   | 220/1000 [00:31<01:41,  7.65it/s]

training for epoch 220
for epoch 220 learning rate is 0.0008862938119652509
training_step
0 1.9983626145582933
validation_step
0 2.0322836362398586
for epoch 220 mean loss on train 1.99836266040802
for epoch 220 mean loss on val 2.0322835445404053


 22%|██████████████▌                                                   | 221/1000 [00:31<01:42,  7.58it/s]

training for epoch 221
for epoch 221 learning rate is 0.0008862938119652509
training_step
0 2.128914612990159
validation_step
0 2.1255667759821963
for epoch 221 mean loss on train 2.1289145946502686
for epoch 221 mean loss on val 2.1255667209625244


 22%|██████████████▋                                                   | 222/1000 [00:31<01:45,  7.37it/s]

training for epoch 222
for epoch 222 learning rate is 0.0008862938119652509
training_step
0 2.0379544771634617
validation_step
0 2.175325246957632
for epoch 222 mean loss on train 2.037954568862915
for epoch 222 mean loss on val 2.1753251552581787


 22%|██████████████▋                                                   | 223/1000 [00:32<01:46,  7.32it/s]

training for epoch 223
for epoch 223 learning rate is 0.0008862938119652509
training_step
0 2.0121276562030497
validation_step
0 2.0659354283259463
for epoch 223 mean loss on train 2.012127637863159
for epoch 223 mean loss on val 2.0659353733062744


 22%|██████████████▊                                                   | 224/1000 [00:32<01:45,  7.35it/s]

training for epoch 224
for epoch 224 learning rate is 0.0008862938119652509
training_step
0 2.0425679133488583
validation_step
0 2.0574857271634617
for epoch 224 mean loss on train 2.0425679683685303
for epoch 224 mean loss on val 2.057485818862915


 22%|██████████████▊                                                   | 225/1000 [00:32<01:42,  7.57it/s]

training for epoch 225
for epoch 225 learning rate is 0.0008862938119652509
training_step
0 2.0215658041147084
validation_step
0 2.04681029686561
for epoch 225 mean loss on train 2.0215659141540527
for epoch 225 mean loss on val 2.0468103885650635


 23%|██████████████▉                                                   | 226/1000 [00:32<01:42,  7.53it/s]

training for epoch 226
for epoch 226 learning rate is 0.0008862938119652509
training_step
0 2.068705045259916
validation_step
0 2.0900422609769382
for epoch 226 mean loss on train 2.0687050819396973
for epoch 226 mean loss on val 2.0900423526763916


 23%|██████████████▉                                                   | 227/1000 [00:32<01:40,  7.71it/s]

training for epoch 227
for epoch 227 learning rate is 0.0008862938119652509
training_step
0 2.167796941903921
validation_step
0 2.072986309344952
for epoch 227 mean loss on train 2.1677968502044678
for epoch 227 mean loss on val 2.072986364364624


 23%|███████████████                                                   | 228/1000 [00:32<01:39,  7.76it/s]

training for epoch 228
for epoch 228 learning rate is 0.0008862938119652509
training_step
0 2.0449374272273135
validation_step
0 2.0340115473820615
for epoch 228 mean loss on train 2.0449373722076416
for epoch 228 mean loss on val 2.0340116024017334


 23%|███████████████                                                   | 229/1000 [00:32<01:37,  7.87it/s]

training for epoch 229
for epoch 229 learning rate is 0.0008862938119652509
training_step
0 2.0534532987154446
validation_step
0 2.027829096867488
for epoch 229 mean loss on train 2.053453207015991
for epoch 229 mean loss on val 2.027829170227051


 23%|███████████████▏                                                  | 230/1000 [00:32<01:37,  7.87it/s]

training for epoch 230
for epoch 230 learning rate is 0.0008862938119652509
training_step
0 2.0595609224759617
validation_step
0 2.0548615088829627
for epoch 230 mean loss on train 2.059561014175415
for epoch 230 mean loss on val 2.054861545562744


 23%|███████████████▏                                                  | 231/1000 [00:33<01:36,  7.94it/s]

training for epoch 231
for epoch 231 learning rate is 0.0007976644307687258
training_step
0 1.9874866192157452
validation_step
0 2.0448868091289816
for epoch 231 mean loss on train 1.9874866008758545
for epoch 231 mean loss on val 2.044886827468872


 23%|███████████████▎                                                  | 232/1000 [00:33<01:35,  8.00it/s]

training for epoch 232
for epoch 232 learning rate is 0.0007976644307687258
training_step
0 2.006093978881836
validation_step
0 1.9715567368727465
for epoch 232 mean loss on train 2.006093978881836
for epoch 232 mean loss on val 1.9715567827224731


 23%|███████████████▍                                                  | 233/1000 [00:33<01:36,  7.93it/s]

training for epoch 233
for epoch 233 learning rate is 0.0007976644307687258
training_step
0 2.122702818650466
validation_step
0 1.9957505739652193
for epoch 233 mean loss on train 2.1227028369903564
for epoch 233 mean loss on val 1.9957505464553833


 23%|███████████████▍                                                  | 234/1000 [00:33<01:37,  7.82it/s]

training for epoch 234
for epoch 234 learning rate is 0.0007976644307687258
training_step
0 2.0014158395620494
validation_step
0 2.0697768284724307
for epoch 234 mean loss on train 2.001415729522705
for epoch 234 mean loss on val 2.069776773452759


 24%|███████████████▌                                                  | 235/1000 [00:33<01:37,  7.82it/s]

training for epoch 235
for epoch 235 learning rate is 0.0007976644307687258
training_step
0 1.9999691889836237
validation_step
0 2.1204119462233324
for epoch 235 mean loss on train 1.999969244003296
for epoch 235 mean loss on val 2.1204118728637695


 24%|███████████████▌                                                  | 236/1000 [00:33<01:40,  7.61it/s]

training for epoch 236
for epoch 236 learning rate is 0.0007976644307687258
training_step
0 2.0642374478853664
validation_step
0 2.0690975189208984
for epoch 236 mean loss on train 2.064237356185913
for epoch 236 mean loss on val 2.0690975189208984


 24%|███████████████▋                                                  | 237/1000 [00:33<01:41,  7.50it/s]

training for epoch 237
for epoch 237 learning rate is 0.0007976644307687258
training_step
0 1.9923446361835186
validation_step
0 2.083908961369441
for epoch 237 mean loss on train 1.992344617843628
for epoch 237 mean loss on val 2.083909034729004


 24%|███████████████▋                                                  | 238/1000 [00:34<02:03,  6.18it/s]

training for epoch 238
for epoch 238 learning rate is 0.0007976644307687258
training_step
0 2.069909902719351
validation_step
0 2.0650705190805287
for epoch 238 mean loss on train 2.0699098110198975
for epoch 238 mean loss on val 2.065070629119873


 24%|███████████████▊                                                  | 239/1000 [00:34<02:17,  5.54it/s]

training for epoch 239
for epoch 239 learning rate is 0.0007976644307687258
training_step
0 2.040498733520508
validation_step
0 2.0167318490835338
for epoch 239 mean loss on train 2.040498733520508
for epoch 239 mean loss on val 2.0167317390441895


 24%|███████████████▊                                                  | 240/1000 [00:34<02:31,  5.00it/s]

training for epoch 240
for epoch 240 learning rate is 0.0007976644307687258
training_step
0 2.0032534966102014
validation_step
0 2.092288237351638
for epoch 240 mean loss on train 2.00325345993042
for epoch 240 mean loss on val 2.0922882556915283


 24%|███████████████▉                                                  | 241/1000 [00:34<02:20,  5.39it/s]

training for epoch 241
for epoch 241 learning rate is 0.0007976644307687258
training_step
0 2.0783270322359524
validation_step
0 1.9856796264648438
for epoch 241 mean loss on train 2.078326940536499
for epoch 241 mean loss on val 1.9856796264648438


 24%|███████████████▉                                                  | 242/1000 [00:34<02:08,  5.88it/s]

training for epoch 242
for epoch 242 learning rate is 0.0007976644307687258
training_step
0 2.119929240300105
validation_step
0 1.9385094275841346
for epoch 242 mean loss on train 2.119929313659668
for epoch 242 mean loss on val 1.938509464263916


 24%|████████████████                                                  | 243/1000 [00:34<01:58,  6.40it/s]

training for epoch 243
for epoch 243 learning rate is 0.0007976644307687258
training_step
0 1.9903472020075872
validation_step
0 2.107538810143104
for epoch 243 mean loss on train 1.990347146987915
for epoch 243 mean loss on val 2.1075387001037598


 24%|████████████████                                                  | 244/1000 [00:35<01:48,  6.97it/s]

training for epoch 244
for epoch 244 learning rate is 0.0007976644307687258
training_step
0 2.0895762810340295
validation_step
0 2.0569959787222056
for epoch 244 mean loss on train 2.089576244354248
for epoch 244 mean loss on val 2.0569958686828613


 24%|████████████████▏                                                 | 245/1000 [00:35<01:43,  7.33it/s]

training for epoch 245
for epoch 245 learning rate is 0.0007976644307687258
training_step
0 2.0558977860670824
validation_step
0 2.090420502882737
for epoch 245 mean loss on train 2.0558977127075195
for epoch 245 mean loss on val 2.0904204845428467


 25%|████████████████▏                                                 | 246/1000 [00:35<01:38,  7.69it/s]

training for epoch 246
for epoch 246 learning rate is 0.0007976644307687258
training_step
0 2.0711256173940806
validation_step
0 2.0689908541165867
for epoch 246 mean loss on train 2.0711255073547363
for epoch 246 mean loss on val 2.06899094581604


 25%|████████████████▎                                                 | 247/1000 [00:35<01:37,  7.69it/s]

training for epoch 247
for epoch 247 learning rate is 0.0007976644307687258
training_step
0 2.043508823101337
validation_step
0 2.079950919518104
for epoch 247 mean loss on train 2.043508768081665
for epoch 247 mean loss on val 2.0799508094787598


 25%|████████████████▎                                                 | 248/1000 [00:35<01:37,  7.72it/s]

training for epoch 248
for epoch 248 learning rate is 0.0007976644307687258
training_step
0 2.050151091355544
validation_step
0 2.0285198505108175
for epoch 248 mean loss on train 2.0501511096954346
for epoch 248 mean loss on val 2.028519868850708


 25%|████████████████▍                                                 | 249/1000 [00:35<01:37,  7.72it/s]

training for epoch 249
for epoch 249 learning rate is 0.0007976644307687258
training_step
0 1.9552874931922326
validation_step
0 1.9529210604154146
for epoch 249 mean loss on train 1.9552874565124512
for epoch 249 mean loss on val 1.9529210329055786


 25%|████████████████▌                                                 | 250/1000 [00:35<01:33,  8.03it/s]

training for epoch 250
for epoch 250 learning rate is 0.0007976644307687258
training_step
0 2.0332918900709887
validation_step
0 2.0535944425142727
for epoch 250 mean loss on train 2.033291816711426
for epoch 250 mean loss on val 2.0535943508148193


 25%|████████████████▌                                                 | 251/1000 [00:35<01:31,  8.17it/s]

training for epoch 251
for epoch 251 learning rate is 0.0007976644307687258
training_step
0 2.103624490591196
validation_step
0 2.006445811345027
for epoch 251 mean loss on train 2.1036245822906494
for epoch 251 mean loss on val 2.00644588470459


 25%|████████████████▋                                                 | 252/1000 [00:36<01:33,  8.03it/s]

training for epoch 252
for epoch 252 learning rate is 0.0007178979876918532
training_step
0 2.0744766822228065
validation_step
0 2.0447323138897238
for epoch 252 mean loss on train 2.074476718902588
for epoch 252 mean loss on val 2.0447323322296143


 25%|████████████████▋                                                 | 253/1000 [00:36<02:01,  6.17it/s]

training for epoch 253
for epoch 253 learning rate is 0.0007178979876918532
training_step
0 2.0612514202411356
validation_step
0 1.9926345531757061
for epoch 253 mean loss on train 2.061251401901245
for epoch 253 mean loss on val 1.9926345348358154


 25%|████████████████▊                                                 | 254/1000 [00:36<02:02,  6.08it/s]

training for epoch 254
for epoch 254 learning rate is 0.0007178979876918532
training_step
0 2.019433094904973
validation_step
0 2.076661330003005
for epoch 254 mean loss on train 2.01943302154541
for epoch 254 mean loss on val 2.0766613483428955


 26%|████████████████▊                                                 | 255/1000 [00:36<02:00,  6.19it/s]

training for epoch 255
for epoch 255 learning rate is 0.0007178979876918532
training_step
0 2.0827495868389425
validation_step
0 2.0487695840688853
for epoch 255 mean loss on train 2.082749605178833
for epoch 255 mean loss on val 2.048769474029541


 26%|████████████████▉                                                 | 256/1000 [00:36<01:55,  6.45it/s]

training for epoch 256
for epoch 256 learning rate is 0.0007178979876918532
training_step
0 2.0035067338209887
validation_step
0 2.053384927602915
for epoch 256 mean loss on train 2.003506660461426
for epoch 256 mean loss on val 2.053385019302368


 26%|████████████████▉                                                 | 257/1000 [00:36<01:51,  6.64it/s]

training for epoch 257
for epoch 257 learning rate is 0.0007178979876918532
training_step
0 2.028445610633263
validation_step
0 2.0125962770902195
for epoch 257 mean loss on train 2.0284457206726074
for epoch 257 mean loss on val 2.012596368789673


 26%|█████████████████                                                 | 258/1000 [00:37<01:48,  6.87it/s]

training for epoch 258
for epoch 258 learning rate is 0.0007178979876918532
training_step
0 2.018962713388296
validation_step
0 2.085087849543645
for epoch 258 mean loss on train 2.0189626216888428
for epoch 258 mean loss on val 2.085087776184082


 26%|█████████████████                                                 | 259/1000 [00:37<01:41,  7.27it/s]

training for epoch 259
for epoch 259 learning rate is 0.0007178979876918532
training_step
0 2.0015509678767276
validation_step
0 2.0768878643329325
for epoch 259 mean loss on train 2.0015509128570557
for epoch 259 mean loss on val 2.076887845993042


 26%|█████████████████▏                                                | 260/1000 [00:37<01:37,  7.57it/s]

training for epoch 260
for epoch 260 learning rate is 0.0007178979876918532
training_step
0 2.0854244232177734
validation_step
0 2.044746839083158
for epoch 260 mean loss on train 2.0854244232177734
for epoch 260 mean loss on val 2.0447468757629395


 26%|█████████████████▏                                                | 261/1000 [00:37<01:36,  7.67it/s]

training for epoch 261
for epoch 261 learning rate is 0.0007178979876918532
training_step
0 2.066245299119216
validation_step
0 2.112194354717548
for epoch 261 mean loss on train 2.0662453174591064
for epoch 261 mean loss on val 2.112194299697876


 26%|█████████████████▎                                                | 262/1000 [00:37<01:34,  7.82it/s]

training for epoch 262
for epoch 262 learning rate is 0.0007178979876918532
training_step
0 1.961518801175631
validation_step
0 2.047850095308744
for epoch 262 mean loss on train 1.9615187644958496
for epoch 262 mean loss on val 2.0478501319885254


 26%|█████████████████▎                                                | 263/1000 [00:37<01:32,  7.99it/s]

training for epoch 263
for epoch 263 learning rate is 0.0007178979876918532
training_step
0 2.1237806173471303
validation_step
0 2.0099088228665867
for epoch 263 mean loss on train 2.1237807273864746
for epoch 263 mean loss on val 2.00990891456604


 26%|█████████████████▍                                                | 264/1000 [00:37<01:31,  8.02it/s]

training for epoch 264
for epoch 264 learning rate is 0.0007178979876918532
training_step
0 1.9824237823486328
validation_step
0 2.10468996488131
for epoch 264 mean loss on train 1.9824237823486328
for epoch 264 mean loss on val 2.1046900749206543


 26%|█████████████████▍                                                | 265/1000 [00:37<01:30,  8.12it/s]

training for epoch 265
for epoch 265 learning rate is 0.0007178979876918532
training_step
0 2.087302428025466
validation_step
0 2.0222625732421875
for epoch 265 mean loss on train 2.0873024463653564
for epoch 265 mean loss on val 2.0222625732421875


 27%|█████████████████▌                                                | 266/1000 [00:37<01:30,  8.14it/s]

training for epoch 266
for epoch 266 learning rate is 0.0007178979876918532
training_step
0 2.1196184891920824
validation_step
0 2.052803479708158
for epoch 266 mean loss on train 2.1196184158325195
for epoch 266 mean loss on val 2.0528035163879395


 27%|█████████████████▌                                                | 267/1000 [00:38<01:29,  8.18it/s]

training for epoch 267
for epoch 267 learning rate is 0.0007178979876918532
training_step
0 2.0441033289982724
validation_step
0 2.0028699728158803
for epoch 267 mean loss on train 2.0441033840179443
for epoch 267 mean loss on val 2.0028700828552246


 27%|████████████████▎                                            | 268/1000 [29:17<104:53:12, 515.84s/it]

training for epoch 268
for epoch 268 learning rate is 0.0007178979876918532
training_step
0 2.0310629331148586
validation_step
0 2.0229898599477916
for epoch 268 mean loss on train 2.0310628414154053
for epoch 268 mean loss on val 2.0229897499084473


 27%|████████████████▋                                             | 269/1000 [29:18<73:22:03, 361.32s/it]

training for epoch 269
for epoch 269 learning rate is 0.0007178979876918532
training_step
0 1.9805286114032452
validation_step
0 2.0441020085261417
for epoch 269 mean loss on train 1.9805285930633545
for epoch 269 mean loss on val 2.0441019535064697


 27%|████████████████▋                                             | 270/1000 [29:18<51:20:11, 253.17s/it]

training for epoch 270
for epoch 270 learning rate is 0.0007178979876918532
training_step
0 2.0766356541560245
validation_step
0 2.130520013662485
for epoch 270 mean loss on train 2.0766355991363525
for epoch 270 mean loss on val 2.1305201053619385


 27%|████████████████▊                                             | 271/1000 [29:19<35:54:53, 177.36s/it]

training for epoch 271
for epoch 271 learning rate is 0.0007178979876918532
training_step
0 2.1456141838660607
validation_step
0 2.0298077509953427
for epoch 271 mean loss on train 2.1456141471862793
for epoch 271 mean loss on val 2.0298078060150146


 27%|████████████████▊                                             | 272/1000 [29:19<25:07:31, 124.25s/it]

training for epoch 272
for epoch 272 learning rate is 0.0007178979876918532
training_step
0 2.0965658334585338
validation_step
0 2.1114955315223107
for epoch 272 mean loss on train 2.0965657234191895
for epoch 272 mean loss on val 2.1114954948425293


 27%|█████████████████▏                                             | 273/1000 [29:19<17:34:48, 87.05s/it]

training for epoch 273
for epoch 273 learning rate is 0.0006461081889226679
training_step
0 2.1197839883657603
validation_step
0 2.0373416313758264
for epoch 273 mean loss on train 2.119783878326416
for epoch 273 mean loss on val 2.037341594696045


 27%|█████████████████▎                                             | 274/1000 [29:20<12:18:16, 61.01s/it]

training for epoch 274
for epoch 274 learning rate is 0.0006461081889226679
training_step
0 1.9862491901104267
validation_step
0 2.059084232036884
for epoch 274 mean loss on train 1.9862492084503174
for epoch 274 mean loss on val 2.059084177017212


 28%|█████████████████▌                                              | 275/1000 [29:20<8:36:57, 42.78s/it]

training for epoch 275
for epoch 275 learning rate is 0.0006461081889226679
training_step
0 1.9762496948242188
validation_step
0 2.12768187889686
for epoch 275 mean loss on train 1.9762496948242188
for epoch 275 mean loss on val 2.1276819705963135


 28%|█████████████████▋                                              | 276/1000 [29:20<6:02:24, 30.03s/it]

training for epoch 276
for epoch 276 learning rate is 0.0006461081889226679
training_step
0 1.9870951725886419
validation_step
0 1.938005887545072
for epoch 276 mean loss on train 1.9870951175689697
for epoch 276 mean loss on val 1.9380059242248535


 28%|█████████████████▋                                              | 277/1000 [29:20<4:14:03, 21.08s/it]

training for epoch 277
for epoch 277 learning rate is 0.0006461081889226679
training_step
0 2.0736946692833533
validation_step
0 1.9948070232684796
for epoch 277 mean loss on train 2.0736947059631348
for epoch 277 mean loss on val 1.9948070049285889


 28%|█████████████████▊                                              | 278/1000 [29:21<2:58:21, 14.82s/it]

training for epoch 278
for epoch 278 learning rate is 0.0006461081889226679
training_step
0 2.0063083355243387
validation_step
0 2.237222378070538
for epoch 278 mean loss on train 2.0063083171844482
for epoch 278 mean loss on val 2.23722243309021


 28%|█████████████████▊                                              | 279/1000 [29:21<2:05:20, 10.43s/it]

training for epoch 279
for epoch 279 learning rate is 0.0006461081889226679
training_step
0 2.0372707660381613
validation_step
0 2.031309127807617
for epoch 279 mean loss on train 2.0372707843780518
for epoch 279 mean loss on val 2.031309127807617


 28%|█████████████████▉                                              | 280/1000 [29:21<1:28:17,  7.36s/it]

training for epoch 280
for epoch 280 learning rate is 0.0006461081889226679
training_step
0 2.052973380455604
validation_step
0 2.096747031578651
for epoch 280 mean loss on train 2.0529732704162598
for epoch 280 mean loss on val 2.0967469215393066


 28%|█████████████████▉                                              | 281/1000 [29:21<1:02:29,  5.21s/it]

training for epoch 281
for epoch 281 learning rate is 0.0006461081889226679
training_step
0 2.072490251981295
validation_step
0 1.9975851499117339
for epoch 281 mean loss on train 2.0724902153015137
for epoch 281 mean loss on val 1.9975851774215698


 28%|██████████████████▌                                               | 282/1000 [29:22<44:43,  3.74s/it]

training for epoch 282
for epoch 282 learning rate is 0.0006461081889226679
training_step
0 2.011617513803335
validation_step
0 2.0163326263427734
for epoch 282 mean loss on train 2.011617422103882
for epoch 282 mean loss on val 2.0163326263427734


 28%|██████████████████▋                                               | 283/1000 [29:22<32:17,  2.70s/it]

training for epoch 283
for epoch 283 learning rate is 0.0006461081889226679
training_step
0 2.171593739436223
validation_step
0 2.1009691678560696
for epoch 283 mean loss on train 2.17159366607666
for epoch 283 mean loss on val 2.100969076156616


 28%|██████████████████▋                                               | 284/1000 [29:22<23:23,  1.96s/it]

training for epoch 284
for epoch 284 learning rate is 0.0006461081889226679
training_step
0 1.9845720437856822
validation_step
0 2.0930922581599307
for epoch 284 mean loss on train 1.9845720529556274
for epoch 284 mean loss on val 2.093092203140259


 28%|██████████████████▊                                               | 285/1000 [29:22<16:58,  1.42s/it]

training for epoch 285
for epoch 285 learning rate is 0.0006461081889226679
training_step
0 1.956961411696214
validation_step
0 2.063651158259465
for epoch 285 mean loss on train 1.9569613933563232
for epoch 285 mean loss on val 2.0636510848999023


 29%|██████████████████▉                                               | 286/1000 [29:22<12:31,  1.05s/it]

training for epoch 286
for epoch 286 learning rate is 0.0006461081889226679
training_step
0 2.0039185744065504
validation_step
0 2.13124025785006
for epoch 286 mean loss on train 2.0039186477661133
for epoch 286 mean loss on val 2.1312403678894043


 29%|██████████████████▉                                               | 287/1000 [29:23<10:57,  1.08it/s]

training for epoch 287
for epoch 287 learning rate is 0.0006461081889226679
training_step
0 1.9925256875845103
validation_step
0 2.0242379995492787
for epoch 287 mean loss on train 1.9925256967544556
for epoch 287 mean loss on val 2.024238109588623


 29%|███████████████████                                               | 288/1000 [29:23<08:40,  1.37it/s]

training for epoch 288
for epoch 288 learning rate is 0.0006461081889226679
training_step
0 2.11507195692796
validation_step
0 2.138919243445763
for epoch 288 mean loss on train 2.115072011947632
for epoch 288 mean loss on val 2.1389193534851074


 29%|███████████████████                                               | 289/1000 [29:24<06:58,  1.70it/s]

training for epoch 289
for epoch 289 learning rate is 0.0006461081889226679
training_step
0 1.9548172583946815
validation_step
0 2.032686233520508
for epoch 289 mean loss on train 1.954817295074463
for epoch 289 mean loss on val 2.032686233520508


 29%|███████████████████▏                                              | 290/1000 [29:24<05:39,  2.09it/s]

training for epoch 290
for epoch 290 learning rate is 0.0006461081889226679
training_step
0 1.9920909588153546
validation_step
0 2.000862561739408
for epoch 290 mean loss on train 1.9920909404754639
for epoch 290 mean loss on val 2.0008625984191895


 29%|███████████████████▏                                              | 291/1000 [29:24<04:31,  2.61it/s]

training for epoch 291
for epoch 291 learning rate is 0.0006461081889226679
training_step
0 2.004388662484976
validation_step
0 2.065450521615835
for epoch 291 mean loss on train 2.0043885707855225
for epoch 291 mean loss on val 2.065450429916382


 29%|███████████████████▎                                              | 292/1000 [29:24<03:40,  3.21it/s]

training for epoch 292
for epoch 292 learning rate is 0.0006461081889226679
training_step
0 2.2219747396615834
validation_step
0 2.0927388117863583
for epoch 292 mean loss on train 2.2219748497009277
for epoch 292 mean loss on val 2.0927388668060303


 29%|███████████████████▎                                              | 293/1000 [29:24<02:58,  3.96it/s]

training for epoch 293
for epoch 293 learning rate is 0.0006461081889226679
training_step
0 2.0383211282583384
validation_step
0 2.004670656644381
for epoch 293 mean loss on train 2.038321018218994
for epoch 293 mean loss on val 2.0046706199645996


 29%|███████████████████▍                                              | 294/1000 [29:24<02:32,  4.62it/s]

training for epoch 294
for epoch 294 learning rate is 0.0005814973700304011
training_step
0 2.018434084378756
validation_step
0 2.0324425330528846
for epoch 294 mean loss on train 2.0184340476989746
for epoch 294 mean loss on val 2.032442569732666


 30%|███████████████████▍                                              | 295/1000 [29:24<02:11,  5.37it/s]

training for epoch 295
for epoch 295 learning rate is 0.0005814973700304011
training_step
0 2.1408057579627404
validation_step
0 2.0702351790208082
for epoch 295 mean loss on train 2.140805721282959
for epoch 295 mean loss on val 2.070235252380371


 30%|███████████████████▌                                              | 296/1000 [29:25<01:58,  5.96it/s]

training for epoch 296
for epoch 296 learning rate is 0.0005814973700304011
training_step
0 2.084811283991887
validation_step
0 2.1372930086576023
for epoch 296 mean loss on train 2.084811210632324
for epoch 296 mean loss on val 2.1372931003570557


 30%|███████████████████▌                                              | 297/1000 [29:25<01:56,  6.02it/s]

training for epoch 297
for epoch 297 learning rate is 0.0005814973700304011
training_step
0 2.0020599365234375
validation_step
0 2.0490765204796424
for epoch 297 mean loss on train 2.0020599365234375
for epoch 297 mean loss on val 2.049076557159424


 30%|███████████████████▋                                              | 298/1000 [29:25<01:54,  6.12it/s]

training for epoch 298
for epoch 298 learning rate is 0.0005814973700304011
training_step
0 2.0951282794658956
validation_step
0 2.004320584810697
for epoch 298 mean loss on train 2.095128297805786
for epoch 298 mean loss on val 2.0043206214904785


 30%|███████████████████▋                                              | 299/1000 [29:25<02:00,  5.83it/s]

training for epoch 299
for epoch 299 learning rate is 0.0005814973700304011
training_step
0 2.0699420342078576
validation_step
0 2.0431741567758412
for epoch 299 mean loss on train 2.069941997528076
for epoch 299 mean loss on val 2.0431742668151855
training for epoch 300
for epoch 300 learning rate is 0.0005814973700304011
training_step
0 1.9670661045954778
validation_step
0 2.2115438901461086
for epoch 300 mean loss on train 1.9670660495758057
for epoch 300 mean loss on val 2.2115437984466553


 30%|███████████████████▊                                              | 301/1000 [29:25<01:36,  7.28it/s]

training for epoch 301
for epoch 301 learning rate is 0.0005814973700304011
training_step
0 2.0619364518385668
validation_step
0 2.058631603534405
for epoch 301 mean loss on train 2.061936378479004
for epoch 301 mean loss on val 2.058631658554077
training for epoch 302
for epoch 302 learning rate is 0.0005814973700304011
training_step
0 1.9913313939021184
validation_step
0 1.9627367166372447
for epoch 302 mean loss on train 1.9913313388824463
for epoch 302 mean loss on val 1.96273672580719


 30%|███████████████████▉                                              | 303/1000 [29:25<01:22,  8.40it/s]

training for epoch 303
for epoch 303 learning rate is 0.0005814973700304011
training_step
0 1.9771308898925781
validation_step
0 1.9976838918832631
for epoch 303 mean loss on train 1.9771308898925781
for epoch 303 mean loss on val 1.9976838827133179
training for epoch 304
for epoch 304 learning rate is 0.0005814973700304011
training_step
0 2.1680607428917518
validation_step
0 2.069210786085862
for epoch 304 mean loss on train 2.168060779571533
for epoch 304 mean loss on val 2.0692107677459717


 30%|████████████████████▏                                             | 305/1000 [29:26<01:17,  8.95it/s]

training for epoch 305
for epoch 305 learning rate is 0.0005814973700304011
training_step
0 2.06273680466872
validation_step
0 2.0200094076303334
for epoch 305 mean loss on train 2.062736749649048
for epoch 305 mean loss on val 2.0200095176696777


 31%|████████████████████▏                                             | 306/1000 [29:26<01:16,  9.12it/s]

training for epoch 306
for epoch 306 learning rate is 0.0005814973700304011
training_step
0 2.0462300227238583
validation_step
0 2.049626423762395
for epoch 306 mean loss on train 2.0462300777435303
for epoch 306 mean loss on val 2.049626350402832


 31%|████████████████████▎                                             | 307/1000 [29:26<01:15,  9.20it/s]

training for epoch 307
for epoch 307 learning rate is 0.0005814973700304011
training_step
0 2.0091661306527944
validation_step
0 2.0230211111215444
for epoch 307 mean loss on train 2.0091662406921387
for epoch 307 mean loss on val 2.0230212211608887
training for epoch 308
for epoch 308 learning rate is 0.0005814973700304011
training_step
0 2.102872555072491
validation_step
0 2.0271728222186747
for epoch 308 mean loss on train 2.102872610092163
for epoch 308 mean loss on val 2.027172803878784


 31%|████████████████████▍                                             | 309/1000 [29:26<01:10,  9.77it/s]

training for epoch 309
for epoch 309 learning rate is 0.0005814973700304011
training_step
0 2.0508053119365988
validation_step
0 2.101599326500526
for epoch 309 mean loss on train 2.0508053302764893
for epoch 309 mean loss on val 2.1015992164611816


 31%|████████████████████▍                                             | 310/1000 [29:26<01:12,  9.47it/s]

training for epoch 310
for epoch 310 learning rate is 0.0005814973700304011
training_step
0 1.9704629457913911
validation_step
0 2.1808366041917067
for epoch 310 mean loss on train 1.9704629182815552
for epoch 310 mean loss on val 2.1808366775512695


 31%|████████████████████▌                                             | 311/1000 [29:26<01:14,  9.23it/s]

training for epoch 311
for epoch 311 learning rate is 0.0005814973700304011
training_step
0 2.035752223088191
validation_step
0 1.98656492966872
for epoch 311 mean loss on train 2.035752296447754
for epoch 311 mean loss on val 1.9865648746490479


 31%|████████████████████▌                                             | 312/1000 [29:26<01:14,  9.24it/s]

training for epoch 312
for epoch 312 learning rate is 0.0005814973700304011
training_step
0 2.0626820784348707
validation_step
0 2.0144882202148438
for epoch 312 mean loss on train 2.0626821517944336
for epoch 312 mean loss on val 2.0144882202148438
training for epoch 313
for epoch 313 learning rate is 0.0005814973700304011
training_step
0 2.0166410299447866
validation_step
0 2.049809089073768
for epoch 313 mean loss on train 2.016641139984131
for epoch 313 mean loss on val 2.049808979034424


 31%|████████████████████▋                                             | 314/1000 [29:27<01:10,  9.68it/s]

training for epoch 314
for epoch 314 learning rate is 0.0005814973700304011
training_step
0 2.0286331176757812
validation_step
0 2.026571567241962
for epoch 314 mean loss on train 2.0286331176757812
for epoch 314 mean loss on val 2.02657151222229


 32%|████████████████████▊                                             | 315/1000 [29:27<01:10,  9.73it/s]

training for epoch 315
for epoch 315 learning rate is 0.0005233476330273611
training_step
0 2.000276858990009
validation_step
0 1.997197224543645
for epoch 315 mean loss on train 2.000276803970337
for epoch 315 mean loss on val 1.9971972703933716


 32%|████████████████████▊                                             | 316/1000 [29:27<01:10,  9.77it/s]

training for epoch 316
for epoch 316 learning rate is 0.0005233476330273611
training_step
0 1.981705592228816
validation_step
0 2.0389005220853367
for epoch 316 mean loss on train 1.9817055463790894
for epoch 316 mean loss on val 2.03890061378479
training for epoch 317
for epoch 317 learning rate is 0.0005233476330273611
training_step
0 1.9963956979604869
validation_step
0 2.0204862447885366
for epoch 317 mean loss on train 1.9963957071304321
for epoch 317 mean loss on val 2.020486354827881


 32%|████████████████████▉                                             | 318/1000 [29:27<01:06, 10.26it/s]

training for epoch 318
for epoch 318 learning rate is 0.0005233476330273611
training_step
0 2.0683150658240685
validation_step
0 2.0580379779522238
for epoch 318 mean loss on train 2.068315029144287
for epoch 318 mean loss on val 2.0580379962921143
training for epoch 319
for epoch 319 learning rate is 0.0005233476330273611
training_step
0 2.0990147223839393
validation_step
0 2.0664872389573317
for epoch 319 mean loss on train 2.0990147590637207
for epoch 319 mean loss on val 2.0664873123168945


 32%|█████████████████████                                             | 320/1000 [29:27<01:12,  9.36it/s]

training for epoch 320
for epoch 320 learning rate is 0.0005233476330273611
training_step
0 2.0274468935452976
validation_step
0 1.974277349618765
for epoch 320 mean loss on train 2.027446985244751
for epoch 320 mean loss on val 1.974277377128601


 32%|█████████████████████▏                                            | 321/1000 [29:27<01:13,  9.24it/s]

training for epoch 321
for epoch 321 learning rate is 0.0005233476330273611
training_step
0 2.0866234119121847
validation_step
0 2.0434226989746094
for epoch 321 mean loss on train 2.086623430252075
for epoch 321 mean loss on val 2.0434226989746094


 32%|█████████████████████▎                                            | 322/1000 [29:27<01:14,  9.07it/s]

training for epoch 322
for epoch 322 learning rate is 0.0005233476330273611
training_step
0 2.234685310950646
validation_step
0 2.0491441579965444
for epoch 322 mean loss on train 2.2346854209899902
for epoch 322 mean loss on val 2.0491442680358887
training for epoch 323
for epoch 323 learning rate is 0.0005233476330273611
training_step
0 2.076159257155198
validation_step
0 1.987070083618164
for epoch 323 mean loss on train 2.0761592388153076
for epoch 323 mean loss on val 1.987070083618164


 32%|█████████████████████▍                                            | 324/1000 [29:28<01:16,  8.87it/s]

training for epoch 324
for epoch 324 learning rate is 0.0005233476330273611
training_step
0 2.0752878922682543
validation_step
0 2.100583736713116
for epoch 324 mean loss on train 2.0752878189086914
for epoch 324 mean loss on val 2.100583791732788


 32%|█████████████████████▍                                            | 325/1000 [29:28<01:14,  9.03it/s]

training for epoch 325
for epoch 325 learning rate is 0.0005233476330273611
training_step
0 1.9614241673396184
validation_step
0 1.974083240215595
for epoch 325 mean loss on train 1.9614241123199463
for epoch 325 mean loss on val 1.9740831851959229
training for epoch 326
for epoch 326 learning rate is 0.0005233476330273611
training_step
0 2.1513729095458984
validation_step
0 1.995872790996845
for epoch 326 mean loss on train 2.1513729095458984
for epoch 326 mean loss on val 1.9958727359771729


 33%|█████████████████████▌                                            | 327/1000 [29:28<01:11,  9.47it/s]

training for epoch 327
for epoch 327 learning rate is 0.0005233476330273611
training_step
0 2.031279050386869
validation_step
0 2.0425497201772838
for epoch 327 mean loss on train 2.0312790870666504
for epoch 327 mean loss on val 2.0425496101379395


 33%|█████████████████████▋                                            | 328/1000 [29:28<01:11,  9.39it/s]

training for epoch 328
for epoch 328 learning rate is 0.0005233476330273611
training_step
0 2.082814730130709
validation_step
0 2.0561067141019382
for epoch 328 mean loss on train 2.0828146934509277
for epoch 328 mean loss on val 2.0561068058013916


 33%|█████████████████████▋                                            | 329/1000 [29:28<01:13,  9.15it/s]

training for epoch 329
for epoch 329 learning rate is 0.0005233476330273611
training_step
0 2.034308066734901
validation_step
0 1.9656248826246996
for epoch 329 mean loss on train 2.0343079566955566
for epoch 329 mean loss on val 1.9656249284744263
training for epoch 330
for epoch 330 learning rate is 0.0005233476330273611
training_step
0 2.023177513709435
validation_step
0 2.013031005859375
for epoch 330 mean loss on train 2.0231776237487793
for epoch 330 mean loss on val 2.013031005859375


 33%|█████████████████████▊                                            | 331/1000 [29:28<01:12,  9.28it/s]

training for epoch 331
for epoch 331 learning rate is 0.0005233476330273611
training_step
0 2.0038108825683594
validation_step
0 2.0531680767352762
for epoch 331 mean loss on train 2.0038108825683594
for epoch 331 mean loss on val 2.0531680583953857


 33%|█████████████████████▉                                            | 332/1000 [29:28<01:11,  9.33it/s]

training for epoch 332
for epoch 332 learning rate is 0.0005233476330273611
training_step
0 2.0445936643160305
validation_step
0 2.0724171858567457
for epoch 332 mean loss on train 2.044593572616577
for epoch 332 mean loss on val 2.0724172592163086
training for epoch 333
for epoch 333 learning rate is 0.0005233476330273611
training_step
0 1.9343404036301832
validation_step
0 2.0828035794771633
for epoch 333 mean loss on train 1.9343403577804565
for epoch 333 mean loss on val 2.08280348777771


 33%|██████████████████████                                            | 334/1000 [29:29<01:08,  9.71it/s]

training for epoch 334
for epoch 334 learning rate is 0.0005233476330273611
training_step
0 2.092378322894757
validation_step
0 2.031417259803185
for epoch 334 mean loss on train 2.0923783779144287
for epoch 334 mean loss on val 2.0314173698425293


 34%|██████████████████████                                            | 335/1000 [29:29<01:08,  9.73it/s]

training for epoch 335
for epoch 335 learning rate is 0.0005233476330273611
training_step
0 1.9884355985201323
validation_step
0 2.0276643312894382
for epoch 335 mean loss on train 1.9884356260299683
for epoch 335 mean loss on val 2.0276644229888916


 34%|██████████████████████▏                                           | 336/1000 [29:29<01:08,  9.66it/s]

training for epoch 336
for epoch 336 learning rate is 0.000471012869724625
training_step
0 2.0435785146859975
validation_step
0 2.069255095261794
for epoch 336 mean loss on train 2.043578624725342
for epoch 336 mean loss on val 2.0692551136016846
training for epoch 337
for epoch 337 learning rate is 0.000471012869724625
training_step
0 2.033919407771184
validation_step
0 2.049078867985652
for epoch 337 mean loss on train 2.033919334411621
for epoch 337 mean loss on val 2.049078941345215


 34%|██████████████████████▎                                           | 338/1000 [29:29<01:08,  9.64it/s]

training for epoch 338
for epoch 338 learning rate is 0.000471012869724625
training_step
0 2.017064021183894
validation_step
0 1.9840591137225811
for epoch 338 mean loss on train 2.017064094543457
for epoch 338 mean loss on val 1.9840590953826904
training for epoch 339
for epoch 339 learning rate is 0.000471012869724625
training_step
0 1.9981150993934045
validation_step
0 2.0152432368351865
for epoch 339 mean loss on train 1.998115062713623
for epoch 339 mean loss on val 2.0152432918548584


 34%|██████████████████████▍                                           | 340/1000 [29:29<01:05, 10.00it/s]

training for epoch 340
for epoch 340 learning rate is 0.000471012869724625
training_step
0 2.0435936267559347
validation_step
0 2.0623895204984226
for epoch 340 mean loss on train 2.043593645095825
for epoch 340 mean loss on val 2.062389612197876


 34%|██████████████████████▌                                           | 341/1000 [29:29<01:06,  9.85it/s]

training for epoch 341
for epoch 341 learning rate is 0.000471012869724625
training_step
0 2.0086351541372447
validation_step
0 2.162286318265475
for epoch 341 mean loss on train 2.0086350440979004
for epoch 341 mean loss on val 2.1622862815856934
training for epoch 342
for epoch 342 learning rate is 0.000471012869724625
training_step
0 2.023957032423753
validation_step
0 2.000270990224985
for epoch 342 mean loss on train 2.0239570140838623
for epoch 342 mean loss on val 2.0002710819244385


 34%|██████████████████████▋                                           | 343/1000 [29:30<01:06,  9.93it/s]

training for epoch 343
for epoch 343 learning rate is 0.000471012869724625
training_step
0 2.141954862154447
validation_step
0 1.9982949770413911
for epoch 343 mean loss on train 2.1419548988342285
for epoch 343 mean loss on val 1.9982949495315552


 34%|██████████████████████▋                                           | 344/1000 [29:30<01:08,  9.61it/s]

training for epoch 344
for epoch 344 learning rate is 0.000471012869724625
training_step
0 1.9755828563983624
validation_step
0 1.977364026583158
for epoch 344 mean loss on train 1.9755828380584717
for epoch 344 mean loss on val 1.9773640632629395


 34%|██████████████████████▊                                           | 345/1000 [29:30<01:09,  9.37it/s]

training for epoch 345
for epoch 345 learning rate is 0.000471012869724625
training_step
0 1.9605071728046124
validation_step
0 2.073521540715144
for epoch 345 mean loss on train 1.9605071544647217
for epoch 345 mean loss on val 2.073521614074707
training for epoch 346
for epoch 346 learning rate is 0.000471012869724625
training_step
0 2.0153733766995945
validation_step
0 2.0037718552809496
for epoch 346 mean loss on train 2.015373468399048
for epoch 346 mean loss on val 2.0037717819213867


 35%|██████████████████████▉                                           | 347/1000 [29:30<01:07,  9.71it/s]

training for epoch 347
for epoch 347 learning rate is 0.000471012869724625
training_step
0 2.0421372927152195
validation_step
0 2.113605792705829
for epoch 347 mean loss on train 2.042137384414673
for epoch 347 mean loss on val 2.1136057376861572
training for epoch 348
for epoch 348 learning rate is 0.000471012869724625
training_step
0 2.0485955751859226
validation_step
0 2.09760255080003
for epoch 348 mean loss on train 2.048595666885376
for epoch 348 mean loss on val 2.097602605819702


 35%|███████████████████████                                           | 349/1000 [29:30<01:05,  9.97it/s]

training for epoch 349
for epoch 349 learning rate is 0.000471012869724625
training_step
0 2.019674301147461
validation_step
0 2.116497333233173
for epoch 349 mean loss on train 2.019674301147461
for epoch 349 mean loss on val 2.116497278213501


 35%|███████████████████████                                           | 350/1000 [29:30<01:06,  9.80it/s]

training for epoch 350
for epoch 350 learning rate is 0.000471012869724625
training_step
0 2.0908892704890323
validation_step
0 2.041045849139874
for epoch 350 mean loss on train 2.0908892154693604
for epoch 350 mean loss on val 2.041045904159546


 35%|███████████████████████▏                                          | 351/1000 [29:30<01:07,  9.64it/s]

training for epoch 351
for epoch 351 learning rate is 0.000471012869724625
training_step
0 2.0620733407827525
validation_step
0 2.047433559711163
for epoch 351 mean loss on train 2.062073230743408
for epoch 351 mean loss on val 2.047433614730835


 35%|███████████████████████▏                                          | 352/1000 [29:31<01:07,  9.60it/s]

training for epoch 352
for epoch 352 learning rate is 0.000471012869724625
training_step
0 2.161272195669321
validation_step
0 1.9910868131197417
for epoch 352 mean loss on train 2.1612722873687744
for epoch 352 mean loss on val 1.9910868406295776


 35%|███████████████████████▎                                          | 353/1000 [29:31<01:08,  9.45it/s]

training for epoch 353
for epoch 353 learning rate is 0.000471012869724625
training_step
0 2.0656647315392127
validation_step
0 2.217594733605018
for epoch 353 mean loss on train 2.065664768218994
for epoch 353 mean loss on val 2.217594623565674
training for epoch 354
for epoch 354 learning rate is 0.000471012869724625
training_step
0 2.112974900465745
validation_step
0 2.0772410172682543
for epoch 354 mean loss on train 2.1129748821258545
for epoch 354 mean loss on val 2.0772409439086914


 36%|███████████████████████▍                                          | 355/1000 [29:31<01:07,  9.62it/s]

training for epoch 355
for epoch 355 learning rate is 0.000471012869724625
training_step
0 2.0454189593975363
validation_step
0 2.011445999145508
for epoch 355 mean loss on train 2.0454189777374268
for epoch 355 mean loss on val 2.011445999145508


 36%|███████████████████████▍                                          | 356/1000 [29:31<01:06,  9.61it/s]

training for epoch 356
for epoch 356 learning rate is 0.000471012869724625
training_step
0 1.9909983414870043
validation_step
0 2.1361449314997745
for epoch 356 mean loss on train 1.990998387336731
for epoch 356 mean loss on val 2.1361448764801025


 36%|███████████████████████▌                                          | 357/1000 [29:31<01:06,  9.66it/s]

training for epoch 357
for epoch 357 learning rate is 0.0004239115827521625
training_step
0 2.0180752093975363
validation_step
0 2.0671926645132213
for epoch 357 mean loss on train 2.0180752277374268
for epoch 357 mean loss on val 2.067192554473877


 36%|███████████████████████▋                                          | 358/1000 [29:31<01:07,  9.45it/s]

training for epoch 358
for epoch 358 learning rate is 0.0004239115827521625
training_step
0 1.9840843494121845
validation_step
0 2.0177188286414514
for epoch 358 mean loss on train 1.9840843677520752
for epoch 358 mean loss on val 2.01771879196167


 36%|███████████████████████▋                                          | 359/1000 [29:31<01:07,  9.45it/s]

training for epoch 359
for epoch 359 learning rate is 0.0004239115827521625
training_step
0 2.0358236753023586
validation_step
0 2.0203550778902493
for epoch 359 mean loss on train 2.0358235836029053
for epoch 359 mean loss on val 2.020354986190796
training for epoch 360
for epoch 360 learning rate is 0.0004239115827521625
training_step
0 1.9989439157339244
validation_step
0 2.013896648700421
for epoch 360 mean loss on train 1.9989439249038696
for epoch 360 mean loss on val 2.0138967037200928


 36%|███████████████████████▊                                          | 361/1000 [29:31<01:07,  9.51it/s]

training for epoch 361
for epoch 361 learning rate is 0.0004239115827521625
training_step
0 2.0253510108360877
validation_step
0 1.9812827477088342
for epoch 361 mean loss on train 2.025351047515869
for epoch 361 mean loss on val 1.9812827110290527
training for epoch 362
for epoch 362 learning rate is 0.0004239115827521625
training_step
0 1.9503919161283052
validation_step
0 2.0926380157470703
for epoch 362 mean loss on train 1.9503918886184692
for epoch 362 mean loss on val 2.0926380157470703


 36%|███████████████████████▉                                          | 363/1000 [29:32<01:05,  9.70it/s]

training for epoch 363
for epoch 363 learning rate is 0.0004239115827521625
training_step
0 2.1269212869497447
validation_step
0 2.027959236731896
for epoch 363 mean loss on train 2.1269211769104004
for epoch 363 mean loss on val 2.0279593467712402
training for epoch 364
for epoch 364 learning rate is 0.0004239115827521625
training_step
0 1.9567351708045373
validation_step
0 2.028553155752329
for epoch 364 mean loss on train 1.9567351341247559
for epoch 364 mean loss on val 2.0285532474517822


 36%|████████████████████████                                          | 365/1000 [29:32<01:05,  9.72it/s]

training for epoch 365
for epoch 365 learning rate is 0.0004239115827521625
training_step
0 2.0683228419377255
validation_step
0 2.099680387056791
for epoch 365 mean loss on train 2.0683228969573975
for epoch 365 mean loss on val 2.0996804237365723


 37%|████████████████████████▏                                         | 366/1000 [29:32<01:06,  9.59it/s]

training for epoch 366
for epoch 366 learning rate is 0.0004239115827521625
training_step
0 1.9412117004394531
validation_step
0 2.1054515838623047
for epoch 366 mean loss on train 1.9412117004394531
for epoch 366 mean loss on val 2.1054515838623047


 37%|████████████████████████▏                                         | 367/1000 [29:32<01:07,  9.32it/s]

training for epoch 367
for epoch 367 learning rate is 0.0004239115827521625
training_step
0 2.0361937009371243
validation_step
0 1.9905159290020282
for epoch 367 mean loss on train 2.036193609237671
for epoch 367 mean loss on val 1.990515947341919


 37%|████████████████████████▎                                         | 368/1000 [29:32<01:06,  9.45it/s]

training for epoch 368
for epoch 368 learning rate is 0.0004239115827521625
training_step
0 2.0150476602407603
validation_step
0 1.988308099599985
for epoch 368 mean loss on train 2.015047550201416
for epoch 368 mean loss on val 1.988308072090149


 37%|████████████████████████▎                                         | 369/1000 [29:32<01:07,  9.41it/s]

training for epoch 369
for epoch 369 learning rate is 0.0004239115827521625
training_step
0 2.079479070810171
validation_step
0 2.0893861330472507
for epoch 369 mean loss on train 2.0794789791107178
for epoch 369 mean loss on val 2.089386224746704


 37%|████████████████████████▍                                         | 370/1000 [29:32<01:07,  9.38it/s]

training for epoch 370
for epoch 370 learning rate is 0.0004239115827521625
training_step
0 1.988695584810697
validation_step
0 2.065088858971229
for epoch 370 mean loss on train 1.9886956214904785
for epoch 370 mean loss on val 2.0650887489318848


 37%|████████████████████████▍                                         | 371/1000 [29:33<01:07,  9.33it/s]

training for epoch 371
for epoch 371 learning rate is 0.0004239115827521625
training_step
0 1.9960867074819713
validation_step
0 2.087103036733774
for epoch 371 mean loss on train 1.9960867166519165
for epoch 371 mean loss on val 2.0871031284332275


 37%|████████████████████████▌                                         | 372/1000 [29:33<01:06,  9.38it/s]

training for epoch 372
for epoch 372 learning rate is 0.0004239115827521625
training_step
0 2.007098124577449
validation_step
0 2.003941169151893
for epoch 372 mean loss on train 2.0070981979370117
for epoch 372 mean loss on val 2.003941059112549


 37%|████████████████████████▌                                         | 373/1000 [29:33<01:07,  9.36it/s]

training for epoch 373
for epoch 373 learning rate is 0.0004239115827521625
training_step
0 2.0076851477989783
validation_step
0 1.9625709240253155
for epoch 373 mean loss on train 2.0076851844787598
for epoch 373 mean loss on val 1.9625709056854248


 37%|████████████████████████▋                                         | 374/1000 [29:33<01:06,  9.36it/s]

training for epoch 374
for epoch 374 learning rate is 0.0004239115827521625
training_step
0 1.980000715989333
validation_step
0 2.0480304131141076
for epoch 374 mean loss on train 1.9800007343292236
for epoch 374 mean loss on val 2.048030376434326


 38%|████████████████████████▊                                         | 375/1000 [29:33<01:07,  9.30it/s]

training for epoch 375
for epoch 375 learning rate is 0.0004239115827521625
training_step
0 2.0787632281963644
validation_step
0 1.9801653348482573
for epoch 375 mean loss on train 2.078763246536255
for epoch 375 mean loss on val 1.9801653623580933


 38%|████████████████████████▊                                         | 376/1000 [29:33<01:08,  9.16it/s]

training for epoch 376
for epoch 376 learning rate is 0.0004239115827521625
training_step
0 2.014546174269456
validation_step
0 2.065569364107572
for epoch 376 mean loss on train 2.0145461559295654
for epoch 376 mean loss on val 2.0655694007873535


 38%|████████████████████████▉                                         | 377/1000 [29:33<01:13,  8.42it/s]

training for epoch 377
for epoch 377 learning rate is 0.0004239115827521625
training_step
0 2.039624727689303
validation_step
0 2.050731365497296
for epoch 377 mean loss on train 2.0396246910095215
for epoch 377 mean loss on val 2.0507314205169678


 38%|████████████████████████▉                                         | 378/1000 [29:33<01:18,  7.93it/s]

training for epoch 378
for epoch 378 learning rate is 0.00038152042447694626
training_step
0 2.0204827235295224
validation_step
0 2.0465884575477014
for epoch 378 mean loss on train 2.0204827785491943
for epoch 378 mean loss on val 2.04658842086792


 38%|█████████████████████████                                         | 379/1000 [29:34<01:27,  7.11it/s]

training for epoch 379
for epoch 379 learning rate is 0.00038152042447694626
training_step
0 2.0156839810884914
validation_step
0 2.1179931347186747
for epoch 379 mean loss on train 2.015683889389038
for epoch 379 mean loss on val 2.117993116378784


 38%|█████████████████████████                                         | 380/1000 [29:34<01:25,  7.25it/s]

training for epoch 380
for epoch 380 learning rate is 0.00038152042447694626
training_step
0 1.9964777139516978
validation_step
0 2.065891119150015
for epoch 380 mean loss on train 1.996477723121643
for epoch 380 mean loss on val 2.0658910274505615


 38%|█████████████████████████▏                                        | 381/1000 [29:34<01:20,  7.70it/s]

training for epoch 381
for epoch 381 learning rate is 0.00038152042447694626
training_step
0 2.1030037219707785
validation_step
0 2.0352093623234677
for epoch 381 mean loss on train 2.103003740310669
for epoch 381 mean loss on val 2.0352094173431396
training for epoch 382
for epoch 382 learning rate is 0.00038152042447694626
training_step
0 2.1595388559194713
validation_step
0 2.0681010026198168
for epoch 382 mean loss on train 2.159538745880127
for epoch 382 mean loss on val 2.068100929260254


 38%|█████████████████████████▎                                        | 383/1000 [29:34<01:11,  8.68it/s]

training for epoch 383
for epoch 383 learning rate is 0.00038152042447694626
training_step
0 2.0619964599609375
validation_step
0 2.033590170053335
for epoch 383 mean loss on train 2.0619964599609375
for epoch 383 mean loss on val 2.033590078353882


 38%|█████████████████████████▎                                        | 384/1000 [29:34<01:11,  8.62it/s]

training for epoch 384
for epoch 384 learning rate is 0.00038152042447694626
training_step
0 2.0645121060884914
validation_step
0 2.0168917729304385
for epoch 384 mean loss on train 2.064512014389038
for epoch 384 mean loss on val 2.0168917179107666


 38%|█████████████████████████▍                                        | 385/1000 [29:34<01:10,  8.75it/s]

training for epoch 385
for epoch 385 learning rate is 0.00038152042447694626
training_step
0 2.0273267305814304
validation_step
0 2.1103822267972507
for epoch 385 mean loss on train 2.027326822280884
for epoch 385 mean loss on val 2.110382318496704


 39%|█████████████████████████▍                                        | 386/1000 [29:34<01:08,  9.01it/s]

training for epoch 386
for epoch 386 learning rate is 0.00038152042447694626
training_step
0 2.0263057121863732
validation_step
0 2.0304299868070164
for epoch 386 mean loss on train 2.026305675506592
for epoch 386 mean loss on val 2.0304300785064697
training for epoch 387
for epoch 387 learning rate is 0.00038152042447694626
training_step
0 2.0200990530160756
validation_step
0 2.040521474984976
for epoch 387 mean loss on train 2.02009916305542
for epoch 387 mean loss on val 2.0405213832855225


 39%|█████████████████████████▌                                        | 388/1000 [29:35<01:03,  9.57it/s]

training for epoch 388
for epoch 388 learning rate is 0.00038152042447694626
training_step
0 2.0386958489051232
validation_step
0 2.0958357590895433
for epoch 388 mean loss on train 2.038695812225342
for epoch 388 mean loss on val 2.0958356857299805


 39%|█████████████████████████▋                                        | 389/1000 [29:35<01:04,  9.46it/s]

training for epoch 389
for epoch 389 learning rate is 0.00038152042447694626
training_step
0 2.0449289175180287
validation_step
0 2.08054439838116
for epoch 389 mean loss on train 2.044929027557373
for epoch 389 mean loss on val 2.0805444717407227


 39%|█████████████████████████▋                                        | 390/1000 [29:35<01:06,  9.20it/s]

training for epoch 390
for epoch 390 learning rate is 0.00038152042447694626
training_step
0 1.9844850393442006
validation_step
0 2.0155918414776144
for epoch 390 mean loss on train 1.9844850301742554
for epoch 390 mean loss on val 2.015591859817505


 39%|█████████████████████████▊                                        | 391/1000 [29:35<01:07,  8.96it/s]

training for epoch 391
for epoch 391 learning rate is 0.00038152042447694626
training_step
0 2.088933357825646
validation_step
0 2.081301909226638
for epoch 391 mean loss on train 2.0889334678649902
for epoch 391 mean loss on val 2.0813019275665283


 39%|█████████████████████████▊                                        | 392/1000 [29:35<01:07,  9.06it/s]

training for epoch 392
for epoch 392 learning rate is 0.00038152042447694626
training_step
0 2.0867664630596456
validation_step
0 2.0508336287278395
for epoch 392 mean loss on train 2.086766481399536
for epoch 392 mean loss on val 2.0508337020874023
training for epoch 393
for epoch 393 learning rate is 0.00038152042447694626
training_step
0 2.1395709698016825
validation_step
0 2.010314647967999
for epoch 393 mean loss on train 2.139570951461792
for epoch 393 mean loss on val 2.010314702987671


 39%|██████████████████████████                                        | 394/1000 [29:35<01:03,  9.52it/s]

training for epoch 394
for epoch 394 learning rate is 0.00038152042447694626
training_step
0 2.0087142357459435
validation_step
0 1.9968778170072115
for epoch 394 mean loss on train 2.008714199066162
for epoch 394 mean loss on val 1.9968777894973755


 40%|██████████████████████████                                        | 395/1000 [29:35<01:02,  9.62it/s]

training for epoch 395
for epoch 395 learning rate is 0.00038152042447694626
training_step
0 2.0105037689208984
validation_step
0 2.04440190241887
for epoch 395 mean loss on train 2.0105037689208984
for epoch 395 mean loss on val 2.0444018840789795
training for epoch 396
for epoch 396 learning rate is 0.00038152042447694626
training_step
0 1.9830215160663311
validation_step
0 2.1440054086538463
for epoch 396 mean loss on train 1.9830214977264404
for epoch 396 mean loss on val 2.144005298614502


 40%|██████████████████████████▏                                       | 397/1000 [29:35<01:01,  9.88it/s]

training for epoch 397
for epoch 397 learning rate is 0.00038152042447694626
training_step
0 2.074345221886268
validation_step
0 2.076568310077374
for epoch 397 mean loss on train 2.074345111846924
for epoch 397 mean loss on val 2.076568365097046
training for epoch 398
for epoch 398 learning rate is 0.00038152042447694626
training_step
0 2.0709686279296875
validation_step
0 1.9976062774658203
for epoch 398 mean loss on train 2.0709686279296875
for epoch 398 mean loss on val 1.9976062774658203


 40%|██████████████████████████▎                                       | 399/1000 [29:36<01:01,  9.84it/s]

training for epoch 399
for epoch 399 learning rate is 0.00034336838202925164
training_step
0 2.065222373375526
validation_step
0 2.1354826413668118
for epoch 399 mean loss on train 2.0652222633361816
for epoch 399 mean loss on val 2.1354825496673584


 40%|██████████████████████████▍                                       | 400/1000 [29:36<01:00,  9.87it/s]

training for epoch 400
for epoch 400 learning rate is 0.00034336838202925164
training_step
0 2.058521417471079
validation_step
0 1.9647659888634315
for epoch 400 mean loss on train 2.0585215091705322
for epoch 400 mean loss on val 1.964766025543213
training for epoch 401
for epoch 401 learning rate is 0.00034336838202925164
training_step
0 2.1144112807053785
validation_step
0 2.091596016517052
for epoch 401 mean loss on train 2.1144113540649414
for epoch 401 mean loss on val 2.0915961265563965


 40%|██████████████████████████▌                                       | 402/1000 [29:36<00:59,  9.98it/s]

training for epoch 402
for epoch 402 learning rate is 0.00034336838202925164
training_step
0 2.06836304297814
validation_step
0 2.0026494539701023
for epoch 402 mean loss on train 2.0683629512786865
for epoch 402 mean loss on val 2.0026495456695557


 40%|██████████████████████████▌                                       | 403/1000 [29:36<01:02,  9.56it/s]

training for epoch 403
for epoch 403 learning rate is 0.00034336838202925164
training_step
0 2.026982674231896
validation_step
0 2.0378312330979567
for epoch 403 mean loss on train 2.0269827842712402
for epoch 403 mean loss on val 2.0378313064575195


 40%|██████████████████████████▋                                       | 404/1000 [29:36<01:03,  9.39it/s]

training for epoch 404
for epoch 404 learning rate is 0.00034336838202925164
training_step
0 1.9977152897761419
validation_step
0 2.014089290912335
for epoch 404 mean loss on train 1.9977152347564697
for epoch 404 mean loss on val 2.014089345932007


 40%|██████████████████████████▋                                       | 405/1000 [29:36<01:03,  9.43it/s]

training for epoch 405
for epoch 405 learning rate is 0.00034336838202925164
training_step
0 2.0943219111515927
validation_step
0 2.1747383704552283
for epoch 405 mean loss on train 2.0943219661712646
for epoch 405 mean loss on val 2.1747384071350098


 41%|██████████████████████████▊                                       | 406/1000 [29:36<01:03,  9.42it/s]

training for epoch 406
for epoch 406 learning rate is 0.00034336838202925164
training_step
0 2.0168110774113583
validation_step
0 2.0384817857008715
for epoch 406 mean loss on train 2.0168111324310303
for epoch 406 mean loss on val 2.0384817123413086


 41%|██████████████████████████▊                                       | 407/1000 [29:37<01:02,  9.41it/s]

training for epoch 407
for epoch 407 learning rate is 0.00034336838202925164
training_step
0 2.044276310847356
validation_step
0 2.0084813924936147
for epoch 407 mean loss on train 2.044276237487793
for epoch 407 mean loss on val 2.008481502532959


 41%|██████████████████████████▉                                       | 408/1000 [29:37<01:03,  9.29it/s]

training for epoch 408
for epoch 408 learning rate is 0.00034336838202925164
training_step
0 2.0043177971473107
validation_step
0 2.1121773352989783
for epoch 408 mean loss on train 2.0043177604675293
for epoch 408 mean loss on val 2.1121773719787598


 41%|██████████████████████████▉                                       | 409/1000 [29:37<01:04,  9.19it/s]

training for epoch 409
for epoch 409 learning rate is 0.00034336838202925164
training_step
0 2.004332322340745
validation_step
0 1.9907310192401593
for epoch 409 mean loss on train 2.0043323040008545
for epoch 409 mean loss on val 1.9907310009002686
training for epoch 410
for epoch 410 learning rate is 0.00034336838202925164
training_step
0 1.9906130570631762
validation_step
0 2.0332581446721005
for epoch 410 mean loss on train 1.9906131029129028
for epoch 410 mean loss on val 2.0332581996917725


 41%|███████████████████████████▏                                      | 411/1000 [29:37<01:02,  9.35it/s]

training for epoch 411
for epoch 411 learning rate is 0.00034336838202925164
training_step
0 1.9940790029672475
validation_step
0 2.0374378791222205
for epoch 411 mean loss on train 1.9940789937973022
for epoch 411 mean loss on val 2.037437915802002


 41%|███████████████████████████▏                                      | 412/1000 [29:37<01:03,  9.22it/s]

training for epoch 412
for epoch 412 learning rate is 0.00034336838202925164
training_step
0 2.0880404252272387
validation_step
0 1.9900326362022986
for epoch 412 mean loss on train 2.088040351867676
for epoch 412 mean loss on val 1.99003267288208


 41%|███████████████████████████▎                                      | 413/1000 [29:37<01:03,  9.29it/s]

training for epoch 413
for epoch 413 learning rate is 0.00034336838202925164
training_step
0 2.010950675377479
validation_step
0 2.027666238638071
for epoch 413 mean loss on train 2.0109505653381348
for epoch 413 mean loss on val 2.0276663303375244


 41%|███████████████████████████▎                                      | 414/1000 [29:37<01:02,  9.30it/s]

training for epoch 414
for epoch 414 learning rate is 0.00034336838202925164
training_step
0 2.046732682448167
validation_step
0 2.031890575702374
for epoch 414 mean loss on train 2.0467326641082764
for epoch 414 mean loss on val 2.031890630722046


 42%|███████████████████████████▍                                      | 415/1000 [29:37<01:04,  9.11it/s]

training for epoch 415
for epoch 415 learning rate is 0.00034336838202925164
training_step
0 2.0238396571232724
validation_step
0 2.039501190185547
for epoch 415 mean loss on train 2.0238397121429443
for epoch 415 mean loss on val 2.039501190185547


 42%|███████████████████████████▍                                      | 416/1000 [29:37<01:03,  9.20it/s]

training for epoch 416
for epoch 416 learning rate is 0.00034336838202925164
training_step
0 2.0442661872276893
validation_step
0 2.0680116506723256
for epoch 416 mean loss on train 2.0442662239074707
for epoch 416 mean loss on val 2.06801176071167


 42%|███████████████████████████▌                                      | 417/1000 [29:38<01:03,  9.19it/s]

training for epoch 417
for epoch 417 learning rate is 0.00034336838202925164
training_step
0 2.090535970834585
validation_step
0 2.067811232346755
for epoch 417 mean loss on train 2.090535879135132
for epoch 417 mean loss on val 2.0678112506866455


 42%|███████████████████████████▌                                      | 418/1000 [29:38<01:02,  9.27it/s]

training for epoch 418
for epoch 418 learning rate is 0.00034336838202925164
training_step
0 2.03360836322491
validation_step
0 2.0562628232515774
for epoch 418 mean loss on train 2.0336084365844727
for epoch 418 mean loss on val 2.056262731552124


 42%|███████████████████████████▋                                      | 419/1000 [29:38<01:01,  9.45it/s]

training for epoch 419
for epoch 419 learning rate is 0.00034336838202925164
training_step
0 2.0835911677433896
validation_step
0 2.044878152700571
for epoch 419 mean loss on train 2.0835912227630615
for epoch 419 mean loss on val 2.0448782444000244


 42%|███████████████████████████▋                                      | 420/1000 [29:38<01:01,  9.39it/s]

training for epoch 420
for epoch 420 learning rate is 0.0003090315438263265
training_step
0 1.992150673499474
validation_step
0 2.0639786353478065
for epoch 420 mean loss on train 1.9921506643295288
for epoch 420 mean loss on val 2.063978672027588


 42%|███████████████████████████▊                                      | 421/1000 [29:38<01:00,  9.53it/s]

training for epoch 421
for epoch 421 learning rate is 0.0003090315438263265
training_step
0 2.068370525653546
validation_step
0 1.9777755737304688
for epoch 421 mean loss on train 2.0683705806732178
for epoch 421 mean loss on val 1.9777755737304688


 42%|███████████████████████████▊                                      | 422/1000 [29:38<01:01,  9.39it/s]

training for epoch 422
for epoch 422 learning rate is 0.0003090315438263265
training_step
0 2.013497132521409
validation_step
0 2.0350801027738132
for epoch 422 mean loss on train 2.0134971141815186
for epoch 422 mean loss on val 2.0350801944732666


 42%|███████████████████████████▉                                      | 423/1000 [29:38<01:03,  9.11it/s]

training for epoch 423
for epoch 423 learning rate is 0.0003090315438263265
training_step
0 2.0920385213998647
validation_step
0 2.1447968116173377
for epoch 423 mean loss on train 2.092038631439209
for epoch 423 mean loss on val 2.144796848297119


 42%|███████████████████████████▉                                      | 424/1000 [29:38<01:03,  9.05it/s]

training for epoch 424
for epoch 424 learning rate is 0.0003090315438263265
training_step
0 2.069657692542443
validation_step
0 2.0226576878474307
for epoch 424 mean loss on train 2.069657802581787
for epoch 424 mean loss on val 2.022657632827759


 42%|████████████████████████████                                      | 425/1000 [29:38<01:05,  8.84it/s]

training for epoch 425
for epoch 425 learning rate is 0.0003090315438263265
training_step
0 2.069221936739408
validation_step
0 2.002582550048828
for epoch 425 mean loss on train 2.0692219734191895
for epoch 425 mean loss on val 2.002582550048828


 43%|████████████████████████████                                      | 426/1000 [29:39<01:02,  9.12it/s]

training for epoch 426
for epoch 426 learning rate is 0.0003090315438263265
training_step
0 2.1008949279785156
validation_step
0 2.1292938819298377
for epoch 426 mean loss on train 2.1008949279785156
for epoch 426 mean loss on val 2.129293918609619
training for epoch 427
for epoch 427 learning rate is 0.0003090315438263265
training_step
0 2.0165465428278995
validation_step
0 2.148372063269982
for epoch 427 mean loss on train 2.0165464878082275
for epoch 427 mean loss on val 2.148372173309326


 43%|████████████████████████████▏                                     | 428/1000 [29:39<00:59,  9.59it/s]

training for epoch 428
for epoch 428 learning rate is 0.0003090315438263265
training_step
0 2.154888006357046
validation_step
0 2.0626751826359677
for epoch 428 mean loss on train 2.1548879146575928
for epoch 428 mean loss on val 2.0626752376556396


 43%|████████████████████████████▎                                     | 429/1000 [29:39<01:00,  9.50it/s]

training for epoch 429
for epoch 429 learning rate is 0.0003090315438263265
training_step
0 2.045878630418044
validation_step
0 2.0401925307053785
for epoch 429 mean loss on train 2.0458786487579346
for epoch 429 mean loss on val 2.0401926040649414
training for epoch 430
for epoch 430 learning rate is 0.0003090315438263265
training_step
0 2.094973784226638
validation_step
0 2.1962845142071066
for epoch 430 mean loss on train 2.0949738025665283
for epoch 430 mean loss on val 2.196284532546997


 43%|████████████████████████████▍                                     | 431/1000 [29:39<00:58,  9.81it/s]

training for epoch 431
for epoch 431 learning rate is 0.0003090315438263265
training_step
0 2.0332020979661207
validation_step
0 2.0664752079890323
for epoch 431 mean loss on train 2.0332021713256836
for epoch 431 mean loss on val 2.0664751529693604
training for epoch 432
for epoch 432 learning rate is 0.0003090315438263265
training_step
0 2.038974175086388
validation_step
0 2.125970400296725
for epoch 432 mean loss on train 2.0389742851257324
for epoch 432 mean loss on val 2.1259703636169434


 43%|████████████████████████████▌                                     | 433/1000 [29:39<00:57,  9.83it/s]

training for epoch 433
for epoch 433 learning rate is 0.0003090315438263265
training_step
0 2.1086145547720103
validation_step
0 2.1407189002403846
for epoch 433 mean loss on train 2.108614444732666
for epoch 433 mean loss on val 2.140718936920166


 43%|████████████████████████████▋                                     | 434/1000 [29:39<00:57,  9.86it/s]

training for epoch 434
for epoch 434 learning rate is 0.0003090315438263265
training_step
0 1.963666475736178
validation_step
0 2.1127266517052283
for epoch 434 mean loss on train 1.9636664390563965
for epoch 434 mean loss on val 2.1127266883850098
training for epoch 435
for epoch 435 learning rate is 0.0003090315438263265
training_step
0 2.11238039456881
validation_step
0 2.0258014385516825
for epoch 435 mean loss on train 2.1123805046081543
for epoch 435 mean loss on val 2.025801420211792


 44%|████████████████████████████▊                                     | 436/1000 [29:40<00:57,  9.84it/s]

training for epoch 436
for epoch 436 learning rate is 0.0003090315438263265
training_step
0 1.990145756648137
validation_step
0 2.0367007622352014
for epoch 436 mean loss on train 1.9901458024978638
for epoch 436 mean loss on val 2.03670072555542


 44%|████████████████████████████▊                                     | 437/1000 [29:40<00:58,  9.59it/s]

training for epoch 437
for epoch 437 learning rate is 0.0003090315438263265
training_step
0 2.196040520301232
validation_step
0 2.105707755455604
for epoch 437 mean loss on train 2.196040630340576
for epoch 437 mean loss on val 2.1057076454162598


 44%|████████████████████████████▉                                     | 438/1000 [29:40<01:03,  8.89it/s]

training for epoch 438
for epoch 438 learning rate is 0.0003090315438263265
training_step
0 2.030986785888672
validation_step
0 2.1169347029465895
for epoch 438 mean loss on train 2.030986785888672
for epoch 438 mean loss on val 2.1169347763061523
training for epoch 439
for epoch 439 learning rate is 0.0003090315438263265
training_step
0 1.999918717604417
validation_step
0 2.047853029691256
for epoch 439 mean loss on train 1.9999186992645264
for epoch 439 mean loss on val 2.0478529930114746


 44%|█████████████████████████████                                     | 440/1000 [29:40<01:01,  9.15it/s]

training for epoch 440
for epoch 440 learning rate is 0.0003090315438263265
training_step
0 2.2169804206261268
validation_step
0 2.0276266244741588
for epoch 440 mean loss on train 2.216980457305908
for epoch 440 mean loss on val 2.0276265144348145


 44%|█████████████████████████████                                     | 441/1000 [29:40<01:01,  9.13it/s]

training for epoch 441
for epoch 441 learning rate is 0.00027812838944369386
training_step
0 2.02329342181866
validation_step
0 2.0233592987060547
for epoch 441 mean loss on train 2.0232934951782227
for epoch 441 mean loss on val 2.0233592987060547


 44%|█████████████████████████████▏                                    | 442/1000 [29:40<01:00,  9.24it/s]

training for epoch 442
for epoch 442 learning rate is 0.00027812838944369386
training_step
0 2.053906807532677
validation_step
0 2.0036227886493387
for epoch 442 mean loss on train 2.0539069175720215
for epoch 442 mean loss on val 2.0036227703094482
training for epoch 443
for epoch 443 learning rate is 0.00027812838944369386
training_step
0 2.0418665959284854
validation_step
0 1.9563421102670522
for epoch 443 mean loss on train 2.0418665409088135
for epoch 443 mean loss on val 1.956342101097107


 44%|█████████████████████████████▎                                    | 444/1000 [29:40<00:57,  9.63it/s]

training for epoch 444
for epoch 444 learning rate is 0.00027812838944369386
training_step
0 2.058581278874324
validation_step
0 2.009329869196965
for epoch 444 mean loss on train 2.0585813522338867
for epoch 444 mean loss on val 2.0093297958374023
training for epoch 445
for epoch 445 learning rate is 0.00027812838944369386
training_step
0 2.039714813232422
validation_step
0 2.0755301255446215
for epoch 445 mean loss on train 2.039714813232422
for epoch 445 mean loss on val 2.0755300521850586


 45%|█████████████████████████████▍                                    | 446/1000 [29:41<00:55,  9.91it/s]

training for epoch 446
for epoch 446 learning rate is 0.00027812838944369386
training_step
0 2.080325493445763
validation_step
0 2.0321034651536207
for epoch 446 mean loss on train 2.0803256034851074
for epoch 446 mean loss on val 2.0321035385131836
training for epoch 447
for epoch 447 learning rate is 0.00027812838944369386
training_step
0 2.016902336707482
validation_step
0 2.0651865739088793
for epoch 447 mean loss on train 2.016902446746826
for epoch 447 mean loss on val 2.0651865005493164


 45%|█████████████████████████████▌                                    | 448/1000 [29:41<00:54, 10.11it/s]

training for epoch 448
for epoch 448 learning rate is 0.00027812838944369386
training_step
0 2.161772948044997
validation_step
0 2.018848125751202
for epoch 448 mean loss on train 2.1617729663848877
for epoch 448 mean loss on val 2.018848180770874
training for epoch 449
for epoch 449 learning rate is 0.00027812838944369386
training_step
0 1.997388399564303
validation_step
0 2.01284672663762
for epoch 449 mean loss on train 1.9973883628845215
for epoch 449 mean loss on val 2.0128467082977295


 45%|█████████████████████████████▋                                    | 450/1000 [29:41<00:56,  9.82it/s]

training for epoch 450
for epoch 450 learning rate is 0.00027812838944369386
training_step
0 2.058860338651217
validation_step
0 2.1012736100416918
for epoch 450 mean loss on train 2.0588603019714355
for epoch 450 mean loss on val 2.101273536682129


 45%|█████████████████████████████▊                                    | 451/1000 [29:41<00:56,  9.75it/s]

training for epoch 451
for epoch 451 learning rate is 0.00027812838944369386
training_step
0 2.0818331791804385
validation_step
0 2.033960929283729
for epoch 451 mean loss on train 2.0818331241607666
for epoch 451 mean loss on val 2.0339608192443848
training for epoch 452
for epoch 452 learning rate is 0.00027812838944369386
training_step
0 1.999093275803786
validation_step
0 1.9858445387620192
for epoch 452 mean loss on train 1.9990932941436768
for epoch 452 mean loss on val 1.9858444929122925


 45%|█████████████████████████████▉                                    | 453/1000 [29:41<00:56,  9.70it/s]

training for epoch 453
for epoch 453 learning rate is 0.00027812838944369386
training_step
0 2.0585218576284556
validation_step
0 1.9851552523099458
for epoch 453 mean loss on train 2.0585217475891113
for epoch 453 mean loss on val 1.9851552248001099


 45%|█████████████████████████████▉                                    | 454/1000 [29:41<00:57,  9.48it/s]

training for epoch 454
for epoch 454 learning rate is 0.00027812838944369386
training_step
0 2.0484178983248196
validation_step
0 2.1268050854022684
for epoch 454 mean loss on train 2.048417806625366
for epoch 454 mean loss on val 2.126805067062378


 46%|██████████████████████████████                                    | 455/1000 [29:42<00:57,  9.47it/s]

training for epoch 455
for epoch 455 learning rate is 0.00027812838944369386
training_step
0 2.071260745708759
validation_step
0 2.1582879286545973
for epoch 455 mean loss on train 2.071260690689087
for epoch 455 mean loss on val 2.15828800201416
training for epoch 456
for epoch 456 learning rate is 0.00027812838944369386
training_step
0 2.0704351571890025
validation_step
0 2.0712035252497745
for epoch 456 mean loss on train 2.070435047149658
for epoch 456 mean loss on val 2.0712034702301025


 46%|██████████████████████████████▏                                   | 457/1000 [29:42<00:55,  9.81it/s]

training for epoch 457
for epoch 457 learning rate is 0.00027812838944369386
training_step
0 2.08005743760329
validation_step
0 2.0961217146653395
for epoch 457 mean loss on train 2.080057382583618
for epoch 457 mean loss on val 2.0961217880249023
training for epoch 458
for epoch 458 learning rate is 0.00027812838944369386
training_step
0 2.0200906900259166
validation_step
0 2.025287921612079
for epoch 458 mean loss on train 2.0200905799865723
for epoch 458 mean loss on val 2.0252878665924072


 46%|██████████████████████████████▎                                   | 459/1000 [29:42<00:54,  9.92it/s]

training for epoch 459
for epoch 459 learning rate is 0.00027812838944369386
training_step
0 1.9926785689133863
validation_step
0 1.9744699918306792
for epoch 459 mean loss on train 1.9926785230636597
for epoch 459 mean loss on val 1.9744700193405151


 46%|██████████████████████████████▎                                   | 460/1000 [29:42<00:55,  9.76it/s]

training for epoch 460
for epoch 460 learning rate is 0.00027812838944369386
training_step
0 2.0212490375225363
validation_step
0 2.048392809354342
for epoch 460 mean loss on train 2.0212490558624268
for epoch 460 mean loss on val 2.0483927726745605
training for epoch 461
for epoch 461 learning rate is 0.00027812838944369386
training_step
0 1.9712324876051683
validation_step
0 2.0455184349646935
for epoch 461 mean loss on train 1.971232533454895
for epoch 461 mean loss on val 2.045518398284912


 46%|██████████████████████████████▍                                   | 462/1000 [29:42<00:53, 10.11it/s]

training for epoch 462
for epoch 462 learning rate is 0.0002503155504993245
training_step
0 1.9905587709867036
validation_step
0 2.074729772714468
for epoch 462 mean loss on train 1.9905587434768677
for epoch 462 mean loss on val 2.0747296810150146


 46%|██████████████████████████████▌                                   | 463/1000 [29:42<00:54,  9.92it/s]

training for epoch 463
for epoch 463 learning rate is 0.0002503155504993245
training_step
0 2.083200748150165
validation_step
0 1.9884788806621845
for epoch 463 mean loss on train 2.083200693130493
for epoch 463 mean loss on val 1.9884788990020752
training for epoch 464
for epoch 464 learning rate is 0.0002503155504993245
training_step
0 2.000578073354868
validation_step
0 2.1002203134390025
for epoch 464 mean loss on train 2.0005781650543213
for epoch 464 mean loss on val 2.100220203399658


 46%|██████████████████████████████▋                                   | 465/1000 [29:43<00:53,  9.95it/s]

training for epoch 465
for epoch 465 learning rate is 0.0002503155504993245
training_step
0 2.0174696995661807
validation_step
0 2.1350853259746847
for epoch 465 mean loss on train 2.017469644546509
for epoch 465 mean loss on val 2.135085344314575


 47%|██████████████████████████████▊                                   | 466/1000 [29:43<00:56,  9.50it/s]

training for epoch 466
for epoch 466 learning rate is 0.0002503155504993245
training_step
0 2.064001670250526
validation_step
0 2.0133131467379055
for epoch 466 mean loss on train 2.0640015602111816
for epoch 466 mean loss on val 2.013313055038452


 47%|██████████████████████████████▊                                   | 467/1000 [29:43<00:56,  9.49it/s]

training for epoch 467
for epoch 467 learning rate is 0.0002503155504993245
training_step
0 2.0023968036358175
validation_step
0 1.9765396118164062
for epoch 467 mean loss on train 2.002396821975708
for epoch 467 mean loss on val 1.9765396118164062


 47%|██████████████████████████████▉                                   | 468/1000 [29:43<00:56,  9.44it/s]

training for epoch 468
for epoch 468 learning rate is 0.0002503155504993245
training_step
0 1.960959948026217
validation_step
0 2.0844447796161356
for epoch 468 mean loss on train 1.9609599113464355
for epoch 468 mean loss on val 2.084444761276245


 47%|██████████████████████████████▉                                   | 469/1000 [29:43<00:56,  9.35it/s]

training for epoch 469
for epoch 469 learning rate is 0.0002503155504993245
training_step
0 2.043798593374399
validation_step
0 2.0247409527118387
for epoch 469 mean loss on train 2.0437986850738525
for epoch 469 mean loss on val 2.0247409343719482


 47%|███████████████████████████████                                   | 470/1000 [29:43<00:58,  9.05it/s]

training for epoch 470
for epoch 470 learning rate is 0.0002503155504993245
training_step
0 2.0110387068528395
validation_step
0 1.9996625460111177
for epoch 470 mean loss on train 2.0110387802124023
for epoch 470 mean loss on val 1.9996625185012817


 47%|███████████████████████████████                                   | 471/1000 [29:43<01:01,  8.60it/s]

training for epoch 471
for epoch 471 learning rate is 0.0002503155504993245
training_step
0 2.103611872746394
validation_step
0 2.121981694148137
for epoch 471 mean loss on train 2.103611946105957
for epoch 471 mean loss on val 2.121981620788574


 47%|███████████████████████████████▏                                  | 472/1000 [29:43<01:04,  8.22it/s]

training for epoch 472
for epoch 472 learning rate is 0.0002503155504993245
training_step
0 1.9654376690204327
validation_step
0 2.0189461341271033
for epoch 472 mean loss on train 1.965437650680542
for epoch 472 mean loss on val 2.0189461708068848


 47%|███████████████████████████████▏                                  | 473/1000 [29:44<01:05,  8.00it/s]

training for epoch 473
for epoch 473 learning rate is 0.0002503155504993245
training_step
0 2.01261226947491
validation_step
0 1.988611808189979
for epoch 473 mean loss on train 2.0126123428344727
for epoch 473 mean loss on val 1.9886118173599243


 47%|███████████████████████████████▎                                  | 474/1000 [29:44<01:03,  8.24it/s]

training for epoch 474
for epoch 474 learning rate is 0.0002503155504993245
training_step
0 2.1623367896446815
validation_step
0 2.1721244225135217
for epoch 474 mean loss on train 2.162336826324463
for epoch 474 mean loss on val 2.1721243858337402


 48%|███████████████████████████████▎                                  | 475/1000 [29:44<01:01,  8.47it/s]

training for epoch 475
for epoch 475 learning rate is 0.0002503155504993245
training_step
0 1.9772145197941706
validation_step
0 2.0159491025484524
for epoch 475 mean loss on train 1.9772145748138428
for epoch 475 mean loss on val 2.015949010848999


 48%|███████████████████████████████▍                                  | 476/1000 [29:44<01:00,  8.68it/s]

training for epoch 476
for epoch 476 learning rate is 0.0002503155504993245
training_step
0 2.0489783653846154
validation_step
0 1.9569285466120794
for epoch 476 mean loss on train 2.048978328704834
for epoch 476 mean loss on val 1.9569284915924072


 48%|███████████████████████████████▍                                  | 477/1000 [29:44<00:58,  8.95it/s]

training for epoch 477
for epoch 477 learning rate is 0.0002503155504993245
training_step
0 2.065954795250526
validation_step
0 2.0005240807166467
for epoch 477 mean loss on train 2.0659546852111816
for epoch 477 mean loss on val 2.0005240440368652


 48%|███████████████████████████████▌                                  | 478/1000 [29:44<00:57,  9.13it/s]

training for epoch 478
for epoch 478 learning rate is 0.0002503155504993245
training_step
0 1.9591476733867939
validation_step
0 2.011381149291992
for epoch 478 mean loss on train 1.9591476917266846
for epoch 478 mean loss on val 2.011381149291992
training for epoch 479
for epoch 479 learning rate is 0.0002503155504993245
training_step
0 1.9874745882474458
validation_step
0 2.0877720759465146
for epoch 479 mean loss on train 1.9874745607376099
for epoch 479 mean loss on val 2.0877721309661865


 48%|███████████████████████████████▋                                  | 480/1000 [29:44<00:53,  9.68it/s]

training for epoch 480
for epoch 480 learning rate is 0.0002503155504993245
training_step
0 2.0311971811147838
validation_step
0 2.1112323174109826
for epoch 480 mean loss on train 2.0311970710754395
for epoch 480 mean loss on val 2.111232280731201


 48%|███████████████████████████████▋                                  | 481/1000 [29:44<00:53,  9.64it/s]

training for epoch 481
for epoch 481 learning rate is 0.0002503155504993245
training_step
0 2.0638031592735877
validation_step
0 2.053662520188552
for epoch 481 mean loss on train 2.063803195953369
for epoch 481 mean loss on val 2.0536625385284424


 48%|███████████████████████████████▊                                  | 482/1000 [29:44<00:54,  9.47it/s]

training for epoch 482
for epoch 482 learning rate is 0.0002503155504993245
training_step
0 2.0642657646766076
validation_step
0 2.064893575815054
for epoch 482 mean loss on train 2.064265727996826
for epoch 482 mean loss on val 2.0648934841156006


 48%|███████████████████████████████▉                                  | 483/1000 [29:45<00:57,  9.04it/s]

training for epoch 483
for epoch 483 learning rate is 0.00022528399544939206
training_step
0 2.0883523500882664
validation_step
0 2.010826404278095
for epoch 483 mean loss on train 2.0883524417877197
for epoch 483 mean loss on val 2.010826349258423


 48%|███████████████████████████████▉                                  | 484/1000 [29:45<00:59,  8.63it/s]

training for epoch 484
for epoch 484 learning rate is 0.00022528399544939206
training_step
0 2.0578993283785305
validation_step
0 2.063855978158804
for epoch 484 mean loss on train 2.057899236679077
for epoch 484 mean loss on val 2.0638558864593506


 48%|████████████████████████████████                                  | 485/1000 [29:45<00:57,  8.91it/s]

training for epoch 485
for epoch 485 learning rate is 0.00022528399544939206
training_step
0 2.095975875854492
validation_step
0 2.0251793494591346
for epoch 485 mean loss on train 2.095975875854492
for epoch 485 mean loss on val 2.025179386138916


 49%|████████████████████████████████                                  | 486/1000 [29:45<00:59,  8.60it/s]

training for epoch 486
for epoch 486 learning rate is 0.00022528399544939206
training_step
0 2.0045145474947414
validation_step
0 2.001618898831881
for epoch 486 mean loss on train 2.004514455795288
for epoch 486 mean loss on val 2.0016188621520996


 49%|████████████████████████████████▏                                 | 487/1000 [29:45<00:58,  8.71it/s]

training for epoch 487
for epoch 487 learning rate is 0.00022528399544939206
training_step
0 2.0421986213097205
validation_step
0 2.1171405498798075
for epoch 487 mean loss on train 2.042198657989502
for epoch 487 mean loss on val 2.117140531539917


 49%|████████████████████████████████▏                                 | 488/1000 [29:45<00:58,  8.78it/s]

training for epoch 488
for epoch 488 learning rate is 0.00022528399544939206
training_step
0 2.061829346876878
validation_step
0 2.018205789419321
for epoch 488 mean loss on train 2.0618293285369873
for epoch 488 mean loss on val 2.0182058811187744


 49%|████████████████████████████████▎                                 | 489/1000 [29:45<00:58,  8.71it/s]

training for epoch 489
for epoch 489 learning rate is 0.00022528399544939206
training_step
0 2.059695610633263
validation_step
0 2.0505189162034254
for epoch 489 mean loss on train 2.0596957206726074
for epoch 489 mean loss on val 2.0505189895629883


 49%|████████████████████████████████▎                                 | 490/1000 [29:45<00:58,  8.68it/s]

training for epoch 490
for epoch 490 learning rate is 0.00022528399544939206
training_step
0 2.1468051030085635
validation_step
0 2.0365590315598707
for epoch 490 mean loss on train 2.1468050479888916
for epoch 490 mean loss on val 2.0365591049194336
training for epoch 491
for epoch 491 learning rate is 0.00022528399544939206
training_step
0 2.0461886479304385
validation_step
0 2.052197969876803
for epoch 491 mean loss on train 2.0461885929107666
for epoch 491 mean loss on val 2.0521979331970215


 49%|████████████████████████████████▍                                 | 492/1000 [29:46<00:55,  9.22it/s]

training for epoch 492
for epoch 492 learning rate is 0.00022528399544939206
training_step
0 1.9432842548076923
validation_step
0 1.9906490032489483
for epoch 492 mean loss on train 1.943284273147583
for epoch 492 mean loss on val 1.9906489849090576
training for epoch 493
for epoch 493 learning rate is 0.00022528399544939206
training_step
0 2.028071476862981
validation_step
0 2.0602246798001804
for epoch 493 mean loss on train 2.028071403503418
for epoch 493 mean loss on val 2.060224771499634


 49%|████████████████████████████████▌                                 | 494/1000 [29:46<00:53,  9.49it/s]

training for epoch 494
for epoch 494 learning rate is 0.00022528399544939206
training_step
0 2.0160226088303785
validation_step
0 2.0685479090763974
for epoch 494 mean loss on train 2.0160226821899414
for epoch 494 mean loss on val 2.0685479640960693
training for epoch 495
for epoch 495 learning rate is 0.00022528399544939206
training_step
0 2.0458104060246396
validation_step
0 2.12249389061561
for epoch 495 mean loss on train 2.0458104610443115
for epoch 495 mean loss on val 2.1224939823150635


 50%|████████████████████████████████▋                                 | 496/1000 [29:46<00:53,  9.42it/s]

training for epoch 496
for epoch 496 learning rate is 0.00022528399544939206
training_step
0 2.0503170306865988
validation_step
0 2.040437991802509
for epoch 496 mean loss on train 2.0503170490264893
for epoch 496 mean loss on val 2.040437936782837


 50%|████████████████████████████████▊                                 | 497/1000 [29:46<00:55,  9.14it/s]

training for epoch 497
for epoch 497 learning rate is 0.00022528399544939206
training_step
0 2.0603602482722354
validation_step
0 2.0884775015024037
for epoch 497 mean loss on train 2.0603601932525635
for epoch 497 mean loss on val 2.088477611541748


 50%|████████████████████████████████▊                                 | 498/1000 [29:46<00:54,  9.23it/s]

training for epoch 498
for epoch 498 learning rate is 0.00022528399544939206
training_step
0 2.0041335179255557
validation_step
0 2.005886811476487
for epoch 498 mean loss on train 2.004133462905884
for epoch 498 mean loss on val 2.0058867931365967


 50%|████████████████████████████████▉                                 | 499/1000 [29:46<00:53,  9.30it/s]

training for epoch 499
for epoch 499 learning rate is 0.00022528399544939206
training_step
0 2.0039647909311147
validation_step
0 2.01254756634052
for epoch 499 mean loss on train 2.003964900970459
for epoch 499 mean loss on val 2.012547492980957


 50%|█████████████████████████████████                                 | 500/1000 [29:46<00:54,  9.15it/s]

training for epoch 500
for epoch 500 learning rate is 0.00022528399544939206
training_step
0 1.966265164888822
validation_step
0 2.0881111438457785
for epoch 500 mean loss on train 1.9662652015686035
for epoch 500 mean loss on val 2.088111162185669


 50%|█████████████████████████████████                                 | 501/1000 [29:47<00:55,  8.97it/s]

training for epoch 501
for epoch 501 learning rate is 0.00022528399544939206
training_step
0 2.0050804431621847
validation_step
0 1.9877941425030048
for epoch 501 mean loss on train 2.005080461502075
for epoch 501 mean loss on val 1.9877941608428955


 50%|█████████████████████████████████▏                                | 502/1000 [29:47<00:55,  8.97it/s]

training for epoch 502
for epoch 502 learning rate is 0.00022528399544939206
training_step
0 2.051645278930664
validation_step
0 2.0441372211162863
for epoch 502 mean loss on train 2.051645278930664
for epoch 502 mean loss on val 2.0441372394561768


 50%|█████████████████████████████████▏                                | 503/1000 [29:47<00:54,  9.19it/s]

training for epoch 503
for epoch 503 learning rate is 0.00022528399544939206
training_step
0 2.034870294424204
validation_step
0 1.9680469219501202
for epoch 503 mean loss on train 2.0348703861236572
for epoch 503 mean loss on val 1.9680469036102295


 50%|█████████████████████████████████▎                                | 504/1000 [29:47<00:56,  8.74it/s]

training for epoch 504
for epoch 504 learning rate is 0.00020275559590445286
training_step
0 2.04053101172814
validation_step
0 2.015020517202524
for epoch 504 mean loss on train 2.0405309200286865
for epoch 504 mean loss on val 2.0150206089019775


 50%|█████████████████████████████████▎                                | 505/1000 [29:47<00:56,  8.83it/s]

training for epoch 505
for epoch 505 learning rate is 0.00020275559590445286
training_step
0 2.007813673753005
validation_step
0 2.033586942232572
for epoch 505 mean loss on train 2.0078136920928955
for epoch 505 mean loss on val 2.0335869789123535


 51%|█████████████████████████████████▍                                | 506/1000 [29:47<00:55,  8.87it/s]

training for epoch 506
for epoch 506 learning rate is 0.00020275559590445286
training_step
0 2.1000831310565653
validation_step
0 2.1365079146165113
for epoch 506 mean loss on train 2.100083112716675
for epoch 506 mean loss on val 2.136507987976074


 51%|█████████████████████████████████▍                                | 507/1000 [29:47<00:53,  9.17it/s]

training for epoch 507
for epoch 507 learning rate is 0.00020275559590445286
training_step
0 2.0433885134183445
validation_step
0 2.024706620436448
for epoch 507 mean loss on train 2.043388605117798
for epoch 507 mean loss on val 2.0247066020965576


 51%|█████████████████████████████████▌                                | 508/1000 [29:47<00:53,  9.23it/s]

training for epoch 508
for epoch 508 learning rate is 0.00020275559590445286
training_step
0 2.0188093919020433
validation_step
0 2.0102500915527344
for epoch 508 mean loss on train 2.0188093185424805
for epoch 508 mean loss on val 2.0102500915527344


 51%|█████████████████████████████████▌                                | 509/1000 [29:47<00:53,  9.18it/s]

training for epoch 509
for epoch 509 learning rate is 0.00020275559590445286
training_step
0 2.0982692425067606
validation_step
0 2.0253062615027795
for epoch 509 mean loss on train 2.09826922416687
for epoch 509 mean loss on val 2.025306224822998


 51%|█████████████████████████████████▋                                | 510/1000 [29:48<00:52,  9.29it/s]

training for epoch 510
for epoch 510 learning rate is 0.00020275559590445286
training_step
0 2.1270594963660607
validation_step
0 2.0868638845590444
for epoch 510 mean loss on train 2.1270594596862793
for epoch 510 mean loss on val 2.0868639945983887
training for epoch 511
for epoch 511 learning rate is 0.00020275559590445286
training_step
0 2.0369557600754957
validation_step
0 2.028509580172025
for epoch 511 mean loss on train 2.0369558334350586
for epoch 511 mean loss on val 2.0285096168518066


 51%|█████████████████████████████████▊                                | 512/1000 [29:48<00:50,  9.66it/s]

training for epoch 512
for epoch 512 learning rate is 0.00020275559590445286
training_step
0 2.06930424616887
validation_step
0 2.0085890843318057
for epoch 512 mean loss on train 2.0693042278289795
for epoch 512 mean loss on val 2.008589029312134
training for epoch 513
for epoch 513 learning rate is 0.00020275559590445286
training_step
0 2.049596199622521
validation_step
0 1.9903969397911658
for epoch 513 mean loss on train 2.0495963096618652
for epoch 513 mean loss on val 1.9903969764709473


 51%|█████████████████████████████████▉                                | 514/1000 [29:48<00:49,  9.83it/s]

training for epoch 514
for epoch 514 learning rate is 0.00020275559590445286
training_step
0 2.125784947321965
validation_step
0 2.019482979407677
for epoch 514 mean loss on train 2.1257848739624023
for epoch 514 mean loss on val 2.0194830894470215


 52%|█████████████████████████████████▉                                | 515/1000 [29:48<00:49,  9.82it/s]

training for epoch 515
for epoch 515 learning rate is 0.00020275559590445286
training_step
0 1.9979022099421575
validation_step
0 2.057721651517428
for epoch 515 mean loss on train 1.9979021549224854
for epoch 515 mean loss on val 2.0577216148376465


 52%|██████████████████████████████████                                | 516/1000 [29:48<00:52,  9.23it/s]

training for epoch 516
for epoch 516 learning rate is 0.00020275559590445286
training_step
0 2.114575752845177
validation_step
0 1.9795084733229418
for epoch 516 mean loss on train 2.1145758628845215
for epoch 516 mean loss on val 1.9795085191726685


 52%|██████████████████████████████████                                | 517/1000 [29:48<00:54,  8.84it/s]

training for epoch 517
for epoch 517 learning rate is 0.00020275559590445286
training_step
0 2.0188939021183896
validation_step
0 2.0134463677039514
for epoch 517 mean loss on train 2.0188939571380615
for epoch 517 mean loss on val 2.01344633102417


 52%|██████████████████████████████████▏                               | 518/1000 [29:48<00:55,  8.61it/s]

training for epoch 518
for epoch 518 learning rate is 0.00020275559590445286
training_step
0 2.1210108536940355
validation_step
0 2.138411595271184
for epoch 518 mean loss on train 2.1210107803344727
for epoch 518 mean loss on val 2.138411521911621


 52%|██████████████████████████████████▎                               | 519/1000 [29:49<00:54,  8.76it/s]

training for epoch 519
for epoch 519 learning rate is 0.00020275559590445286
training_step
0 1.996664780836839
validation_step
0 2.0159144768348107
for epoch 519 mean loss on train 1.9966647624969482
for epoch 519 mean loss on val 2.0159144401550293
training for epoch 520
for epoch 520 learning rate is 0.00020275559590445286
training_step
0 1.964601810161884
validation_step
0 2.10909916804387
for epoch 520 mean loss on train 1.964601755142212
for epoch 520 mean loss on val 2.1090991497039795


 52%|██████████████████████████████████▍                               | 521/1000 [29:49<00:51,  9.28it/s]

training for epoch 521
for epoch 521 learning rate is 0.00020275559590445286
training_step
0 2.0439012967623196
validation_step
0 2.0834676302396336
for epoch 521 mean loss on train 2.043901205062866
for epoch 521 mean loss on val 2.083467721939087


 52%|██████████████████████████████████▍                               | 522/1000 [29:49<00:51,  9.24it/s]

training for epoch 522
for epoch 522 learning rate is 0.00020275559590445286
training_step
0 2.058895697960487
validation_step
0 2.179746627807617
for epoch 522 mean loss on train 2.0588955879211426
for epoch 522 mean loss on val 2.179746627807617


 52%|██████████████████████████████████▌                               | 523/1000 [29:49<00:51,  9.29it/s]

training for epoch 523
for epoch 523 learning rate is 0.00020275559590445286
training_step
0 2.0386331998384914
validation_step
0 1.9791727799635668
for epoch 523 mean loss on train 2.038633108139038
for epoch 523 mean loss on val 1.9791728258132935
training for epoch 524
for epoch 524 learning rate is 0.00020275559590445286
training_step
0 2.0324918306790867
validation_step
0 2.1510725754957933
for epoch 524 mean loss on train 2.03249192237854
for epoch 524 mean loss on val 2.1510725021362305


 52%|██████████████████████████████████▋                               | 525/1000 [29:49<00:49,  9.56it/s]

training for epoch 525
for epoch 525 learning rate is 0.00018248003631400757
training_step
0 2.02241090627817
validation_step
0 2.113220508282001
for epoch 525 mean loss on train 2.0224108695983887
for epoch 525 mean loss on val 2.113220453262329


 53%|██████████████████████████████████▋                               | 526/1000 [29:49<00:50,  9.39it/s]

training for epoch 526
for epoch 526 learning rate is 0.00018248003631400757
training_step
0 2.0039841578556943
validation_step
0 2.042870594904973
for epoch 526 mean loss on train 2.003984212875366
for epoch 526 mean loss on val 2.04287052154541


 53%|██████████████████████████████████▊                               | 527/1000 [29:49<00:50,  9.37it/s]

training for epoch 527
for epoch 527 learning rate is 0.00018248003631400757
training_step
0 2.0004521883451023
validation_step
0 2.1007008185753455
for epoch 527 mean loss on train 2.0004522800445557
for epoch 527 mean loss on val 2.100700855255127


 53%|██████████████████████████████████▊                               | 528/1000 [29:50<00:51,  9.17it/s]

training for epoch 528
for epoch 528 learning rate is 0.00018248003631400757
training_step
0 1.9813914665809045
validation_step
0 2.1172652611365685
for epoch 528 mean loss on train 1.981391429901123
for epoch 528 mean loss on val 2.117265224456787


 53%|██████████████████████████████████▉                               | 529/1000 [29:50<00:54,  8.66it/s]

training for epoch 529
for epoch 529 learning rate is 0.00018248003631400757
training_step
0 1.958937718318059
validation_step
0 2.064566685603215
for epoch 529 mean loss on train 1.9589377641677856
for epoch 529 mean loss on val 2.0645666122436523


 53%|██████████████████████████████████▉                               | 530/1000 [29:50<00:57,  8.12it/s]

training for epoch 530
for epoch 530 learning rate is 0.00018248003631400757
training_step
0 2.0530595045823317
validation_step
0 2.025793662438026
for epoch 530 mean loss on train 2.0530595779418945
for epoch 530 mean loss on val 2.0257935523986816


 53%|███████████████████████████████████                               | 531/1000 [29:50<00:56,  8.32it/s]

training for epoch 531
for epoch 531 learning rate is 0.00018248003631400757
training_step
0 2.051116943359375
validation_step
0 2.1342230576735277
for epoch 531 mean loss on train 2.051116943359375
for epoch 531 mean loss on val 2.134222984313965


 53%|███████████████████████████████████                               | 532/1000 [29:50<00:55,  8.42it/s]

training for epoch 532
for epoch 532 learning rate is 0.00018248003631400757
training_step
0 2.0479947603665867
validation_step
0 2.049860587486854
for epoch 532 mean loss on train 2.04799485206604
for epoch 532 mean loss on val 2.0498604774475098


 53%|███████████████████████████████████▏                              | 533/1000 [29:50<00:53,  8.72it/s]

training for epoch 533
for epoch 533 learning rate is 0.00018248003631400757
training_step
0 2.054854759803185
validation_step
0 2.0228052872877855
for epoch 533 mean loss on train 2.0548548698425293
for epoch 533 mean loss on val 2.0228052139282227


 53%|███████████████████████████████████▏                              | 534/1000 [29:50<00:52,  8.85it/s]

training for epoch 534
for epoch 534 learning rate is 0.00018248003631400757
training_step
0 2.0779925126295824
validation_step
0 1.9739388685960035
for epoch 534 mean loss on train 2.0779924392700195
for epoch 534 mean loss on val 1.9739388227462769


 54%|███████████████████████████████████▎                              | 535/1000 [29:50<00:51,  9.02it/s]

training for epoch 535
for epoch 535 learning rate is 0.00018248003631400757
training_step
0 1.9820729769193208
validation_step
0 2.0574490473820615
for epoch 535 mean loss on train 1.9820729494094849
for epoch 535 mean loss on val 2.0574491024017334
training for epoch 536
for epoch 536 learning rate is 0.00018248003631400757
training_step
0 1.9408846635084887
validation_step
0 2.001090123103215
for epoch 536 mean loss on train 1.9408847093582153
for epoch 536 mean loss on val 2.0010900497436523


 54%|███████████████████████████████████▍                              | 537/1000 [29:51<00:50,  9.16it/s]

training for epoch 537
for epoch 537 learning rate is 0.00018248003631400757
training_step
0 1.9329662322998047
validation_step
0 2.067366673396184
for epoch 537 mean loss on train 1.9329662322998047
for epoch 537 mean loss on val 2.067366600036621


 54%|███████████████████████████████████▌                              | 538/1000 [29:51<00:50,  9.17it/s]

training for epoch 538
for epoch 538 learning rate is 0.00018248003631400757
training_step
0 1.9889601193941557
validation_step
0 2.107831368079552
for epoch 538 mean loss on train 1.9889601469039917
for epoch 538 mean loss on val 2.1078314781188965


 54%|███████████████████████████████████▌                              | 539/1000 [29:51<00:51,  8.91it/s]

training for epoch 539
for epoch 539 learning rate is 0.00018248003631400757
training_step
0 2.012860811673678
validation_step
0 2.136936774620643
for epoch 539 mean loss on train 2.0128607749938965
for epoch 539 mean loss on val 2.136936664581299


 54%|███████████████████████████████████▋                              | 540/1000 [29:51<00:51,  8.89it/s]

training for epoch 540
for epoch 540 learning rate is 0.00018248003631400757
training_step
0 2.1372437110314
validation_step
0 2.0250784067007213
for epoch 540 mean loss on train 2.1372437477111816
for epoch 540 mean loss on val 2.025078296661377


 54%|███████████████████████████████████▋                              | 541/1000 [29:51<00:52,  8.77it/s]

training for epoch 541
for epoch 541 learning rate is 0.00018248003631400757
training_step
0 2.032490070049579
validation_step
0 1.9837417602539062
for epoch 541 mean loss on train 2.0324900150299072
for epoch 541 mean loss on val 1.9837417602539062


 54%|███████████████████████████████████▊                              | 542/1000 [29:51<00:52,  8.69it/s]

training for epoch 542
for epoch 542 learning rate is 0.00018248003631400757
training_step
0 2.0138350266676683
validation_step
0 2.021007244403546
for epoch 542 mean loss on train 2.0138349533081055
for epoch 542 mean loss on val 2.0210072994232178


 54%|███████████████████████████████████▊                              | 543/1000 [29:51<00:51,  8.91it/s]

training for epoch 543
for epoch 543 learning rate is 0.00018248003631400757
training_step
0 2.0128619854266825
validation_step
0 2.051028765164889
for epoch 543 mean loss on train 2.012861967086792
for epoch 543 mean loss on val 2.0510287284851074


 54%|███████████████████████████████████▉                              | 544/1000 [29:51<00:51,  8.79it/s]

training for epoch 544
for epoch 544 learning rate is 0.00018248003631400757
training_step
0 2.0405632899357724
validation_step
0 2.1924940255972056
for epoch 544 mean loss on train 2.0405633449554443
for epoch 544 mean loss on val 2.1924939155578613


 55%|███████████████████████████████████▉                              | 545/1000 [29:51<00:53,  8.57it/s]

training for epoch 545
for epoch 545 learning rate is 0.00018248003631400757
training_step
0 1.983088419987605
validation_step
0 2.0430862720196066
for epoch 545 mean loss on train 1.9830883741378784
for epoch 545 mean loss on val 2.043086290359497


 55%|████████████████████████████████████                              | 546/1000 [29:52<00:52,  8.67it/s]

training for epoch 546
for epoch 546 learning rate is 0.00016423203268260683
training_step
0 2.1166773576002855
validation_step
0 2.0233494685246396
for epoch 546 mean loss on train 2.1166772842407227
for epoch 546 mean loss on val 2.0233495235443115


 55%|████████████████████████████████████                              | 547/1000 [29:52<00:53,  8.48it/s]

training for epoch 547
for epoch 547 learning rate is 0.00016423203268260683
training_step
0 2.0471297044020433
validation_step
0 1.9897682483379657
for epoch 547 mean loss on train 2.0471296310424805
for epoch 547 mean loss on val 1.9897682666778564


 55%|████████████████████████████████████▏                             | 548/1000 [29:52<00:53,  8.47it/s]

training for epoch 548
for epoch 548 learning rate is 0.00016423203268260683
training_step
0 2.211183107816256
validation_step
0 1.9889483818641076
for epoch 548 mean loss on train 2.2111830711364746
for epoch 548 mean loss on val 1.9889483451843262


 55%|████████████████████████████████████▏                             | 549/1000 [29:52<00:51,  8.73it/s]

training for epoch 549
for epoch 549 learning rate is 0.00016423203268260683
training_step
0 2.022122016319862
validation_step
0 2.168706307044396
for epoch 549 mean loss on train 2.0221219062805176
for epoch 549 mean loss on val 2.1687064170837402


 55%|████████████████████████████████████▎                             | 550/1000 [29:52<00:52,  8.60it/s]

training for epoch 550
for epoch 550 learning rate is 0.00016423203268260683
training_step
0 1.967599868774414
validation_step
0 1.9765360905573919
for epoch 550 mean loss on train 1.967599868774414
for epoch 550 mean loss on val 1.9765360355377197


 55%|████████████████████████████████████▎                             | 551/1000 [29:52<00:53,  8.36it/s]

training for epoch 551
for epoch 551 learning rate is 0.00016423203268260683
training_step
0 2.022656954251803
validation_step
0 1.978327384361854
for epoch 551 mean loss on train 2.0226569175720215
for epoch 551 mean loss on val 1.9783273935317993


 55%|████████████████████████████████████▍                             | 552/1000 [29:52<00:52,  8.61it/s]

training for epoch 552
for epoch 552 learning rate is 0.00016423203268260683
training_step
0 2.0064593095045824
validation_step
0 2.1194905501145582
for epoch 552 mean loss on train 2.0064592361450195
for epoch 552 mean loss on val 2.119490623474121


 55%|████████████████████████████████████▍                             | 553/1000 [29:52<00:51,  8.66it/s]

training for epoch 553
for epoch 553 learning rate is 0.00016423203268260683
training_step
0 1.9845337500939002
validation_step
0 2.052660135122446
for epoch 553 mean loss on train 1.9845337867736816
for epoch 553 mean loss on val 2.0526602268218994


 55%|████████████████████████████████████▌                             | 554/1000 [29:53<00:51,  8.66it/s]

training for epoch 554
for epoch 554 learning rate is 0.00016423203268260683
training_step
0 1.9949496342585638
validation_step
0 1.9866828918457031
for epoch 554 mean loss on train 1.9949495792388916
for epoch 554 mean loss on val 1.9866828918457031


 56%|████████████████████████████████████▋                             | 555/1000 [29:53<00:52,  8.49it/s]

training for epoch 555
for epoch 555 learning rate is 0.00016423203268260683
training_step
0 1.9581476358266978
validation_step
0 2.1883370326115537
for epoch 555 mean loss on train 1.958147644996643
for epoch 555 mean loss on val 2.1883370876312256


 56%|████████████████████████████████████▋                             | 556/1000 [29:53<00:51,  8.60it/s]

training for epoch 556
for epoch 556 learning rate is 0.00016423203268260683
training_step
0 2.0712027916541467
validation_step
0 2.051960138174204
for epoch 556 mean loss on train 2.0712027549743652
for epoch 556 mean loss on val 2.0519602298736572


 56%|████████████████████████████████████▊                             | 557/1000 [29:53<00:52,  8.51it/s]

training for epoch 557
for epoch 557 learning rate is 0.00016423203268260683
training_step
0 2.036763998178335
validation_step
0 2.0508780846228967
for epoch 557 mean loss on train 2.036763906478882
for epoch 557 mean loss on val 2.0508780479431152


 56%|████████████████████████████████████▊                             | 558/1000 [29:53<00:53,  8.24it/s]

training for epoch 558
for epoch 558 learning rate is 0.00016423203268260683
training_step
0 2.072156465970553
validation_step
0 1.9683387462909405
for epoch 558 mean loss on train 2.0721564292907715
for epoch 558 mean loss on val 1.9683387279510498


 56%|████████████████████████████████████▉                             | 559/1000 [29:53<00:52,  8.32it/s]

training for epoch 559
for epoch 559 learning rate is 0.00016423203268260683
training_step
0 2.0857539543738732
validation_step
0 2.0297409937931943
for epoch 559 mean loss on train 2.085753917694092
for epoch 559 mean loss on val 2.029741048812866


 56%|████████████████████████████████████▉                             | 560/1000 [29:53<00:53,  8.25it/s]

training for epoch 560
for epoch 560 learning rate is 0.00016423203268260683
training_step
0 1.9760035001314604
validation_step
0 2.07396492591271
for epoch 560 mean loss on train 1.9760035276412964
for epoch 560 mean loss on val 2.073964834213257


 56%|█████████████████████████████████████                             | 561/1000 [29:53<00:52,  8.34it/s]

training for epoch 561
for epoch 561 learning rate is 0.00016423203268260683
training_step
0 1.9994013859675481
validation_step
0 2.1181457226092997
for epoch 561 mean loss on train 1.999401330947876
for epoch 561 mean loss on val 2.118145704269409


 56%|█████████████████████████████████████                             | 562/1000 [29:54<00:52,  8.41it/s]

training for epoch 562
for epoch 562 learning rate is 0.00016423203268260683
training_step
0 2.067899997417743
validation_step
0 2.0243986569918118
for epoch 562 mean loss on train 2.0678999423980713
for epoch 562 mean loss on val 2.0243985652923584


 56%|█████████████████████████████████████▏                            | 563/1000 [29:54<00:52,  8.37it/s]

training for epoch 563
for epoch 563 learning rate is 0.00016423203268260683
training_step
0 2.136013617882362
validation_step
0 1.996882365300105
for epoch 563 mean loss on train 2.1360135078430176
for epoch 563 mean loss on val 1.9968823194503784


 56%|█████████████████████████████████████▏                            | 564/1000 [29:54<00:51,  8.41it/s]

training for epoch 564
for epoch 564 learning rate is 0.00016423203268260683
training_step
0 2.110882098858173
validation_step
0 2.041979716374324
for epoch 564 mean loss on train 2.110882043838501
for epoch 564 mean loss on val 2.0419797897338867


 56%|█████████████████████████████████████▎                            | 565/1000 [29:54<00:50,  8.66it/s]

training for epoch 565
for epoch 565 learning rate is 0.00016423203268260683
training_step
0 2.010926026564378
validation_step
0 2.0786499610314
for epoch 565 mean loss on train 2.0109260082244873
for epoch 565 mean loss on val 2.0786499977111816


 57%|█████████████████████████████████████▎                            | 566/1000 [29:54<00:49,  8.70it/s]

training for epoch 566
for epoch 566 learning rate is 0.00016423203268260683
training_step
0 2.2062542255108175
validation_step
0 2.0117036379300632
for epoch 566 mean loss on train 2.206254243850708
for epoch 566 mean loss on val 2.0117037296295166


 57%|█████████████████████████████████████▍                            | 567/1000 [29:54<00:49,  8.72it/s]

training for epoch 567
for epoch 567 learning rate is 0.00014780882941434616
training_step
0 1.999762168297401
validation_step
0 2.0057012117826023
for epoch 567 mean loss on train 1.9997621774673462
for epoch 567 mean loss on val 2.0057013034820557


 57%|█████████████████████████████████████▍                            | 568/1000 [29:54<00:49,  8.66it/s]

training for epoch 568
for epoch 568 learning rate is 0.00014780882941434616
training_step
0 2.0761501605694113
validation_step
0 1.997808309701773
for epoch 568 mean loss on train 2.0761501789093018
for epoch 568 mean loss on val 1.9978083372116089


 57%|█████████████████████████████████████▌                            | 569/1000 [29:54<00:49,  8.64it/s]

training for epoch 569
for epoch 569 learning rate is 0.00014780882941434616
training_step
0 2.0808872809776893
validation_step
0 2.0031939286452074
for epoch 569 mean loss on train 2.0808873176574707
for epoch 569 mean loss on val 2.0031938552856445


 57%|█████████████████████████████████████▌                            | 570/1000 [29:54<00:49,  8.73it/s]

training for epoch 570
for epoch 570 learning rate is 0.00014780882941434616
training_step
0 2.0291466346153846
validation_step
0 1.9514146951528697
for epoch 570 mean loss on train 2.029146671295166
for epoch 570 mean loss on val 1.951414704322815


 57%|█████████████████████████████████████▋                            | 571/1000 [29:55<00:49,  8.65it/s]

training for epoch 571
for epoch 571 learning rate is 0.00014780882941434616
training_step
0 2.0594087747427134
validation_step
0 2.067366673396184
for epoch 571 mean loss on train 2.059408664703369
for epoch 571 mean loss on val 2.067366600036621


 57%|█████████████████████████████████████▊                            | 572/1000 [29:55<00:50,  8.44it/s]

training for epoch 572
for epoch 572 learning rate is 0.00014780882941434616
training_step
0 2.0354369236872745
validation_step
0 2.01420050400954
for epoch 572 mean loss on train 2.0354368686676025
for epoch 572 mean loss on val 2.014200448989868


 57%|█████████████████████████████████████▊                            | 573/1000 [29:55<00:51,  8.25it/s]

training for epoch 573
for epoch 573 learning rate is 0.00014780882941434616
training_step
0 2.025989679189829
validation_step
0 2.0725069779616137
for epoch 573 mean loss on train 2.0259897708892822
for epoch 573 mean loss on val 2.072506904602051


 57%|█████████████████████████████████████▉                            | 574/1000 [29:55<00:51,  8.29it/s]

training for epoch 574
for epoch 574 learning rate is 0.00014780882941434616
training_step
0 2.0790708982027493
validation_step
0 2.0156195713923526
for epoch 574 mean loss on train 2.079070806503296
for epoch 574 mean loss on val 2.0156195163726807


 57%|█████████████████████████████████████▉                            | 575/1000 [29:55<00:50,  8.39it/s]

training for epoch 575
for epoch 575 learning rate is 0.00014780882941434616
training_step
0 2.043457764845628
validation_step
0 1.9791597219613881
for epoch 575 mean loss on train 2.0434577465057373
for epoch 575 mean loss on val 1.9791597127914429


 58%|██████████████████████████████████████                            | 576/1000 [29:55<00:50,  8.43it/s]

training for epoch 576
for epoch 576 learning rate is 0.00014780882941434616
training_step
0 1.9748673072228065
validation_step
0 1.9692177405724158
for epoch 576 mean loss on train 1.974867343902588
for epoch 576 mean loss on val 1.9692177772521973


 58%|██████████████████████████████████████                            | 577/1000 [29:55<00:49,  8.54it/s]

training for epoch 577
for epoch 577 learning rate is 0.00014780882941434616
training_step
0 2.053813640887921
validation_step
0 2.129455272967999
for epoch 577 mean loss on train 2.0538136959075928
for epoch 577 mean loss on val 2.129455327987671


 58%|██████████████████████████████████████▏                           | 578/1000 [29:55<00:49,  8.60it/s]

training for epoch 578
for epoch 578 learning rate is 0.00014780882941434616
training_step
0 1.9639788407545824
validation_step
0 2.0734842740572414
for epoch 578 mean loss on train 1.963978886604309
for epoch 578 mean loss on val 2.073484182357788


 58%|██████████████████████████████████████▏                           | 579/1000 [29:55<00:48,  8.77it/s]

training for epoch 579
for epoch 579 learning rate is 0.00014780882941434616
training_step
0 1.999598136314979
validation_step
0 1.9595794677734375
for epoch 579 mean loss on train 1.9995981454849243
for epoch 579 mean loss on val 1.9595794677734375


 58%|██████████████████████████████████████▎                           | 580/1000 [29:56<00:47,  8.76it/s]

training for epoch 580
for epoch 580 learning rate is 0.00014780882941434616
training_step
0 2.030604435847356
validation_step
0 2.0367422837477465
for epoch 580 mean loss on train 2.030604362487793
for epoch 580 mean loss on val 2.0367422103881836


 58%|██████████████████████████████████████▎                           | 581/1000 [29:56<00:47,  8.77it/s]

training for epoch 581
for epoch 581 learning rate is 0.00014780882941434616
training_step
0 1.9543176797720103
validation_step
0 2.078505589411809
for epoch 581 mean loss on train 1.9543176889419556
for epoch 581 mean loss on val 2.078505516052246


 58%|██████████████████████████████████████▍                           | 582/1000 [29:56<00:48,  8.60it/s]

training for epoch 582
for epoch 582 learning rate is 0.00014780882941434616
training_step
0 1.9548662625826323
validation_step
0 2.004758541400616
for epoch 582 mean loss on train 1.9548662900924683
for epoch 582 mean loss on val 2.004758596420288


 58%|██████████████████████████████████████▍                           | 583/1000 [29:56<00:48,  8.65it/s]

training for epoch 583
for epoch 583 learning rate is 0.00014780882941434616
training_step
0 2.0282429915208082
validation_step
0 2.111712382389949
for epoch 583 mean loss on train 2.028243064880371
for epoch 583 mean loss on val 2.1117124557495117


 58%|██████████████████████████████████████▌                           | 584/1000 [29:56<00:48,  8.66it/s]

training for epoch 584
for epoch 584 learning rate is 0.00014780882941434616
training_step
0 2.003837145291842
validation_step
0 1.982966349675105
for epoch 584 mean loss on train 2.0038371086120605
for epoch 584 mean loss on val 1.9829663038253784


 58%|██████████████████████████████████████▌                           | 585/1000 [29:56<00:47,  8.67it/s]

training for epoch 585
for epoch 585 learning rate is 0.00014780882941434616
training_step
0 2.0711936950683594
validation_step
0 2.0113468170166016
for epoch 585 mean loss on train 2.0711936950683594
for epoch 585 mean loss on val 2.0113468170166016


 59%|██████████████████████████████████████▋                           | 586/1000 [29:56<00:48,  8.62it/s]

training for epoch 586
for epoch 586 learning rate is 0.00014780882941434616
training_step
0 2.0670433044433594
validation_step
0 2.0566896291879506
for epoch 586 mean loss on train 2.0670433044433594
for epoch 586 mean loss on val 2.056689739227295


 59%|██████████████████████████████████████▋                           | 587/1000 [29:56<00:47,  8.61it/s]

training for epoch 587
for epoch 587 learning rate is 0.00014780882941434616
training_step
0 2.018919724684495
validation_step
0 2.0626839857835035
for epoch 587 mean loss on train 2.0189197063446045
for epoch 587 mean loss on val 2.0626840591430664


 59%|██████████████████████████████████████▊                           | 588/1000 [29:57<00:47,  8.63it/s]

training for epoch 588
for epoch 588 learning rate is 0.00013302794647291155
training_step
0 2.082611377422626
validation_step
0 1.9629390423114483
for epoch 588 mean loss on train 2.082611322402954
for epoch 588 mean loss on val 1.9629390239715576


 59%|██████████████████████████████████████▊                           | 589/1000 [29:57<00:47,  8.73it/s]

training for epoch 589
for epoch 589 learning rate is 0.00013302794647291155
training_step
0 2.046686025766226
validation_step
0 1.9883761772742639
for epoch 589 mean loss on train 2.0466859340667725
for epoch 589 mean loss on val 1.9883761405944824


 59%|██████████████████████████████████████▉                           | 590/1000 [29:57<00:46,  8.80it/s]

training for epoch 590
for epoch 590 learning rate is 0.00013302794647291155
training_step
0 2.0262845846322866
validation_step
0 2.0756117013784556
for epoch 590 mean loss on train 2.026284694671631
for epoch 590 mean loss on val 2.0756115913391113


 59%|███████████████████████████████████████                           | 591/1000 [29:57<00:47,  8.61it/s]

training for epoch 591
for epoch 591 learning rate is 0.00013302794647291155
training_step
0 2.0026688208946815
validation_step
0 2.0924606323242188
for epoch 591 mean loss on train 2.002668857574463
for epoch 591 mean loss on val 2.0924606323242188


 59%|███████████████████████████████████████                           | 592/1000 [29:57<00:45,  8.88it/s]

training for epoch 592
for epoch 592 learning rate is 0.00013302794647291155
training_step
0 2.117214349599985
validation_step
0 2.0991610013521633
for epoch 592 mean loss on train 2.1172144412994385
for epoch 592 mean loss on val 2.09916090965271


 59%|███████████████████████████████████████▏                          | 593/1000 [29:57<00:46,  8.76it/s]

training for epoch 593
for epoch 593 learning rate is 0.00013302794647291155
training_step
0 2.0323882469764123
validation_step
0 2.184979952298678
for epoch 593 mean loss on train 2.032388210296631
for epoch 593 mean loss on val 2.1849799156188965


 59%|███████████████████████████████████████▏                          | 594/1000 [29:57<00:46,  8.65it/s]

training for epoch 594
for epoch 594 learning rate is 0.00013302794647291155
training_step
0 2.0073736630953274
validation_step
0 2.0846711672269382
for epoch 594 mean loss on train 2.007373571395874
for epoch 594 mean loss on val 2.0846712589263916


 60%|███████████████████████████████████████▎                          | 595/1000 [29:57<00:46,  8.73it/s]

training for epoch 595
for epoch 595 learning rate is 0.00013302794647291155
training_step
0 2.0135973416841946
validation_step
0 2.0270057091346154
for epoch 595 mean loss on train 2.013597249984741
for epoch 595 mean loss on val 2.027005672454834


 60%|███████████████████████████████████████▎                          | 596/1000 [29:57<00:44,  9.07it/s]

training for epoch 596
for epoch 596 learning rate is 0.00013302794647291155
training_step
0 2.103490976186899
validation_step
0 2.01747072660006
for epoch 596 mean loss on train 2.1034910678863525
for epoch 596 mean loss on val 2.0174708366394043


 60%|███████████████████████████████████████▍                          | 597/1000 [29:58<00:43,  9.18it/s]

training for epoch 597
for epoch 597 learning rate is 0.00013302794647291155
training_step
0 2.037267098060021
validation_step
0 2.0666966071495643
for epoch 597 mean loss on train 2.0372672080993652
for epoch 597 mean loss on val 2.0666966438293457


 60%|███████████████████████████████████████▍                          | 598/1000 [29:58<00:45,  8.87it/s]

training for epoch 598
for epoch 598 learning rate is 0.00013302794647291155
training_step
0 2.071904109074519
validation_step
0 2.2096707270695615
for epoch 598 mean loss on train 2.071904182434082
for epoch 598 mean loss on val 2.2096707820892334


 60%|███████████████████████████████████████▌                          | 599/1000 [29:58<00:46,  8.69it/s]

training for epoch 599
for epoch 599 learning rate is 0.00013302794647291155
training_step
0 2.0127598689152646
validation_step
0 2.1039070716271033
for epoch 599 mean loss on train 2.0127599239349365
for epoch 599 mean loss on val 2.1039071083068848


 60%|███████████████████████████████████████▌                          | 600/1000 [29:58<00:46,  8.59it/s]

training for epoch 600
for epoch 600 learning rate is 0.00013302794647291155
training_step
0 1.9944246732271635
validation_step
0 2.00484378521259
for epoch 600 mean loss on train 1.9944247007369995
for epoch 600 mean loss on val 2.0048437118530273


 60%|███████████████████████████████████████▋                          | 601/1000 [29:58<00:47,  8.47it/s]

training for epoch 601
for epoch 601 learning rate is 0.00013302794647291155
training_step
0 2.009587361262395
validation_step
0 2.016435769888071
for epoch 601 mean loss on train 2.009587287902832
for epoch 601 mean loss on val 2.0164358615875244


 60%|███████████████████████████████████████▋                          | 602/1000 [29:58<00:47,  8.30it/s]

training for epoch 602
for epoch 602 learning rate is 0.00013302794647291155
training_step
0 2.0977940192589393
validation_step
0 2.109395980834961
for epoch 602 mean loss on train 2.0977940559387207
for epoch 602 mean loss on val 2.109395980834961


 60%|███████████████████████████████████████▊                          | 603/1000 [29:58<00:48,  8.14it/s]

training for epoch 603
for epoch 603 learning rate is 0.00013302794647291155
training_step
0 1.9705484830416167
validation_step
0 2.1505540701059194
for epoch 603 mean loss on train 1.9705485105514526
for epoch 603 mean loss on val 2.1505541801452637


 60%|███████████████████████████████████████▊                          | 604/1000 [29:58<00:50,  7.86it/s]

training for epoch 604
for epoch 604 learning rate is 0.00013302794647291155
training_step
0 2.1315923837515025
validation_step
0 2.047942381638747
for epoch 604 mean loss on train 2.131592273712158
for epoch 604 mean loss on val 2.0479423999786377


 60%|███████████████████████████████████████▉                          | 605/1000 [29:59<00:49,  7.95it/s]

training for epoch 605
for epoch 605 learning rate is 0.00013302794647291155
training_step
0 2.1336209223820615
validation_step
0 2.048627706674429
for epoch 605 mean loss on train 2.1336209774017334
for epoch 605 mean loss on val 2.0486276149749756


 61%|███████████████████████████████████████▉                          | 606/1000 [29:59<00:50,  7.82it/s]

training for epoch 606
for epoch 606 learning rate is 0.00013302794647291155
training_step
0 2.0412748776949368
validation_step
0 2.0658721923828125
for epoch 606 mean loss on train 2.0412747859954834
for epoch 606 mean loss on val 2.0658721923828125


 61%|████████████████████████████████████████                          | 607/1000 [29:59<00:48,  8.15it/s]

training for epoch 607
for epoch 607 learning rate is 0.00013302794647291155
training_step
0 2.12146964439979
validation_step
0 2.052409392136794
for epoch 607 mean loss on train 2.121469736099243
for epoch 607 mean loss on val 2.0524094104766846


 61%|████████████████████████████████████████▏                         | 608/1000 [29:59<00:47,  8.31it/s]

training for epoch 608
for epoch 608 learning rate is 0.00013302794647291155
training_step
0 2.041184351994441
validation_step
0 1.9803091195913463
for epoch 608 mean loss on train 2.041184425354004
for epoch 608 mean loss on val 1.9803091287612915


 61%|████████████████████████████████████████▏                         | 609/1000 [29:59<00:45,  8.52it/s]

training for epoch 609
for epoch 609 learning rate is 0.00011972515182562039
training_step
0 2.0138075901911807
validation_step
0 2.1467679830697866
for epoch 609 mean loss on train 2.013807535171509
for epoch 609 mean loss on val 2.146768093109131


 61%|████████████████████████████████████████▎                         | 610/1000 [29:59<00:44,  8.83it/s]

training for epoch 610
for epoch 610 learning rate is 0.00011972515182562039
training_step
0 2.1519123957707333
validation_step
0 2.084177017211914
for epoch 610 mean loss on train 2.1519124507904053
for epoch 610 mean loss on val 2.084177017211914
training for epoch 611
for epoch 611 learning rate is 0.00011972515182562039
training_step
0 2.029253886296199
validation_step
0 2.1008041088397684
for epoch 611 mean loss on train 2.0292539596557617
for epoch 611 mean loss on val 2.100804090499878


 61%|████████████████████████████████████████▍                         | 612/1000 [29:59<00:42,  9.10it/s]

training for epoch 612
for epoch 612 learning rate is 0.00011972515182562039
training_step
0 1.9953622084397535
validation_step
0 2.1291382129375753
for epoch 612 mean loss on train 1.9953621625900269
for epoch 612 mean loss on val 2.129138231277466


 61%|████████████████████████████████████████▍                         | 613/1000 [29:59<00:43,  9.00it/s]

training for epoch 613
for epoch 613 learning rate is 0.00011972515182562039
training_step
0 1.970732908982497
validation_step
0 1.999951876126803
for epoch 613 mean loss on train 1.9707329273223877
for epoch 613 mean loss on val 1.9999518394470215


 61%|████████████████████████████████████████▌                         | 614/1000 [30:00<00:42,  9.10it/s]

training for epoch 614
for epoch 614 learning rate is 0.00011972515182562039
training_step
0 2.0661849975585938
validation_step
0 2.002190956702599
for epoch 614 mean loss on train 2.0661849975585938
for epoch 614 mean loss on val 2.0021910667419434


 62%|████████████████████████████████████████▌                         | 615/1000 [30:00<00:43,  8.84it/s]

training for epoch 615
for epoch 615 learning rate is 0.00011972515182562039
training_step
0 2.1601204505333533
validation_step
0 2.0327286353478065
for epoch 615 mean loss on train 2.1601204872131348
for epoch 615 mean loss on val 2.032728672027588


 62%|████████████████████████████████████████▋                         | 616/1000 [30:00<00:44,  8.64it/s]

training for epoch 616
for epoch 616 learning rate is 0.00011972515182562039
training_step
0 2.0451495830829325
validation_step
0 2.136689846332257
for epoch 616 mean loss on train 2.045149564743042
for epoch 616 mean loss on val 2.1366899013519287


 62%|████████████████████████████████████████▋                         | 617/1000 [30:00<00:45,  8.42it/s]

training for epoch 617
for epoch 617 learning rate is 0.00011972515182562039
training_step
0 2.116654029259315
validation_step
0 2.0590672126183143
for epoch 617 mean loss on train 2.1166539192199707
for epoch 617 mean loss on val 2.0590672492980957


 62%|████████████████████████████████████████▊                         | 618/1000 [30:00<00:44,  8.51it/s]

training for epoch 618
for epoch 618 learning rate is 0.00011972515182562039
training_step
0 2.034228251530574
validation_step
0 2.023312495304988
for epoch 618 mean loss on train 2.0342283248901367
for epoch 618 mean loss on val 2.023312568664551


 62%|████████████████████████████████████████▊                         | 619/1000 [30:00<00:43,  8.72it/s]

training for epoch 619
for epoch 619 learning rate is 0.00011972515182562039
training_step
0 2.0959622309758115
validation_step
0 2.0299327556903544
for epoch 619 mean loss on train 2.0959622859954834
for epoch 619 mean loss on val 2.029932737350464


 62%|████████████████████████████████████████▉                         | 620/1000 [30:00<00:42,  8.97it/s]

training for epoch 620
for epoch 620 learning rate is 0.00011972515182562039
training_step
0 2.170711957491361
validation_step
0 2.040435791015625
for epoch 620 mean loss on train 2.1707119941711426
for epoch 620 mean loss on val 2.040435791015625
training for epoch 621
for epoch 621 learning rate is 0.00011972515182562039
training_step
0 2.1289112384502706
validation_step
0 1.9884317838228667
for epoch 621 mean loss on train 2.128911256790161
for epoch 621 mean loss on val 1.9884318113327026


 62%|█████████████████████████████████████████                         | 622/1000 [30:00<00:39,  9.48it/s]

training for epoch 622
for epoch 622 learning rate is 0.00011972515182562039
training_step
0 2.0787189190204325
validation_step
0 2.0395918626051683
for epoch 622 mean loss on train 2.078718900680542
for epoch 622 mean loss on val 2.0395917892456055


 62%|█████████████████████████████████████████                         | 623/1000 [30:01<00:41,  9.15it/s]

training for epoch 623
for epoch 623 learning rate is 0.00011972515182562039
training_step
0 1.987115273108849
validation_step
0 1.9949339353121245
for epoch 623 mean loss on train 1.9871152639389038
for epoch 623 mean loss on val 1.9949339628219604


 62%|█████████████████████████████████████████▏                        | 624/1000 [30:01<00:40,  9.20it/s]

training for epoch 624
for epoch 624 learning rate is 0.00011972515182562039
training_step
0 2.017518703754132
validation_step
0 2.0151004791259766
for epoch 624 mean loss on train 2.0175187587738037
for epoch 624 mean loss on val 2.0151004791259766


 62%|█████████████████████████████████████████▎                        | 625/1000 [30:01<00:40,  9.36it/s]

training for epoch 625
for epoch 625 learning rate is 0.00011972515182562039
training_step
0 2.103830044086163
validation_step
0 2.1103440798245945
for epoch 625 mean loss on train 2.103830099105835
for epoch 625 mean loss on val 2.110344171524048


 63%|█████████████████████████████████████████▎                        | 626/1000 [30:01<00:39,  9.41it/s]

training for epoch 626
for epoch 626 learning rate is 0.00011972515182562039
training_step
0 2.1451016939603367
validation_step
0 1.972294880793645
for epoch 626 mean loss on train 2.14510178565979
for epoch 626 mean loss on val 1.9722949266433716
training for epoch 627
for epoch 627 learning rate is 0.00011972515182562039
training_step
0 1.9695053100585938
validation_step
0 2.0381188025841346
for epoch 627 mean loss on train 1.9695053100585938
for epoch 627 mean loss on val 2.038118839263916


 63%|█████████████████████████████████████████▍                        | 628/1000 [30:01<00:38,  9.69it/s]

training for epoch 628
for epoch 628 learning rate is 0.00011972515182562039
training_step
0 2.081678977379432
validation_step
0 2.0652556786170373
for epoch 628 mean loss on train 2.081678867340088
for epoch 628 mean loss on val 2.065255641937256
training for epoch 629
for epoch 629 learning rate is 0.00011972515182562039
training_step
0 2.0470233330359826
validation_step
0 2.1172574850229116
for epoch 629 mean loss on train 2.047023296356201
for epoch 629 mean loss on val 2.117257595062256


 63%|█████████████████████████████████████████▌                        | 630/1000 [30:01<00:37,  9.80it/s]

training for epoch 630
for epoch 630 learning rate is 0.00010775263664305835
training_step
0 2.034753652719351
validation_step
0 2.1238199380727916
for epoch 630 mean loss on train 2.0347535610198975
for epoch 630 mean loss on val 2.1238198280334473
training for epoch 631
for epoch 631 learning rate is 0.00010775263664305835
training_step
0 2.019463612483098
validation_step
0 2.0821251502403846
for epoch 631 mean loss on train 2.019463539123535
for epoch 631 mean loss on val 2.082125186920166


 63%|█████████████████████████████████████████▋                        | 632/1000 [30:01<00:37,  9.71it/s]

training for epoch 632
for epoch 632 learning rate is 0.00010775263664305835
training_step
0 1.9965626643254206
validation_step
0 2.0984866802509012
for epoch 632 mean loss on train 1.9965627193450928
for epoch 632 mean loss on val 2.0984866619110107
training for epoch 633
for epoch 633 learning rate is 0.00010775263664305835
training_step
0 2.070557374220628
validation_step
0 2.0053945688100963
for epoch 633 mean loss on train 2.0705573558807373
for epoch 633 mean loss on val 2.005394458770752


 63%|█████████████████████████████████████████▊                        | 634/1000 [30:02<00:37,  9.71it/s]

training for epoch 634
for epoch 634 learning rate is 0.00010775263664305835
training_step
0 2.1132791959322414
validation_step
0 2.032164793748122
for epoch 634 mean loss on train 2.113279104232788
for epoch 634 mean loss on val 2.0321648120880127


 64%|█████████████████████████████████████████▉                        | 635/1000 [30:02<00:37,  9.69it/s]

training for epoch 635
for epoch 635 learning rate is 0.00010775263664305835
training_step
0 2.0550707303560696
validation_step
0 2.095701364370493
for epoch 635 mean loss on train 2.055070638656616
for epoch 635 mean loss on val 2.0957014560699463
training for epoch 636
for epoch 636 learning rate is 0.00010775263664305835
training_step
0 2.0576631105863132
validation_step
0 2.1060877579909105
for epoch 636 mean loss on train 2.0576632022857666
for epoch 636 mean loss on val 2.1060876846313477


 64%|██████████████████████████████████████████                        | 637/1000 [30:02<00:36, 10.00it/s]

training for epoch 637
for epoch 637 learning rate is 0.00010775263664305835
training_step
0 2.0279196225679836
validation_step
0 2.015307353093074
for epoch 637 mean loss on train 2.0279195308685303
for epoch 637 mean loss on val 2.0153074264526367


 64%|██████████████████████████████████████████                        | 638/1000 [30:02<00:36,  9.88it/s]

training for epoch 638
for epoch 638 learning rate is 0.00010775263664305835
training_step
0 2.05732668363131
validation_step
0 2.067778073824369
for epoch 638 mean loss on train 2.0573267936706543
for epoch 638 mean loss on val 2.0677781105041504


 64%|██████████████████████████████████████████▏                       | 639/1000 [30:02<00:37,  9.72it/s]

training for epoch 639
for epoch 639 learning rate is 0.00010775263664305835
training_step
0 2.0285472869873047
validation_step
0 2.062428254347581
for epoch 639 mean loss on train 2.0285472869873047
for epoch 639 mean loss on val 2.0624282360076904


 64%|██████████████████████████████████████████▏                       | 640/1000 [30:02<00:37,  9.66it/s]

training for epoch 640
for epoch 640 learning rate is 0.00010775263664305835
training_step
0 1.99884033203125
validation_step
0 2.0366363525390625
for epoch 640 mean loss on train 1.99884033203125
for epoch 640 mean loss on val 2.0366363525390625


 64%|██████████████████████████████████████████▎                       | 641/1000 [30:02<00:37,  9.47it/s]

training for epoch 641
for epoch 641 learning rate is 0.00010775263664305835
training_step
0 2.0495788867657003
validation_step
0 2.0417088728684645
for epoch 641 mean loss on train 2.049578905105591
for epoch 641 mean loss on val 2.0417089462280273
training for epoch 642
for epoch 642 learning rate is 0.00010775263664305835
training_step
0 2.188264406644381
validation_step
0 2.0272212395301232
for epoch 642 mean loss on train 2.1882643699645996
for epoch 642 mean loss on val 2.027221202850342


 64%|██████████████████████████████████████████▍                       | 643/1000 [30:03<00:38,  9.23it/s]

training for epoch 643
for epoch 643 learning rate is 0.00010775263664305835
training_step
0 2.034489705012395
validation_step
0 2.0461000295785756
for epoch 643 mean loss on train 2.034489631652832
for epoch 643 mean loss on val 2.04610013961792


 64%|██████████████████████████████████████████▌                       | 644/1000 [30:03<00:39,  9.08it/s]

training for epoch 644
for epoch 644 learning rate is 0.00010775263664305835
training_step
0 2.003896419818585
validation_step
0 2.047181789691632
for epoch 644 mean loss on train 2.003896474838257
for epoch 644 mean loss on val 2.0471818447113037


 64%|██████████████████████████████████████████▌                       | 645/1000 [30:03<00:40,  8.86it/s]

training for epoch 645
for epoch 645 learning rate is 0.00010775263664305835
training_step
0 2.028403795682467
validation_step
0 2.0356869330772986
for epoch 645 mean loss on train 2.0284037590026855
for epoch 645 mean loss on val 2.03568696975708


 65%|██████████████████████████████████████████▋                       | 646/1000 [30:03<00:40,  8.81it/s]

training for epoch 646
for epoch 646 learning rate is 0.00010775263664305835
training_step
0 2.0747379889855018
validation_step
0 2.0287739680363583
for epoch 646 mean loss on train 2.074738025665283
for epoch 646 mean loss on val 2.0287740230560303


 65%|██████████████████████████████████████████▋                       | 647/1000 [30:03<00:39,  8.96it/s]

training for epoch 647
for epoch 647 learning rate is 0.00010775263664305835
training_step
0 2.091554348285382
validation_step
0 2.1096707857572117
for epoch 647 mean loss on train 2.0915544033050537
for epoch 647 mean loss on val 2.109670877456665


 65%|██████████████████████████████████████████▊                       | 648/1000 [30:03<00:40,  8.73it/s]

training for epoch 648
for epoch 648 learning rate is 0.00010775263664305835
training_step
0 2.0225125826322117
validation_step
0 1.9801945319542518
for epoch 648 mean loss on train 2.022512674331665
for epoch 648 mean loss on val 1.9801945686340332


 65%|██████████████████████████████████████████▊                       | 649/1000 [30:03<00:40,  8.65it/s]

training for epoch 649
for epoch 649 learning rate is 0.00010775263664305835
training_step
0 2.0308396266056943
validation_step
0 2.111469562237079
for epoch 649 mean loss on train 2.030839681625366
for epoch 649 mean loss on val 2.1114695072174072


 65%|██████████████████████████████████████████▉                       | 650/1000 [30:03<00:40,  8.58it/s]

training for epoch 650
for epoch 650 learning rate is 0.00010775263664305835
training_step
0 2.070386006281926
validation_step
0 2.046345930833083
for epoch 650 mean loss on train 2.0703859329223633
for epoch 650 mean loss on val 2.0463459491729736


 65%|██████████████████████████████████████████▉                       | 651/1000 [30:04<00:40,  8.56it/s]

training for epoch 651
for epoch 651 learning rate is 9.697737297875251e-05
training_step
0 2.0275997748741736
validation_step
0 2.0771111708420973
for epoch 651 mean loss on train 2.027599811553955
for epoch 651 mean loss on val 2.07711124420166
training for epoch 652
for epoch 652 learning rate is 9.697737297875251e-05
training_step
0 2.014113499568059
validation_step
0 2.032093781691331
for epoch 652 mean loss on train 2.014113426208496
for epoch 652 mean loss on val 2.0320937633514404


 65%|███████████████████████████████████████████                       | 653/1000 [30:04<00:37,  9.31it/s]

training for epoch 653
for epoch 653 learning rate is 9.697737297875251e-05
training_step
0 1.964904051560622
validation_step
0 2.031575716458834
for epoch 653 mean loss on train 1.9649040699005127
for epoch 653 mean loss on val 2.0315756797790527


 65%|███████████████████████████████████████████▏                      | 654/1000 [30:04<00:37,  9.28it/s]

training for epoch 654
for epoch 654 learning rate is 9.697737297875251e-05
training_step
0 2.0225894634540262
validation_step
0 2.039035650400015
for epoch 654 mean loss on train 2.0225894451141357
for epoch 654 mean loss on val 2.0390355587005615


 66%|███████████████████████████████████████████▏                      | 655/1000 [30:04<00:38,  8.97it/s]

training for epoch 655
for epoch 655 learning rate is 9.697737297875251e-05
training_step
0 2.0323486328125
validation_step
0 1.9896034827599158
for epoch 655 mean loss on train 2.0323486328125
for epoch 655 mean loss on val 1.9896035194396973


 66%|███████████████████████████████████████████▎                      | 656/1000 [30:04<00:38,  8.92it/s]

training for epoch 656
for epoch 656 learning rate is 9.697737297875251e-05
training_step
0 2.007566745464618
validation_step
0 2.0277486947866588
for epoch 656 mean loss on train 2.0075666904449463
for epoch 656 mean loss on val 2.0277485847473145


 66%|███████████████████████████████████████████▎                      | 657/1000 [30:04<00:38,  8.91it/s]

training for epoch 657
for epoch 657 learning rate is 9.697737297875251e-05
training_step
0 1.946783212515024
validation_step
0 2.029878909771259
for epoch 657 mean loss on train 1.946783185005188
for epoch 657 mean loss on val 2.029878854751587


 66%|███████████████████████████████████████████▍                      | 658/1000 [30:04<00:38,  8.97it/s]

training for epoch 658
for epoch 658 learning rate is 9.697737297875251e-05
training_step
0 2.0751444009634166
validation_step
0 2.0965857872596154
for epoch 658 mean loss on train 2.0751442909240723
for epoch 658 mean loss on val 2.096585750579834


 66%|███████████████████████████████████████████▍                      | 659/1000 [30:04<00:38,  8.91it/s]

training for epoch 659
for epoch 659 learning rate is 9.697737297875251e-05
training_step
0 1.9939422607421875
validation_step
0 2.0237626295823317
for epoch 659 mean loss on train 1.9939422607421875
for epoch 659 mean loss on val 2.0237627029418945


 66%|███████████████████████████████████████████▌                      | 660/1000 [30:05<00:38,  8.85it/s]

training for epoch 660
for epoch 660 learning rate is 9.697737297875251e-05
training_step
0 2.0196533203125
validation_step
0 2.0610997126652646
for epoch 660 mean loss on train 2.0196533203125
for epoch 660 mean loss on val 2.0610997676849365


 66%|███████████████████████████████████████████▋                      | 661/1000 [30:05<00:40,  8.43it/s]

training for epoch 661
for epoch 661 learning rate is 9.697737297875251e-05
training_step
0 2.087281447190505
validation_step
0 2.0713167924147387
for epoch 661 mean loss on train 2.0872814655303955
for epoch 661 mean loss on val 2.071316719055176


 66%|███████████████████████████████████████████▋                      | 662/1000 [30:05<00:40,  8.41it/s]

training for epoch 662
for epoch 662 learning rate is 9.697737297875251e-05
training_step
0 2.089124092688927
validation_step
0 2.0041320507342997
for epoch 662 mean loss on train 2.0891242027282715
for epoch 662 mean loss on val 2.004132032394409


 66%|███████████████████████████████████████████▊                      | 663/1000 [30:05<00:38,  8.66it/s]

training for epoch 663
for epoch 663 learning rate is 9.697737297875251e-05
training_step
0 2.0963448744553785
validation_step
0 2.030207267174354
for epoch 663 mean loss on train 2.0963449478149414
for epoch 663 mean loss on val 2.0302071571350098
training for epoch 664
for epoch 664 learning rate is 9.697737297875251e-05
training_step
0 2.0354334024282603
validation_step
0 1.9929172809307392
for epoch 664 mean loss on train 2.035433292388916
for epoch 664 mean loss on val 1.9929172992706299


 66%|███████████████████████████████████████████▉                      | 665/1000 [30:05<00:36,  9.08it/s]

training for epoch 665
for epoch 665 learning rate is 9.697737297875251e-05
training_step
0 2.026576115534856
validation_step
0 2.100529597355769
for epoch 665 mean loss on train 2.026576042175293
for epoch 665 mean loss on val 2.100529670715332


 67%|███████████████████████████████████████████▉                      | 666/1000 [30:05<00:37,  9.01it/s]

training for epoch 666
for epoch 666 learning rate is 9.697737297875251e-05
training_step
0 2.156128663283128
validation_step
0 1.9661156580998347
for epoch 666 mean loss on train 2.1561286449432373
for epoch 666 mean loss on val 1.9661157131195068


 67%|████████████████████████████████████████████                      | 667/1000 [30:05<00:36,  9.08it/s]

training for epoch 667
for epoch 667 learning rate is 9.697737297875251e-05
training_step
0 2.025244346031776
validation_step
0 2.1070291079007664
for epoch 667 mean loss on train 2.0252442359924316
for epoch 667 mean loss on val 2.1070291996002197


 67%|████████████████████████████████████████████                      | 668/1000 [30:05<00:39,  8.37it/s]

training for epoch 668
for epoch 668 learning rate is 9.697737297875251e-05
training_step
0 2.0333385467529297
validation_step
0 2.0493618891789365
for epoch 668 mean loss on train 2.0333385467529297
for epoch 668 mean loss on val 2.0493619441986084


 67%|████████████████████████████████████████████▏                     | 669/1000 [30:06<00:41,  7.89it/s]

training for epoch 669
for epoch 669 learning rate is 9.697737297875251e-05
training_step
0 2.0674720177283654
validation_step
0 2.077510246863732
for epoch 669 mean loss on train 2.067471981048584
for epoch 669 mean loss on val 2.077510356903076


 67%|████████████████████████████████████████████▏                     | 670/1000 [30:06<00:52,  6.28it/s]

training for epoch 670
for epoch 670 learning rate is 9.697737297875251e-05
training_step
0 2.109476823073167
validation_step
0 2.0574979048508863
for epoch 670 mean loss on train 2.1094768047332764
for epoch 670 mean loss on val 2.057497978210449


 67%|████████████████████████████████████████████▎                     | 671/1000 [30:06<00:51,  6.37it/s]

training for epoch 671
for epoch 671 learning rate is 9.697737297875251e-05
training_step
0 2.1655084169827976
validation_step
0 2.062408740703876
for epoch 671 mean loss on train 2.165508508682251
for epoch 671 mean loss on val 2.062408685684204


 67%|████████████████████████████████████████████▎                     | 672/1000 [30:06<00:53,  6.11it/s]

training for epoch 672
for epoch 672 learning rate is 8.727963568087727e-05
training_step
0 2.0084604116586537
validation_step
0 1.9665676997258112
for epoch 672 mean loss on train 2.008460521697998
for epoch 672 mean loss on val 1.9665677547454834


 67%|████████████████████████████████████████████▍                     | 673/1000 [30:06<00:50,  6.42it/s]

training for epoch 673
for epoch 673 learning rate is 8.727963568087727e-05
training_step
0 2.08834721491887
validation_step
0 2.0387643667367787
for epoch 673 mean loss on train 2.0883471965789795
for epoch 673 mean loss on val 2.038764476776123


 67%|████████████████████████████████████████████▍                     | 674/1000 [30:06<00:47,  6.88it/s]

training for epoch 674
for epoch 674 learning rate is 8.727963568087727e-05
training_step
0 2.186705369215745
validation_step
0 2.0449578211857724
for epoch 674 mean loss on train 2.1867053508758545
for epoch 674 mean loss on val 2.0449578762054443


 68%|████████████████████████████████████████████▌                     | 675/1000 [30:07<00:44,  7.28it/s]

training for epoch 675
for epoch 675 learning rate is 8.727963568087727e-05
training_step
0 2.1164525838998647
validation_step
0 2.1229862800011268
for epoch 675 mean loss on train 2.116452693939209
for epoch 675 mean loss on val 2.122986316680908


 68%|████████████████████████████████████████████▌                     | 676/1000 [30:07<00:43,  7.43it/s]

training for epoch 676
for epoch 676 learning rate is 8.727963568087727e-05
training_step
0 2.0429781400240383
validation_step
0 1.9952730032113881
for epoch 676 mean loss on train 2.042978048324585
for epoch 676 mean loss on val 1.9952729940414429


 68%|████████████████████████████████████████████▋                     | 677/1000 [30:07<00:41,  7.70it/s]

training for epoch 677
for epoch 677 learning rate is 8.727963568087727e-05
training_step
0 2.003002166748047
validation_step
0 1.9923716325026293
for epoch 677 mean loss on train 2.003002166748047
for epoch 677 mean loss on val 1.992371678352356


 68%|████████████████████████████████████████████▋                     | 678/1000 [30:07<00:40,  7.88it/s]

training for epoch 678
for epoch 678 learning rate is 8.727963568087727e-05
training_step
0 2.0355701446533203
validation_step
0 2.233857961801382
for epoch 678 mean loss on train 2.0355701446533203
for epoch 678 mean loss on val 2.2338578701019287


 68%|████████████████████████████████████████████▊                     | 679/1000 [30:07<00:40,  8.02it/s]

training for epoch 679
for epoch 679 learning rate is 8.727963568087727e-05
training_step
0 2.065222373375526
validation_step
0 2.062640850360577
for epoch 679 mean loss on train 2.0652222633361816
for epoch 679 mean loss on val 2.062640905380249


 68%|████████████████████████████████████████████▉                     | 680/1000 [30:07<00:39,  8.06it/s]

training for epoch 680
for epoch 680 learning rate is 8.727963568087727e-05
training_step
0 1.989685792189378
validation_step
0 2.0405877920297475
for epoch 680 mean loss on train 1.9896857738494873
for epoch 680 mean loss on val 2.040587902069092


 68%|████████████████████████████████████████████▉                     | 681/1000 [30:07<00:39,  8.13it/s]

training for epoch 681
for epoch 681 learning rate is 8.727963568087727e-05
training_step
0 2.0941842886117787
validation_step
0 2.0084071526160607
for epoch 681 mean loss on train 2.094184398651123
for epoch 681 mean loss on val 2.0084071159362793


 68%|█████████████████████████████████████████████                     | 682/1000 [30:07<00:39,  8.08it/s]

training for epoch 682
for epoch 682 learning rate is 8.727963568087727e-05
training_step
0 2.0783807314359226
validation_step
0 1.9878366910494292
for epoch 682 mean loss on train 2.078380823135376
for epoch 682 mean loss on val 1.9878367185592651


 68%|█████████████████████████████████████████████                     | 683/1000 [30:08<00:38,  8.16it/s]

training for epoch 683
for epoch 683 learning rate is 8.727963568087727e-05
training_step
0 2.099765630868765
validation_step
0 2.088655765240009
for epoch 683 mean loss on train 2.0997655391693115
for epoch 683 mean loss on val 2.088655710220337


 68%|█████████████████████████████████████████████▏                    | 684/1000 [30:08<00:37,  8.34it/s]

training for epoch 684
for epoch 684 learning rate is 8.727963568087727e-05
training_step
0 2.0030411940354567
validation_step
0 2.1501382681039662
for epoch 684 mean loss on train 2.0030412673950195
for epoch 684 mean loss on val 2.1501383781433105


 68%|█████████████████████████████████████████████▏                    | 685/1000 [30:08<00:36,  8.52it/s]

training for epoch 685
for epoch 685 learning rate is 8.727963568087727e-05
training_step
0 2.036508560180664
validation_step
0 2.0481871091402493
for epoch 685 mean loss on train 2.036508560180664
for epoch 685 mean loss on val 2.048187017440796


 69%|█████████████████████████████████████████████▎                    | 686/1000 [30:08<00:36,  8.60it/s]

training for epoch 686
for epoch 686 learning rate is 8.727963568087727e-05
training_step
0 2.1156306633582482
validation_step
0 2.069621306199294
for epoch 686 mean loss on train 2.115630626678467
for epoch 686 mean loss on val 2.0696213245391846


 69%|█████████████████████████████████████████████▎                    | 687/1000 [30:08<00:36,  8.52it/s]

training for epoch 687
for epoch 687 learning rate is 8.727963568087727e-05
training_step
0 2.105351081261268
validation_step
0 1.9808927682729869
for epoch 687 mean loss on train 2.105350971221924
for epoch 687 mean loss on val 1.9808927774429321


 69%|█████████████████████████████████████████████▍                    | 688/1000 [30:08<00:36,  8.60it/s]

training for epoch 688
for epoch 688 learning rate is 8.727963568087727e-05
training_step
0 2.0415385319636417
validation_step
0 2.024605530958909
for epoch 688 mean loss on train 2.0415384769439697
for epoch 688 mean loss on val 2.0246055126190186


 69%|█████████████████████████████████████████████▍                    | 689/1000 [30:08<00:37,  8.23it/s]

training for epoch 689
for epoch 689 learning rate is 8.727963568087727e-05
training_step
0 2.0434178572434645
validation_step
0 1.9724417466383715
for epoch 689 mean loss on train 2.0434179306030273
for epoch 689 mean loss on val 1.9724417924880981


 69%|█████████████████████████████████████████████▌                    | 690/1000 [30:08<00:37,  8.27it/s]

training for epoch 690
for epoch 690 learning rate is 8.727963568087727e-05
training_step
0 2.0119888599102316
validation_step
0 2.010173064011794
for epoch 690 mean loss on train 2.011988878250122
for epoch 690 mean loss on val 2.0101730823516846


 69%|█████████████████████████████████████████████▌                    | 691/1000 [30:09<00:38,  7.97it/s]

training for epoch 691
for epoch 691 learning rate is 8.727963568087727e-05
training_step
0 1.9502463707557092
validation_step
0 2.0383487114539514
for epoch 691 mean loss on train 1.9502463340759277
for epoch 691 mean loss on val 2.03834867477417


 69%|█████████████████████████████████████████████▋                    | 692/1000 [30:09<00:38,  8.07it/s]

training for epoch 692
for epoch 692 learning rate is 8.727963568087727e-05
training_step
0 2.10849615243765
validation_step
0 2.0540071634145884
for epoch 692 mean loss on train 2.1084961891174316
for epoch 692 mean loss on val 2.054007053375244


 69%|█████████████████████████████████████████████▋                    | 693/1000 [30:09<00:37,  8.25it/s]

training for epoch 693
for epoch 693 learning rate is 7.855167211278955e-05
training_step
0 2.0673883878267727
validation_step
0 2.0877245389498196
for epoch 693 mean loss on train 2.0673882961273193
for epoch 693 mean loss on val 2.087724447250366


 69%|█████████████████████████████████████████████▊                    | 694/1000 [30:09<00:36,  8.27it/s]

training for epoch 694
for epoch 694 learning rate is 7.855167211278955e-05
training_step
0 1.9971573169414814
validation_step
0 1.9987276517427885
for epoch 694 mean loss on train 1.997157335281372
for epoch 694 mean loss on val 1.9987276792526245


 70%|█████████████████████████████████████████████▊                    | 695/1000 [30:09<00:35,  8.56it/s]

training for epoch 695
for epoch 695 learning rate is 7.855167211278955e-05
training_step
0 2.0585531088022084
validation_step
0 2.016484333918645
for epoch 695 mean loss on train 2.0585532188415527
for epoch 695 mean loss on val 2.016484260559082
training for epoch 696
for epoch 696 learning rate is 7.855167211278955e-05
training_step
0 2.046960243811974
validation_step
0 2.086334521953876
for epoch 696 mean loss on train 2.0469603538513184
for epoch 696 mean loss on val 2.086334466934204


 70%|██████████████████████████████████████████████                    | 697/1000 [30:09<00:34,  8.75it/s]

training for epoch 697
for epoch 697 learning rate is 7.855167211278955e-05
training_step
0 2.0215521592360277
validation_step
0 2.0223746666541467
for epoch 697 mean loss on train 2.021552085876465
for epoch 697 mean loss on val 2.0223746299743652


 70%|██████████████████████████████████████████████                    | 698/1000 [30:09<00:33,  8.97it/s]

training for epoch 698
for epoch 698 learning rate is 7.855167211278955e-05
training_step
0 2.0452665182260366
validation_step
0 2.012442662165715
for epoch 698 mean loss on train 2.045266628265381
for epoch 698 mean loss on val 2.0124425888061523
training for epoch 699
for epoch 699 learning rate is 7.855167211278955e-05
training_step
0 2.1160386892465444
validation_step
0 1.9828301943265474
for epoch 699 mean loss on train 2.1160387992858887
for epoch 699 mean loss on val 1.9828301668167114


 70%|██████████████████████████████████████████████▏                   | 700/1000 [30:09<00:32,  9.31it/s]

training for epoch 700
for epoch 700 learning rate is 7.855167211278955e-05
training_step
0 2.092269017146184
validation_step
0 2.0012245178222656
for epoch 700 mean loss on train 2.092268943786621
for epoch 700 mean loss on val 2.0012245178222656


 70%|██████████████████████████████████████████████▎                   | 701/1000 [30:10<00:33,  8.87it/s]

training for epoch 701
for epoch 701 learning rate is 7.855167211278955e-05
training_step
0 2.0463927342341495
validation_step
0 2.0578448955829325
for epoch 701 mean loss on train 2.0463926792144775
for epoch 701 mean loss on val 2.057844877243042


 70%|██████████████████████████████████████████████▎                   | 702/1000 [30:10<00:35,  8.30it/s]

training for epoch 702
for epoch 702 learning rate is 7.855167211278955e-05
training_step
0 2.087930385883038
validation_step
0 2.137305186345027
for epoch 702 mean loss on train 2.08793044090271
for epoch 702 mean loss on val 2.13730525970459


 70%|██████████████████████████████████████████████▍                   | 703/1000 [30:10<00:36,  8.06it/s]

training for epoch 703
for epoch 703 learning rate is 7.855167211278955e-05
training_step
0 2.0005726447472205
validation_step
0 2.122151594895583
for epoch 703 mean loss on train 2.000572681427002
for epoch 703 mean loss on val 2.1221516132354736


 70%|██████████████████████████████████████████████▍                   | 704/1000 [30:10<00:36,  8.20it/s]

training for epoch 704
for epoch 704 learning rate is 7.855167211278955e-05
training_step
0 2.031770412738507
validation_step
0 2.055995354285607
for epoch 704 mean loss on train 2.0317704677581787
for epoch 704 mean loss on val 2.055995464324951


 70%|██████████████████████████████████████████████▌                   | 705/1000 [30:10<00:36,  8.19it/s]

training for epoch 705
for epoch 705 learning rate is 7.855167211278955e-05
training_step
0 2.241357216468224
validation_step
0 2.0948272118201623
for epoch 705 mean loss on train 2.2413573265075684
for epoch 705 mean loss on val 2.094827175140381


 71%|██████████████████████████████████████████████▌                   | 706/1000 [30:10<00:36,  8.08it/s]

training for epoch 706
for epoch 706 learning rate is 7.855167211278955e-05
training_step
0 2.0656214493971605
validation_step
0 2.062382771418645
for epoch 706 mean loss on train 2.0656213760375977
for epoch 706 mean loss on val 2.062382698059082


 71%|██████████████████████████████████████████████▋                   | 707/1000 [30:10<00:35,  8.29it/s]

training for epoch 707
for epoch 707 learning rate is 7.855167211278955e-05
training_step
0 2.025252709021935
validation_step
0 1.9555631784292369
for epoch 707 mean loss on train 2.0252528190612793
for epoch 707 mean loss on val 1.9555631875991821


 71%|██████████████████████████████████████████████▋                   | 708/1000 [30:10<00:33,  8.62it/s]

training for epoch 708
for epoch 708 learning rate is 7.855167211278955e-05
training_step
0 2.062432802640475
validation_step
0 2.0449361067551832
for epoch 708 mean loss on train 2.0624327659606934
for epoch 708 mean loss on val 2.044936180114746


 71%|██████████████████████████████████████████████▊                   | 709/1000 [30:11<00:33,  8.78it/s]

training for epoch 709
for epoch 709 learning rate is 7.855167211278955e-05
training_step
0 2.0274711022010217
validation_step
0 2.049621288592999
for epoch 709 mean loss on train 2.0274710655212402
for epoch 709 mean loss on val 2.049621343612671


 71%|██████████████████████████████████████████████▊                   | 710/1000 [30:11<00:33,  8.63it/s]

training for epoch 710
for epoch 710 learning rate is 7.855167211278955e-05
training_step
0 2.01535650400015
validation_step
0 1.9881289555476263
for epoch 710 mean loss on train 2.0153565406799316
for epoch 710 mean loss on val 1.988128900527954


 71%|██████████████████████████████████████████████▉                   | 711/1000 [30:11<00:33,  8.72it/s]

training for epoch 711
for epoch 711 learning rate is 7.855167211278955e-05
training_step
0 2.07451292184683
validation_step
0 1.9469287578876202
for epoch 711 mean loss on train 2.0745129585266113
for epoch 711 mean loss on val 1.9469287395477295


 71%|██████████████████████████████████████████████▉                   | 712/1000 [30:11<00:32,  8.74it/s]

training for epoch 712
for epoch 712 learning rate is 7.855167211278955e-05
training_step
0 2.008106378408579
validation_step
0 2.0637767498309794
for epoch 712 mean loss on train 2.0081064701080322
for epoch 712 mean loss on val 2.063776731491089


 71%|███████████████████████████████████████████████                   | 713/1000 [30:11<00:31,  9.07it/s]

training for epoch 713
for epoch 713 learning rate is 7.855167211278955e-05
training_step
0 2.0686412224402795
validation_step
0 2.0487791208120494
for epoch 713 mean loss on train 2.068641185760498
for epoch 713 mean loss on val 2.048779010772705


 71%|███████████████████████████████████████████████                   | 714/1000 [30:11<00:33,  8.65it/s]

training for epoch 714
for epoch 714 learning rate is 7.06965049015106e-05
training_step
0 2.0293922424316406
validation_step
0 1.9913912553053637
for epoch 714 mean loss on train 2.0293922424316406
for epoch 714 mean loss on val 1.9913913011550903


 72%|███████████████████████████████████████████████▏                  | 715/1000 [30:11<00:32,  8.79it/s]

training for epoch 715
for epoch 715 learning rate is 7.06965049015106e-05
training_step
0 2.12982177734375
validation_step
0 2.052357893723708
for epoch 715 mean loss on train 2.12982177734375
for epoch 715 mean loss on val 2.0523579120635986


 72%|███████████████████████████████████████████████▎                  | 716/1000 [30:11<00:32,  8.72it/s]

training for epoch 716
for epoch 716 learning rate is 7.06965049015106e-05
training_step
0 2.037120525653546
validation_step
0 2.0160064697265625
for epoch 716 mean loss on train 2.0371205806732178
for epoch 716 mean loss on val 2.0160064697265625


 72%|███████████████████████████████████████████████▎                  | 717/1000 [30:12<00:33,  8.51it/s]

training for epoch 717
for epoch 717 learning rate is 7.06965049015106e-05
training_step
0 1.9934702653151293
validation_step
0 2.1211009392371545
for epoch 717 mean loss on train 1.993470311164856
for epoch 717 mean loss on val 2.121100902557373


 72%|███████████████████████████████████████████████▍                  | 718/1000 [30:12<00:32,  8.71it/s]

training for epoch 718
for epoch 718 learning rate is 7.06965049015106e-05
training_step
0 2.087060341468224
validation_step
0 1.9960019038273737
for epoch 718 mean loss on train 2.0870604515075684
for epoch 718 mean loss on val 1.996001958847046


 72%|███████████████████████████████████████████████▍                  | 719/1000 [30:12<00:33,  8.29it/s]

training for epoch 719
for epoch 719 learning rate is 7.06965049015106e-05
training_step
0 2.01295412503756
validation_step
0 1.9554223280686598
for epoch 719 mean loss on train 2.0129542350769043
for epoch 719 mean loss on val 1.955422282218933


 72%|███████████████████████████████████████████████▌                  | 720/1000 [30:12<00:33,  8.32it/s]

training for epoch 720
for epoch 720 learning rate is 7.06965049015106e-05
training_step
0 2.0959334740271935
validation_step
0 2.025738936204177
for epoch 720 mean loss on train 2.095933437347412
for epoch 720 mean loss on val 2.0257389545440674


 72%|███████████████████████████████████████████████▌                  | 721/1000 [30:12<00:33,  8.45it/s]

training for epoch 721
for epoch 721 learning rate is 7.06965049015106e-05
training_step
0 1.987745578472431
validation_step
0 2.0086323664738583
for epoch 721 mean loss on train 1.9877455234527588
for epoch 721 mean loss on val 2.0086324214935303


 72%|███████████████████████████████████████████████▋                  | 722/1000 [30:12<00:33,  8.27it/s]

training for epoch 722
for epoch 722 learning rate is 7.06965049015106e-05
training_step
0 2.0700561816875753
validation_step
0 2.092852372389573
for epoch 722 mean loss on train 2.070056200027466
for epoch 722 mean loss on val 2.0928523540496826


 72%|███████████████████████████████████████████████▋                  | 723/1000 [30:12<00:33,  8.21it/s]

training for epoch 723
for epoch 723 learning rate is 7.06965049015106e-05
training_step
0 2.0122109926663914
validation_step
0 2.0023523477407603
for epoch 723 mean loss on train 2.0122110843658447
for epoch 723 mean loss on val 2.002352237701416


 72%|███████████████████████████████████████████████▊                  | 724/1000 [30:12<00:33,  8.23it/s]

training for epoch 724
for epoch 724 learning rate is 7.06965049015106e-05
training_step
0 2.066434860229492
validation_step
0 2.117788314819336
for epoch 724 mean loss on train 2.066434860229492
for epoch 724 mean loss on val 2.117788314819336


 72%|███████████████████████████████████████████████▊                  | 725/1000 [30:12<00:33,  8.26it/s]

training for epoch 725
for epoch 725 learning rate is 7.06965049015106e-05
training_step
0 2.000607123741737
validation_step
0 2.0177511068490834
for epoch 725 mean loss on train 2.0006070137023926
for epoch 725 mean loss on val 2.0177512168884277


 73%|███████████████████████████████████████████████▉                  | 726/1000 [30:13<00:34,  8.03it/s]

training for epoch 726
for epoch 726 learning rate is 7.06965049015106e-05
training_step
0 2.089880136343149
validation_step
0 2.0902318220872145
for epoch 726 mean loss on train 2.0898802280426025
for epoch 726 mean loss on val 2.0902318954467773


 73%|███████████████████████████████████████████████▉                  | 727/1000 [30:13<00:35,  7.66it/s]

training for epoch 727
for epoch 727 learning rate is 7.06965049015106e-05
training_step
0 2.014937767615685
validation_step
0 2.0090854351337137
for epoch 727 mean loss on train 2.0149378776550293
for epoch 727 mean loss on val 2.0090854167938232


 73%|████████████████████████████████████████████████                  | 728/1000 [30:13<00:35,  7.74it/s]

training for epoch 728
for epoch 728 learning rate is 7.06965049015106e-05
training_step
0 2.0459778125469503
validation_step
0 1.9592929253211389
for epoch 728 mean loss on train 2.045977830886841
for epoch 728 mean loss on val 1.9592928886413574


 73%|████████████████████████████████████████████████                  | 729/1000 [30:13<00:35,  7.57it/s]

training for epoch 729
for epoch 729 learning rate is 7.06965049015106e-05
training_step
0 2.0654707688551683
validation_step
0 1.9540554926945612
for epoch 729 mean loss on train 2.0654706954956055
for epoch 729 mean loss on val 1.9540555477142334


 73%|████████████████████████████████████████████████▏                 | 730/1000 [30:13<00:37,  7.24it/s]

training for epoch 730
for epoch 730 learning rate is 7.06965049015106e-05
training_step
0 1.985956045297476
validation_step
0 2.005796139056866
for epoch 730 mean loss on train 1.985956072807312
for epoch 730 mean loss on val 2.005796194076538


 73%|████████████████████████████████████████████████▏                 | 731/1000 [30:13<00:37,  7.24it/s]

training for epoch 731
for epoch 731 learning rate is 7.06965049015106e-05
training_step
0 1.9992791689359224
validation_step
0 2.050727990957407
for epoch 731 mean loss on train 1.9992791414260864
for epoch 731 mean loss on val 2.0507280826568604


 73%|████████████████████████████████████████████████▎                 | 732/1000 [30:13<00:38,  7.03it/s]

training for epoch 732
for epoch 732 learning rate is 7.06965049015106e-05
training_step
0 2.017078693096454
validation_step
0 2.163362062894381
for epoch 732 mean loss on train 2.0170786380767822
for epoch 732 mean loss on val 2.1633620262145996


 73%|████████████████████████████████████████████████▍                 | 733/1000 [30:14<00:37,  7.21it/s]

training for epoch 733
for epoch 733 learning rate is 7.06965049015106e-05
training_step
0 2.1162955944354715
validation_step
0 2.204294204711914
for epoch 733 mean loss on train 2.116295576095581
for epoch 733 mean loss on val 2.204294204711914


 73%|████████████████████████████████████████████████▍                 | 734/1000 [30:14<00:34,  7.64it/s]

training for epoch 734
for epoch 734 learning rate is 7.06965049015106e-05
training_step
0 2.037391809316782
validation_step
0 1.9878487220177283
for epoch 734 mean loss on train 2.0373919010162354
for epoch 734 mean loss on val 1.9878487586975098


 74%|████████████████████████████████████████████████▌                 | 735/1000 [30:14<00:32,  8.08it/s]

training for epoch 735
for epoch 735 learning rate is 6.362685441135955e-05
training_step
0 2.136411373431866
validation_step
0 2.069077418400691
for epoch 735 mean loss on train 2.136411428451538
for epoch 735 mean loss on val 2.069077491760254


 74%|████████████████████████████████████████████████▌                 | 736/1000 [30:14<00:33,  7.99it/s]

training for epoch 736
for epoch 736 learning rate is 6.362685441135955e-05
training_step
0 2.046925324660081
validation_step
0 2.121101526113657
for epoch 736 mean loss on train 2.0469253063201904
for epoch 736 mean loss on val 2.1211016178131104


 74%|████████████████████████████████████████████████▋                 | 737/1000 [30:14<00:31,  8.43it/s]

training for epoch 737
for epoch 737 learning rate is 6.362685441135955e-05
training_step
0 2.0189619797926683
validation_step
0 2.0443766667292667
for epoch 737 mean loss on train 2.0189619064331055
for epoch 737 mean loss on val 2.0443766117095947


 74%|████████████████████████████████████████████████▋                 | 738/1000 [30:14<00:31,  8.22it/s]

training for epoch 738
for epoch 738 learning rate is 6.362685441135955e-05
training_step
0 2.1359107677753153
validation_step
0 2.0208570040189304
for epoch 738 mean loss on train 2.135910749435425
for epoch 738 mean loss on val 2.020857095718384


 74%|████████████████████████████████████████████████▊                 | 739/1000 [30:14<00:30,  8.45it/s]

training for epoch 739
for epoch 739 learning rate is 6.362685441135955e-05
training_step
0 1.9961971869835486
validation_step
0 1.971960654625526
for epoch 739 mean loss on train 1.99619722366333
for epoch 739 mean loss on val 1.9719606637954712


 74%|████████████████████████████████████████████████▊                 | 740/1000 [30:14<00:29,  8.71it/s]

training for epoch 740
for epoch 740 learning rate is 6.362685441135955e-05
training_step
0 2.0376724830040565
validation_step
0 1.986747301541842
for epoch 740 mean loss on train 2.037672519683838
for epoch 740 mean loss on val 1.9867472648620605


 74%|████████████████████████████████████████████████▉                 | 741/1000 [30:15<00:29,  8.65it/s]

training for epoch 741
for epoch 741 learning rate is 6.362685441135955e-05
training_step
0 1.9425010681152344
validation_step
0 2.12042236328125
for epoch 741 mean loss on train 1.9425010681152344
for epoch 741 mean loss on val 2.12042236328125


 74%|████████████████████████████████████████████████▉                 | 742/1000 [30:15<00:30,  8.56it/s]

training for epoch 742
for epoch 742 learning rate is 6.362685441135955e-05
training_step
0 2.108963599571815
validation_step
0 2.0511531829833984
for epoch 742 mean loss on train 2.1089634895324707
for epoch 742 mean loss on val 2.0511531829833984


 74%|█████████████████████████████████████████████████                 | 743/1000 [30:15<00:32,  7.83it/s]

training for epoch 743
for epoch 743 learning rate is 6.362685441135955e-05
training_step
0 2.0168414482703576
validation_step
0 2.1289512927715597
for epoch 743 mean loss on train 2.016841411590576
for epoch 743 mean loss on val 2.12895131111145


 74%|█████████████████████████████████████████████████                 | 744/1000 [30:15<00:31,  8.13it/s]

training for epoch 744
for epoch 744 learning rate is 6.362685441135955e-05
training_step
0 2.0747149540827823
validation_step
0 2.022443037766677
for epoch 744 mean loss on train 2.0747148990631104
for epoch 744 mean loss on val 2.0224430561065674


 74%|█████████████████████████████████████████████████▏                | 745/1000 [30:15<00:31,  8.21it/s]

training for epoch 745
for epoch 745 learning rate is 6.362685441135955e-05
training_step
0 2.0412022517277646
validation_step
0 2.031969950749324
for epoch 745 mean loss on train 2.0412023067474365
for epoch 745 mean loss on val 2.0319700241088867


 75%|█████████████████████████████████████████████████▏                | 746/1000 [30:15<00:29,  8.48it/s]

training for epoch 746
for epoch 746 learning rate is 6.362685441135955e-05
training_step
0 1.9900565514197717
validation_step
0 2.021884037898137
for epoch 746 mean loss on train 1.9900565147399902
for epoch 746 mean loss on val 2.021883964538574


 75%|█████████████████████████████████████████████████▎                | 747/1000 [30:15<00:29,  8.63it/s]

training for epoch 747
for epoch 747 learning rate is 6.362685441135955e-05
training_step
0 2.051069112924429
validation_step
0 2.073351639967698
for epoch 747 mean loss on train 2.0510690212249756
for epoch 747 mean loss on val 2.0733516216278076


 75%|█████████████████████████████████████████████████▎                | 748/1000 [30:15<00:28,  8.71it/s]

training for epoch 748
for epoch 748 learning rate is 6.362685441135955e-05
training_step
0 2.0330687302809496
validation_step
0 2.036951651939979
for epoch 748 mean loss on train 2.0330686569213867
for epoch 748 mean loss on val 2.0369515419006348


 75%|█████████████████████████████████████████████████▍                | 749/1000 [30:15<00:29,  8.58it/s]

training for epoch 749
for epoch 749 learning rate is 6.362685441135955e-05
training_step
0 2.045228224534255
validation_step
0 2.021930694580078
for epoch 749 mean loss on train 2.0452282428741455
for epoch 749 mean loss on val 2.021930694580078


 75%|█████████████████████████████████████████████████▌                | 750/1000 [30:16<00:28,  8.80it/s]

training for epoch 750
for epoch 750 learning rate is 6.362685441135955e-05
training_step
0 2.001874483548678
validation_step
0 1.9567561516394982
for epoch 750 mean loss on train 2.0018744468688965
for epoch 750 mean loss on val 1.9567561149597168


 75%|█████████████████████████████████████████████████▌                | 751/1000 [30:16<00:27,  9.06it/s]

training for epoch 751
for epoch 751 learning rate is 6.362685441135955e-05
training_step
0 2.019491048959585
validation_step
0 2.1165916736309347
for epoch 751 mean loss on train 2.019490957260132
for epoch 751 mean loss on val 2.116591691970825


 75%|█████████████████████████████████████████████████▋                | 752/1000 [30:16<00:27,  9.17it/s]

training for epoch 752
for epoch 752 learning rate is 6.362685441135955e-05
training_step
0 1.9557599287766676
validation_step
0 1.9949221977820764
for epoch 752 mean loss on train 1.955759882926941
for epoch 752 mean loss on val 1.994922161102295
training for epoch 753
for epoch 753 learning rate is 6.362685441135955e-05
training_step
0 2.007576135488657
validation_step
0 2.103671293992263
for epoch 753 mean loss on train 2.0075762271881104
for epoch 753 mean loss on val 2.1036713123321533


 75%|█████████████████████████████████████████████████▊                | 754/1000 [30:16<00:26,  9.28it/s]

training for epoch 754
for epoch 754 learning rate is 6.362685441135955e-05
training_step
0 1.99992062495305
validation_step
0 2.063209973848783
for epoch 754 mean loss on train 1.9999206066131592
for epoch 754 mean loss on val 2.0632100105285645


 76%|█████████████████████████████████████████████████▊                | 755/1000 [30:16<00:26,  9.33it/s]

training for epoch 755
for epoch 755 learning rate is 6.362685441135955e-05
training_step
0 2.0822420853834887
validation_step
0 2.0149385012113132
for epoch 755 mean loss on train 2.082242012023926
for epoch 755 mean loss on val 2.0149385929107666
training for epoch 756
for epoch 756 learning rate is 5.7264168970223595e-05
training_step
0 1.961135717538687
validation_step
0 2.015392303466797
for epoch 756 mean loss on train 1.961135745048523
for epoch 756 mean loss on val 2.015392303466797


 76%|█████████████████████████████████████████████████▉                | 757/1000 [30:16<00:25,  9.48it/s]

training for epoch 757
for epoch 757 learning rate is 5.7264168970223595e-05
training_step
0 2.0622998751126804
validation_step
0 1.9939921452448919
for epoch 757 mean loss on train 2.062299966812134
for epoch 757 mean loss on val 1.9939920902252197


 76%|██████████████████████████████████████████████████                | 758/1000 [30:16<00:25,  9.40it/s]

training for epoch 758
for epoch 758 learning rate is 5.7264168970223595e-05
training_step
0 2.115009307861328
validation_step
0 2.051066912137545
for epoch 758 mean loss on train 2.115009307861328
for epoch 758 mean loss on val 2.0510668754577637


 76%|██████████████████████████████████████████████████                | 759/1000 [30:17<00:26,  9.15it/s]

training for epoch 759
for epoch 759 learning rate is 5.7264168970223595e-05
training_step
0 2.011688525860126
validation_step
0 2.061146956223708
for epoch 759 mean loss on train 2.011688470840454
for epoch 759 mean loss on val 2.0611469745635986


 76%|██████████████████████████████████████████████████▏               | 760/1000 [30:17<00:25,  9.27it/s]

training for epoch 760
for epoch 760 learning rate is 5.7264168970223595e-05
training_step
0 1.9991996471698468
validation_step
0 2.0010208716759315
for epoch 760 mean loss on train 1.999199628829956
for epoch 760 mean loss on val 2.001020908355713


 76%|██████████████████████████████████████████████████▏               | 761/1000 [30:17<00:26,  8.90it/s]

training for epoch 761
for epoch 761 learning rate is 5.7264168970223595e-05
training_step
0 2.071041987492488
validation_step
0 1.986473376934345
for epoch 761 mean loss on train 2.071042060852051
for epoch 761 mean loss on val 1.9864733219146729


 76%|██████████████████████████████████████████████████▎               | 762/1000 [30:17<00:26,  8.99it/s]

training for epoch 762
for epoch 762 learning rate is 5.7264168970223595e-05
training_step
0 2.084945091834435
validation_step
0 2.0646507556621847
for epoch 762 mean loss on train 2.0849452018737793
for epoch 762 mean loss on val 2.064650774002075


 76%|██████████████████████████████████████████████████▎               | 763/1000 [30:17<00:26,  8.85it/s]

training for epoch 763
for epoch 763 learning rate is 5.7264168970223595e-05
training_step
0 2.0323231036846456
validation_step
0 1.9908118614783654
for epoch 763 mean loss on train 2.032323122024536
for epoch 763 mean loss on val 1.990811824798584


 76%|██████████████████████████████████████████████████▍               | 764/1000 [30:17<00:25,  9.08it/s]

training for epoch 764
for epoch 764 learning rate is 5.7264168970223595e-05
training_step
0 2.01626102740948
validation_step
0 2.037078563983624
for epoch 764 mean loss on train 2.016261100769043
for epoch 764 mean loss on val 2.037078619003296


 76%|██████████████████████████████████████████████████▍               | 765/1000 [30:17<00:28,  8.23it/s]

training for epoch 765
for epoch 765 learning rate is 5.7264168970223595e-05
training_step
0 2.0128612518310547
validation_step
0 2.10135371868427
for epoch 765 mean loss on train 2.0128612518310547
for epoch 765 mean loss on val 2.101353645324707


 77%|██████████████████████████████████████████████████▌               | 766/1000 [30:17<00:28,  8.10it/s]

training for epoch 766
for epoch 766 learning rate is 5.7264168970223595e-05
training_step
0 2.00196779691256
validation_step
0 2.059023196880634
for epoch 766 mean loss on train 2.0019679069519043
for epoch 766 mean loss on val 2.059023141860962


 77%|██████████████████████████████████████████████████▌               | 767/1000 [30:17<00:28,  8.29it/s]

training for epoch 767
for epoch 767 learning rate is 5.7264168970223595e-05
training_step
0 2.073830384474534
validation_step
0 2.1042936765230618
for epoch 767 mean loss on train 2.0738303661346436
for epoch 767 mean loss on val 2.1042935848236084


 77%|██████████████████████████████████████████████████▋               | 768/1000 [30:18<00:29,  7.89it/s]

training for epoch 768
for epoch 768 learning rate is 5.7264168970223595e-05
training_step
0 2.0822054056020884
validation_step
0 2.1457686791053185
for epoch 768 mean loss on train 2.082205295562744
for epoch 768 mean loss on val 2.145768642425537


 77%|██████████████████████████████████████████████████▊               | 769/1000 [30:18<00:27,  8.29it/s]

training for epoch 769
for epoch 769 learning rate is 5.7264168970223595e-05
training_step
0 1.981115487905649
validation_step
0 2.0239645150991588
for epoch 769 mean loss on train 1.981115460395813
for epoch 769 mean loss on val 2.0239644050598145


 77%|██████████████████████████████████████████████████▊               | 770/1000 [30:18<00:28,  8.20it/s]

training for epoch 770
for epoch 770 learning rate is 5.7264168970223595e-05
training_step
0 2.1142252408541164
validation_step
0 2.045765656691331
for epoch 770 mean loss on train 2.114225149154663
for epoch 770 mean loss on val 2.0457656383514404


 77%|██████████████████████████████████████████████████▉               | 771/1000 [30:18<00:27,  8.25it/s]

training for epoch 771
for epoch 771 learning rate is 5.7264168970223595e-05
training_step
0 1.9762489612285907
validation_step
0 2.0583541576678934
for epoch 771 mean loss on train 1.9762489795684814
for epoch 771 mean loss on val 2.058354139328003


 77%|██████████████████████████████████████████████████▉               | 772/1000 [30:18<00:27,  8.43it/s]

training for epoch 772
for epoch 772 learning rate is 5.7264168970223595e-05
training_step
0 2.016963078425481
validation_step
0 2.0241977985088644
for epoch 772 mean loss on train 2.016963005065918
for epoch 772 mean loss on val 2.024197816848755


 77%|███████████████████████████████████████████████████               | 773/1000 [30:18<00:27,  8.17it/s]

training for epoch 773
for epoch 773 learning rate is 5.7264168970223595e-05
training_step
0 1.9641167567326472
validation_step
0 2.079657187828651
for epoch 773 mean loss on train 1.9641168117523193
for epoch 773 mean loss on val 2.0796570777893066


 77%|███████████████████████████████████████████████████               | 774/1000 [30:18<00:29,  7.70it/s]

training for epoch 774
for epoch 774 learning rate is 5.7264168970223595e-05
training_step
0 2.120893331674429
validation_step
0 2.0239488161527195
for epoch 774 mean loss on train 2.1208932399749756
for epoch 774 mean loss on val 2.023948907852173


 78%|███████████████████████████████████████████████████▏              | 775/1000 [30:19<00:31,  7.06it/s]

training for epoch 775
for epoch 775 learning rate is 5.7264168970223595e-05
training_step
0 1.9601042820857122
validation_step
0 1.9573983412522535
for epoch 775 mean loss on train 1.96010422706604
for epoch 775 mean loss on val 1.9573982954025269


 78%|███████████████████████████████████████████████████▏              | 776/1000 [30:19<00:30,  7.41it/s]

training for epoch 776
for epoch 776 learning rate is 5.7264168970223595e-05
training_step
0 1.983909900371845
validation_step
0 2.035630592933068
for epoch 776 mean loss on train 1.9839098453521729
for epoch 776 mean loss on val 2.035630702972412


 78%|███████████████████████████████████████████████████▎              | 777/1000 [30:19<00:29,  7.52it/s]

training for epoch 777
for epoch 777 learning rate is 5.153775207320124e-05
training_step
0 2.049324475801908
validation_step
0 2.0995680002065806
for epoch 777 mean loss on train 2.0493245124816895
for epoch 777 mean loss on val 2.0995678901672363


 78%|███████████████████████████████████████████████████▎              | 778/1000 [30:19<00:30,  7.39it/s]

training for epoch 778
for epoch 778 learning rate is 5.153775207320124e-05
training_step
0 2.0582015697772684
validation_step
0 2.0154630220853367
for epoch 778 mean loss on train 2.058201551437378
for epoch 778 mean loss on val 2.01546311378479


 78%|███████████████████████████████████████████████████▍              | 779/1000 [30:19<00:29,  7.39it/s]

training for epoch 779
for epoch 779 learning rate is 5.153775207320124e-05
training_step
0 1.985946508554312
validation_step
0 2.098676534799429
for epoch 779 mean loss on train 1.985946536064148
for epoch 779 mean loss on val 2.0986764430999756


 78%|███████████████████████████████████████████████████▍              | 780/1000 [30:19<00:29,  7.49it/s]

training for epoch 780
for epoch 780 learning rate is 5.153775207320124e-05
training_step
0 2.1119434650127706
validation_step
0 1.9994836953970103
for epoch 780 mean loss on train 2.111943483352661
for epoch 780 mean loss on val 1.9994837045669556


 78%|███████████████████████████████████████████████████▌              | 781/1000 [30:19<00:30,  7.24it/s]

training for epoch 781
for epoch 781 learning rate is 5.153775207320124e-05
training_step
0 2.1014104989858775
validation_step
0 2.0546068044809194
for epoch 781 mean loss on train 2.101410388946533
for epoch 781 mean loss on val 2.0546069145202637


 78%|███████████████████████████████████████████████████▌              | 782/1000 [30:19<00:30,  7.24it/s]

training for epoch 782
for epoch 782 learning rate is 5.153775207320124e-05
training_step
0 1.9933002178485577
validation_step
0 2.1064745096059947
for epoch 782 mean loss on train 1.993300199508667
for epoch 782 mean loss on val 2.1064743995666504


 78%|███████████████████████████████████████████████████▋              | 783/1000 [30:20<00:28,  7.51it/s]

training for epoch 783
for epoch 783 learning rate is 5.153775207320124e-05
training_step
0 2.0336761474609375
validation_step
0 1.9733355595515325
for epoch 783 mean loss on train 2.0336761474609375
for epoch 783 mean loss on val 1.9733355045318604


 78%|███████████████████████████████████████████████████▋              | 784/1000 [30:20<00:29,  7.29it/s]

training for epoch 784
for epoch 784 learning rate is 5.153775207320124e-05
training_step
0 2.0446353325477014
validation_step
0 2.0389957427978516
for epoch 784 mean loss on train 2.04463529586792
for epoch 784 mean loss on val 2.0389957427978516


 78%|███████████████████████████████████████████████████▊              | 785/1000 [30:20<00:29,  7.25it/s]

training for epoch 785
for epoch 785 learning rate is 5.153775207320124e-05
training_step
0 2.101544600266677
validation_step
0 2.0456993396465597
for epoch 785 mean loss on train 2.1015446186065674
for epoch 785 mean loss on val 2.04569935798645


 79%|███████████████████████████████████████████████████▉              | 786/1000 [30:20<00:31,  6.87it/s]

training for epoch 786
for epoch 786 learning rate is 5.153775207320124e-05
training_step
0 2.2807445526123047
validation_step
0 2.0575970869797926
for epoch 786 mean loss on train 2.2807445526123047
for epoch 786 mean loss on val 2.0575971603393555


 79%|███████████████████████████████████████████████████▉              | 787/1000 [30:20<00:30,  7.04it/s]

training for epoch 787
for epoch 787 learning rate is 5.153775207320124e-05
training_step
0 1.9322602198674128
validation_step
0 2.055254422701322
for epoch 787 mean loss on train 1.932260274887085
for epoch 787 mean loss on val 2.0552544593811035


 79%|████████████████████████████████████████████████████              | 788/1000 [30:20<00:30,  6.98it/s]

training for epoch 788
for epoch 788 learning rate is 5.153775207320124e-05
training_step
0 1.9622661884014423
validation_step
0 2.0546939556415262
for epoch 788 mean loss on train 1.962266206741333
for epoch 788 mean loss on val 2.0546939373016357


 79%|████████████████████████████████████████████████████              | 789/1000 [30:20<00:30,  6.86it/s]

training for epoch 789
for epoch 789 learning rate is 5.153775207320124e-05
training_step
0 2.003125997690054
validation_step
0 2.01428589454064
for epoch 789 mean loss on train 2.0031259059906006
for epoch 789 mean loss on val 2.0142858028411865


 79%|████████████████████████████████████████████████████▏             | 790/1000 [30:21<00:29,  7.19it/s]

training for epoch 790
for epoch 790 learning rate is 5.153775207320124e-05
training_step
0 2.014881427471454
validation_step
0 2.030101042527419
for epoch 790 mean loss on train 2.0148813724517822
for epoch 790 mean loss on val 2.0301010608673096


 79%|████████████████████████████████████████████████████▏             | 791/1000 [30:21<00:28,  7.42it/s]

training for epoch 791
for epoch 791 learning rate is 5.153775207320124e-05
training_step
0 2.039264678955078
validation_step
0 2.0807049091045675
for epoch 791 mean loss on train 2.039264678955078
for epoch 791 mean loss on val 2.080704927444458


 79%|████████████████████████████████████████████████████▎             | 792/1000 [30:21<00:28,  7.25it/s]

training for epoch 792
for epoch 792 learning rate is 5.153775207320124e-05
training_step
0 2.090367243840144
validation_step
0 2.0675682654747596
for epoch 792 mean loss on train 2.090367317199707
for epoch 792 mean loss on val 2.067568302154541


 79%|████████████████████████████████████████████████████▎             | 793/1000 [30:21<00:28,  7.29it/s]

training for epoch 793
for epoch 793 learning rate is 5.153775207320124e-05
training_step
0 2.0551704993614783
validation_step
0 2.1028294196495643
for epoch 793 mean loss on train 2.0551705360412598
for epoch 793 mean loss on val 2.1028294563293457


 79%|████████████████████████████████████████████████████▍             | 794/1000 [30:21<00:27,  7.39it/s]

training for epoch 794
for epoch 794 learning rate is 5.153775207320124e-05
training_step
0 2.048732170691857
validation_step
0 1.9608573913574219
for epoch 794 mean loss on train 2.048732280731201
for epoch 794 mean loss on val 1.9608573913574219


 80%|████████████████████████████████████████████████████▍             | 795/1000 [30:21<00:26,  7.60it/s]

training for epoch 795
for epoch 795 learning rate is 5.153775207320124e-05
training_step
0 2.007886299720177
validation_step
0 2.1059097876915565
for epoch 795 mean loss on train 2.0078864097595215
for epoch 795 mean loss on val 2.105909824371338


 80%|████████████████████████████████████████████████████▌             | 796/1000 [30:21<00:26,  7.77it/s]

training for epoch 796
for epoch 796 learning rate is 5.153775207320124e-05
training_step
0 2.044698275052584
validation_step
0 1.9696602454552283
for epoch 796 mean loss on train 2.0446982383728027
for epoch 796 mean loss on val 1.9696602821350098


 80%|████████████████████████████████████████████████████▌             | 797/1000 [30:22<00:26,  7.79it/s]

training for epoch 797
for epoch 797 learning rate is 5.153775207320124e-05
training_step
0 2.0598520132211537
validation_step
0 2.091248732346755
for epoch 797 mean loss on train 2.059852123260498
for epoch 797 mean loss on val 2.0912487506866455


 80%|████████████████████████████████████████████████████▋             | 798/1000 [30:22<00:25,  7.99it/s]

training for epoch 798
for epoch 798 learning rate is 4.6383976865881114e-05
training_step
0 1.956326411320613
validation_step
0 2.064001670250526
for epoch 798 mean loss on train 1.9563263654708862
for epoch 798 mean loss on val 2.0640015602111816


 80%|████████████████████████████████████████████████████▋             | 799/1000 [30:22<00:24,  8.24it/s]

training for epoch 799
for epoch 799 learning rate is 4.6383976865881114e-05
training_step
0 2.0114950033334584
validation_step
0 2.0795445075401893
for epoch 799 mean loss on train 2.0114951133728027
for epoch 799 mean loss on val 2.0795445442199707


 80%|████████████████████████████████████████████████████▊             | 800/1000 [30:22<00:24,  8.13it/s]

training for epoch 800
for epoch 800 learning rate is 4.6383976865881114e-05
training_step
0 2.096829047569862
validation_step
0 1.99462890625
for epoch 800 mean loss on train 2.0968289375305176
for epoch 800 mean loss on val 1.99462890625


 80%|████████████████████████████████████████████████████▊             | 801/1000 [30:22<00:23,  8.52it/s]

training for epoch 801
for epoch 801 learning rate is 4.6383976865881114e-05
training_step
0 2.1404892848088193
validation_step
0 1.9755508716289814
for epoch 801 mean loss on train 2.140489339828491
for epoch 801 mean loss on val 1.975550889968872


 80%|████████████████████████████████████████████████████▉             | 802/1000 [30:22<00:23,  8.52it/s]

training for epoch 802
for epoch 802 learning rate is 4.6383976865881114e-05
training_step
0 2.0018143287071815
validation_step
0 2.0444109990046573
for epoch 802 mean loss on train 2.001814365386963
for epoch 802 mean loss on val 2.0444109439849854


 80%|████████████████████████████████████████████████████▉             | 803/1000 [30:22<00:23,  8.41it/s]

training for epoch 803
for epoch 803 learning rate is 4.6383976865881114e-05
training_step
0 2.071042721088116
validation_step
0 2.0587982764610877
for epoch 803 mean loss on train 2.071042776107788
for epoch 803 mean loss on val 2.058798313140869


 80%|█████████████████████████████████████████████████████             | 804/1000 [30:22<00:23,  8.29it/s]

training for epoch 804
for epoch 804 learning rate is 4.6383976865881114e-05
training_step
0 2.104948190542368
validation_step
0 2.123588415292593
for epoch 804 mean loss on train 2.1049482822418213
for epoch 804 mean loss on val 2.1235883235931396


 80%|█████████████████████████████████████████████████████▏            | 805/1000 [30:22<00:23,  8.42it/s]

training for epoch 805
for epoch 805 learning rate is 4.6383976865881114e-05
training_step
0 2.069912396944486
validation_step
0 2.0851431626539965
for epoch 805 mean loss on train 2.0699124336242676
for epoch 805 mean loss on val 2.0851430892944336


 81%|█████████████████████████████████████████████████████▏            | 806/1000 [30:23<00:26,  7.28it/s]

training for epoch 806
for epoch 806 learning rate is 4.6383976865881114e-05
training_step
0 2.081822321965144
validation_step
0 1.944905501145583
for epoch 806 mean loss on train 2.081822395324707
for epoch 806 mean loss on val 1.9449055194854736


 81%|█████████████████████████████████████████████████████▎            | 807/1000 [30:23<00:26,  7.32it/s]

training for epoch 807
for epoch 807 learning rate is 4.6383976865881114e-05
training_step
0 2.0986781487098107
validation_step
0 2.020550067608173
for epoch 807 mean loss on train 2.0986781120300293
for epoch 807 mean loss on val 2.020550012588501


 81%|█████████████████████████████████████████████████████▎            | 808/1000 [30:23<00:26,  7.34it/s]

training for epoch 808
for epoch 808 learning rate is 4.6383976865881114e-05
training_step
0 1.9547907022329478
validation_step
0 1.9646399571345403
for epoch 808 mean loss on train 1.954790711402893
for epoch 808 mean loss on val 1.9646399021148682


 81%|█████████████████████████████████████████████████████▍            | 809/1000 [30:23<00:25,  7.36it/s]

training for epoch 809
for epoch 809 learning rate is 4.6383976865881114e-05
training_step
0 2.147112332857572
validation_step
0 2.0242933126596303
for epoch 809 mean loss on train 2.1471123695373535
for epoch 809 mean loss on val 2.0242934226989746


 81%|█████████████████████████████████████████████████████▍            | 810/1000 [30:23<00:24,  7.74it/s]

training for epoch 810
for epoch 810 learning rate is 4.6383976865881114e-05
training_step
0 2.1032142639160156
validation_step
0 2.05562371474046
for epoch 810 mean loss on train 2.1032142639160156
for epoch 810 mean loss on val 2.055623769760132


 81%|█████████████████████████████████████████████████████▌            | 811/1000 [30:23<00:24,  7.76it/s]

training for epoch 811
for epoch 811 learning rate is 4.6383976865881114e-05
training_step
0 2.048319596510667
validation_step
0 2.0516366225022535
for epoch 811 mean loss on train 2.0483195781707764
for epoch 811 mean loss on val 2.0516366958618164


 81%|█████████████████████████████████████████████████████▌            | 812/1000 [30:23<00:23,  7.86it/s]

training for epoch 812
for epoch 812 learning rate is 4.6383976865881114e-05
training_step
0 2.053370108971229
validation_step
0 1.9740509620079627
for epoch 812 mean loss on train 2.0533699989318848
for epoch 812 mean loss on val 1.9740509986877441


 81%|█████████████████████████████████████████████████████▋            | 813/1000 [30:24<00:22,  8.25it/s]

training for epoch 813
for epoch 813 learning rate is 4.6383976865881114e-05
training_step
0 2.016305630023663
validation_step
0 1.9784817328819861
for epoch 813 mean loss on train 2.016305685043335
for epoch 813 mean loss on val 1.9784817695617676


 81%|█████████████████████████████████████████████████████▋            | 814/1000 [30:24<00:21,  8.59it/s]

training for epoch 814
for epoch 814 learning rate is 4.6383976865881114e-05
training_step
0 2.0020903073824368
validation_step
0 2.0273601825420675
for epoch 814 mean loss on train 2.0020902156829834
for epoch 814 mean loss on val 2.027360200881958
training for epoch 815
for epoch 815 learning rate is 4.6383976865881114e-05
training_step
0 1.9942110501802885
validation_step
0 2.0791061107928934
for epoch 815 mean loss on train 1.9942110776901245
for epoch 815 mean loss on val 2.079106092453003


 82%|█████████████████████████████████████████████████████▊            | 816/1000 [30:24<00:19,  9.31it/s]

training for epoch 816
for epoch 816 learning rate is 4.6383976865881114e-05
training_step
0 2.0084962111253004
validation_step
0 2.021061530480018
for epoch 816 mean loss on train 2.0084962844848633
for epoch 816 mean loss on val 2.021061420440674


 82%|█████████████████████████████████████████████████████▉            | 817/1000 [30:24<00:19,  9.19it/s]

training for epoch 817
for epoch 817 learning rate is 4.6383976865881114e-05
training_step
0 2.1096744537353516
validation_step
0 2.023191892183744
for epoch 817 mean loss on train 2.1096744537353516
for epoch 817 mean loss on val 2.0231919288635254


 82%|█████████████████████████████████████████████████████▉            | 818/1000 [30:24<00:20,  8.99it/s]

training for epoch 818
for epoch 818 learning rate is 4.6383976865881114e-05
training_step
0 1.9781878544734075
validation_step
0 2.028903374305138
for epoch 818 mean loss on train 1.9781877994537354
for epoch 818 mean loss on val 2.0289034843444824


 82%|██████████████████████████████████████████████████████            | 819/1000 [30:24<00:20,  8.93it/s]

training for epoch 819
for epoch 819 learning rate is 4.1745579179293e-05
training_step
0 2.093557357788086
validation_step
0 2.0638046264648438
for epoch 819 mean loss on train 2.093557357788086
for epoch 819 mean loss on val 2.0638046264648438


 82%|██████████████████████████████████████████████████████            | 820/1000 [30:24<00:19,  9.07it/s]

training for epoch 820
for epoch 820 learning rate is 4.1745579179293e-05
training_step
0 2.0568692133976865
validation_step
0 2.1420489091139574
for epoch 820 mean loss on train 2.0568692684173584
for epoch 820 mean loss on val 2.1420488357543945


 82%|██████████████████████████████████████████████████████▏           | 821/1000 [30:24<00:19,  9.08it/s]

training for epoch 821
for epoch 821 learning rate is 4.1745579179293e-05
training_step
0 1.9906240609975963
validation_step
0 2.068953880896935
for epoch 821 mean loss on train 1.9906240701675415
for epoch 821 mean loss on val 2.0689539909362793


 82%|██████████████████████████████████████████████████████▎           | 822/1000 [30:25<00:20,  8.82it/s]

training for epoch 822
for epoch 822 learning rate is 4.1745579179293e-05
training_step
0 2.0773240602933445
validation_step
0 2.0422226832463193
for epoch 822 mean loss on train 2.077324151992798
for epoch 822 mean loss on val 2.042222738265991


 82%|██████████████████████████████████████████████████████▎           | 823/1000 [30:25<00:20,  8.68it/s]

training for epoch 823
for epoch 823 learning rate is 4.1745579179293e-05
training_step
0 2.1848157735971303
validation_step
0 1.9672550788292518
for epoch 823 mean loss on train 2.1848158836364746
for epoch 823 mean loss on val 1.9672551155090332


 82%|██████████████████████████████████████████████████████▍           | 824/1000 [30:25<00:21,  8.08it/s]

training for epoch 824
for epoch 824 learning rate is 4.1745579179293e-05
training_step
0 2.0479996020977316
validation_step
0 1.9765613262469952
for epoch 824 mean loss on train 2.047999620437622
for epoch 824 mean loss on val 1.9765613079071045
training for epoch 825
for epoch 825 learning rate is 4.1745579179293e-05
training_step
0 2.031448951134315
validation_step
0 2.0803482349102316
for epoch 825 mean loss on train 2.0314488410949707
for epoch 825 mean loss on val 2.080348253250122


 83%|██████████████████████████████████████████████████████▌           | 826/1000 [30:25<00:19,  8.87it/s]

training for epoch 826
for epoch 826 learning rate is 4.1745579179293e-05
training_step
0 2.0405348264254055
validation_step
0 2.047318385197566
for epoch 826 mean loss on train 2.040534734725952
for epoch 826 mean loss on val 2.047318458557129


 83%|██████████████████████████████████████████████████████▌           | 827/1000 [30:25<00:19,  9.09it/s]

training for epoch 827
for epoch 827 learning rate is 4.1745579179293e-05
training_step
0 2.0886537111722507
validation_step
0 2.0536573850191555
for epoch 827 mean loss on train 2.088653802871704
for epoch 827 mean loss on val 2.053657293319702


 83%|██████████████████████████████████████████████████████▋           | 828/1000 [30:25<00:19,  8.79it/s]

training for epoch 828
for epoch 828 learning rate is 4.1745579179293e-05
training_step
0 2.0359881474421573
validation_step
0 2.010221921480619
for epoch 828 mean loss on train 2.0359880924224854
for epoch 828 mean loss on val 2.0102219581604004


 83%|██████████████████████████████████████████████████████▋           | 829/1000 [30:25<00:19,  8.67it/s]

training for epoch 829
for epoch 829 learning rate is 4.1745579179293e-05
training_step
0 2.0028101114126353
validation_step
0 2.0857473520132213
for epoch 829 mean loss on train 2.002810001373291
for epoch 829 mean loss on val 2.085747241973877


 83%|██████████████████████████████████████████████████████▊           | 830/1000 [30:25<00:19,  8.68it/s]

training for epoch 830
for epoch 830 learning rate is 4.1745579179293e-05
training_step
0 1.9811529012826772
validation_step
0 2.0442864344670224
for epoch 830 mean loss on train 1.981152892112732
for epoch 830 mean loss on val 2.0442864894866943


 83%|██████████████████████████████████████████████████████▊           | 831/1000 [30:26<00:19,  8.70it/s]

training for epoch 831
for epoch 831 learning rate is 4.1745579179293e-05
training_step
0 2.0507645240196815
validation_step
0 2.025562286376953
for epoch 831 mean loss on train 2.050764560699463
for epoch 831 mean loss on val 2.025562286376953


 83%|██████████████████████████████████████████████████████▉           | 832/1000 [30:26<00:19,  8.54it/s]

training for epoch 832
for epoch 832 learning rate is 4.1745579179293e-05
training_step
0 2.0215719663179836
validation_step
0 2.0485767951378455
for epoch 832 mean loss on train 2.0215718746185303
for epoch 832 mean loss on val 2.048576831817627


 83%|██████████████████████████████████████████████████████▉           | 833/1000 [30:26<00:19,  8.57it/s]

training for epoch 833
for epoch 833 learning rate is 4.1745579179293e-05
training_step
0 2.0242579533503604
validation_step
0 2.04132813673753
for epoch 833 mean loss on train 2.0242578983306885
for epoch 833 mean loss on val 2.041328191757202


 83%|███████████████████████████████████████████████████████           | 834/1000 [30:26<00:19,  8.54it/s]

training for epoch 834
for epoch 834 learning rate is 4.1745579179293e-05
training_step
0 2.032016607431265
validation_step
0 2.0208568572998047
for epoch 834 mean loss on train 2.0320165157318115
for epoch 834 mean loss on val 2.0208568572998047


 84%|███████████████████████████████████████████████████████           | 835/1000 [30:26<00:19,  8.56it/s]

training for epoch 835
for epoch 835 learning rate is 4.1745579179293e-05
training_step
0 2.0176852299616885
validation_step
0 2.0242308103121243
for epoch 835 mean loss on train 2.0176851749420166
for epoch 835 mean loss on val 2.024230718612671


 84%|███████████████████████████████████████████████████████▏          | 836/1000 [30:26<00:19,  8.53it/s]

training for epoch 836
for epoch 836 learning rate is 4.1745579179293e-05
training_step
0 1.998102334829477
validation_step
0 2.117759998028095
for epoch 836 mean loss on train 1.9981023073196411
for epoch 836 mean loss on val 2.117759943008423


 84%|███████████████████████████████████████████████████████▏          | 837/1000 [30:26<00:19,  8.50it/s]

training for epoch 837
for epoch 837 learning rate is 4.1745579179293e-05
training_step
0 2.0172591576209435
validation_step
0 2.0095995389498196
for epoch 837 mean loss on train 2.017259120941162
for epoch 837 mean loss on val 2.009599447250366


 84%|███████████████████████████████████████████████████████▎          | 838/1000 [30:26<00:18,  8.67it/s]

training for epoch 838
for epoch 838 learning rate is 4.1745579179293e-05
training_step
0 2.0532282315767727
validation_step
0 2.0561963594876804
for epoch 838 mean loss on train 2.0532281398773193
for epoch 838 mean loss on val 2.056196451187134


 84%|███████████████████████████████████████████████████████▎          | 839/1000 [30:26<00:17,  8.98it/s]

training for epoch 839
for epoch 839 learning rate is 4.1745579179293e-05
training_step
0 2.1276124807504506
validation_step
0 1.9881974733792818
for epoch 839 mean loss on train 2.127612590789795
for epoch 839 mean loss on val 1.9881974458694458


 84%|███████████████████████████████████████████████████████▍          | 840/1000 [30:27<00:17,  9.02it/s]

training for epoch 840
for epoch 840 learning rate is 3.75710212613637e-05
training_step
0 2.0839014786940355
validation_step
0 2.0321017045241137
for epoch 840 mean loss on train 2.0839014053344727
for epoch 840 mean loss on val 2.032101631164551


 84%|███████████████████████████████████████████████████████▌          | 841/1000 [30:27<00:17,  8.97it/s]

training for epoch 841
for epoch 841 learning rate is 3.75710212613637e-05
training_step
0 2.1771040696364183
validation_step
0 2.00994506249061
for epoch 841 mean loss on train 2.1771039962768555
for epoch 841 mean loss on val 2.0099451541900635


 84%|███████████████████████████████████████████████████████▌          | 842/1000 [30:27<00:17,  8.92it/s]

training for epoch 842
for epoch 842 learning rate is 3.75710212613637e-05
training_step
0 2.106694588294396
validation_step
0 2.088071823120117
for epoch 842 mean loss on train 2.1066946983337402
for epoch 842 mean loss on val 2.088071823120117


 84%|███████████████████████████████████████████████████████▋          | 843/1000 [30:27<00:17,  9.08it/s]

training for epoch 843
for epoch 843 learning rate is 3.75710212613637e-05
training_step
0 2.051667726956881
validation_step
0 2.114159657404973
for epoch 843 mean loss on train 2.0516676902770996
for epoch 843 mean loss on val 2.11415958404541


 84%|███████████████████████████████████████████████████████▋          | 844/1000 [30:27<00:17,  8.76it/s]

training for epoch 844
for epoch 844 learning rate is 3.75710212613637e-05
training_step
0 2.032157164353591
validation_step
0 1.9942908653846154
for epoch 844 mean loss on train 2.0321571826934814
for epoch 844 mean loss on val 1.994290828704834


 84%|███████████████████████████████████████████████████████▊          | 845/1000 [30:27<00:17,  8.84it/s]

training for epoch 845
for epoch 845 learning rate is 3.75710212613637e-05
training_step
0 2.039547700148362
validation_step
0 2.148914337158203
for epoch 845 mean loss on train 2.0395476818084717
for epoch 845 mean loss on val 2.148914337158203


 85%|███████████████████████████████████████████████████████▊          | 846/1000 [30:27<00:17,  8.96it/s]

training for epoch 846
for epoch 846 learning rate is 3.75710212613637e-05
training_step
0 2.0626358619103065
validation_step
0 2.000541246854342
for epoch 846 mean loss on train 2.062635898590088
for epoch 846 mean loss on val 2.0005412101745605


 85%|███████████████████████████████████████████████████████▉          | 847/1000 [30:27<00:17,  8.72it/s]

training for epoch 847
for epoch 847 learning rate is 3.75710212613637e-05
training_step
0 2.0507114116962137
validation_step
0 1.9930284940279448
for epoch 847 mean loss on train 2.0507113933563232
for epoch 847 mean loss on val 1.9930285215377808


 85%|███████████████████████████████████████████████████████▉          | 848/1000 [30:27<00:17,  8.88it/s]

training for epoch 848
for epoch 848 learning rate is 3.75710212613637e-05
training_step
0 2.0871583498441257
validation_step
0 2.031728010911208
for epoch 848 mean loss on train 2.087158441543579
for epoch 848 mean loss on val 2.0317280292510986


 85%|████████████████████████████████████████████████████████          | 849/1000 [30:28<00:17,  8.87it/s]

training for epoch 849
for epoch 849 learning rate is 3.75710212613637e-05
training_step
0 2.067211004403921
validation_step
0 1.9909823491023138
for epoch 849 mean loss on train 2.0672109127044678
for epoch 849 mean loss on val 1.9909822940826416


 85%|████████████████████████████████████████████████████████          | 850/1000 [30:28<00:17,  8.81it/s]

training for epoch 850
for epoch 850 learning rate is 3.75710212613637e-05
training_step
0 2.0271182427039514
validation_step
0 2.08804438664363
for epoch 850 mean loss on train 2.02711820602417
for epoch 850 mean loss on val 2.0880444049835205


 85%|████████████████████████████████████████████████████████▏         | 851/1000 [30:28<00:16,  8.92it/s]

training for epoch 851
for epoch 851 learning rate is 3.75710212613637e-05
training_step
0 2.0247328831599307
validation_step
0 2.107967963585487
for epoch 851 mean loss on train 2.024732828140259
for epoch 851 mean loss on val 2.1079678535461426


 85%|████████████████████████████████████████████████████████▏         | 852/1000 [30:28<00:16,  9.00it/s]

training for epoch 852
for epoch 852 learning rate is 3.75710212613637e-05
training_step
0 2.0064785297100363
validation_step
0 2.078144513643705
for epoch 852 mean loss on train 2.0064785480499268
for epoch 852 mean loss on val 2.0781445503234863


 85%|████████████████████████████████████████████████████████▎         | 853/1000 [30:28<00:16,  9.00it/s]

training for epoch 853
for epoch 853 learning rate is 3.75710212613637e-05
training_step
0 2.010517120361328
validation_step
0 2.110077637892503
for epoch 853 mean loss on train 2.010517120361328
for epoch 853 mean loss on val 2.1100776195526123


 85%|████████████████████████████████████████████████████████▎         | 854/1000 [30:28<00:16,  8.76it/s]

training for epoch 854
for epoch 854 learning rate is 3.75710212613637e-05
training_step
0 1.990743637084961
validation_step
0 2.0790525583120494
for epoch 854 mean loss on train 1.990743637084961
for epoch 854 mean loss on val 2.079052448272705


 86%|████████████████████████████████████████████████████████▍         | 855/1000 [30:28<00:16,  8.65it/s]

training for epoch 855
for epoch 855 learning rate is 3.75710212613637e-05
training_step
0 2.059461446908804
validation_step
0 2.028338358952449
for epoch 855 mean loss on train 2.0594613552093506
for epoch 855 mean loss on val 2.0283384323120117


 86%|████████████████████████████████████████████████████████▍         | 856/1000 [30:28<00:16,  8.62it/s]

training for epoch 856
for epoch 856 learning rate is 3.75710212613637e-05
training_step
0 2.0382567185621996
validation_step
0 2.078858889066256
for epoch 856 mean loss on train 2.0382566452026367
for epoch 856 mean loss on val 2.0788588523864746


 86%|████████████████████████████████████████████████████████▌         | 857/1000 [30:28<00:16,  8.79it/s]

training for epoch 857
for epoch 857 learning rate is 3.75710212613637e-05
training_step
0 2.0368351569542518
validation_step
0 2.0180482130784254
for epoch 857 mean loss on train 2.036835193634033
for epoch 857 mean loss on val 2.0180482864379883


 86%|████████████████████████████████████████████████████████▋         | 858/1000 [30:29<00:15,  8.96it/s]

training for epoch 858
for epoch 858 learning rate is 3.75710212613637e-05
training_step
0 2.043585410484901
validation_step
0 2.117281253521259
for epoch 858 mean loss on train 2.0435853004455566
for epoch 858 mean loss on val 2.117281198501587
training for epoch 859
for epoch 859 learning rate is 3.75710212613637e-05
training_step
0 2.0760057889498196
validation_step
0 2.021613047673152
for epoch 859 mean loss on train 2.076005697250366
for epoch 859 mean loss on val 2.021613121032715


 86%|████████████████████████████████████████████████████████▊         | 860/1000 [30:29<00:14,  9.41it/s]

training for epoch 860
for epoch 860 learning rate is 3.75710212613637e-05
training_step
0 1.97603885944073
validation_step
0 2.085131425123948
for epoch 860 mean loss on train 1.9760388135910034
for epoch 860 mean loss on val 2.0851314067840576
training for epoch 861
for epoch 861 learning rate is 3.381391913522733e-05
training_step
0 2.166626710158128
validation_step
0 1.9915577815129206
for epoch 861 mean loss on train 2.1666266918182373
for epoch 861 mean loss on val 1.9915578365325928


 86%|████████████████████████████████████████████████████████▉         | 862/1000 [30:29<00:14,  9.72it/s]

training for epoch 862
for epoch 862 learning rate is 3.381391913522733e-05
training_step
0 2.0188060173621545
validation_step
0 2.0448863689716044
for epoch 862 mean loss on train 2.018805980682373
for epoch 862 mean loss on val 2.044886350631714


 86%|████████████████████████████████████████████████████████▉         | 863/1000 [30:29<00:14,  9.71it/s]

training for epoch 863
for epoch 863 learning rate is 3.381391913522733e-05
training_step
0 2.010255666879507
validation_step
0 2.009168038001427
for epoch 863 mean loss on train 2.0102555751800537
for epoch 863 mean loss on val 2.0091681480407715
training for epoch 864
for epoch 864 learning rate is 3.381391913522733e-05
training_step
0 2.0436917818509617
validation_step
0 2.0331172943115234
for epoch 864 mean loss on train 2.043691873550415
for epoch 864 mean loss on val 2.0331172943115234


 86%|█████████████████████████████████████████████████████████         | 865/1000 [30:29<00:13, 10.04it/s]

training for epoch 865
for epoch 865 learning rate is 3.381391913522733e-05
training_step
0 2.0242245013897238
validation_step
0 2.0378258044903097
for epoch 865 mean loss on train 2.0242245197296143
for epoch 865 mean loss on val 2.0378258228302


 87%|█████████████████████████████████████████████████████████▏        | 866/1000 [30:29<00:13,  9.81it/s]

training for epoch 866
for epoch 866 learning rate is 3.381391913522733e-05
training_step
0 2.092040282029372
validation_step
0 2.020723196176382
for epoch 866 mean loss on train 2.0920403003692627
for epoch 866 mean loss on val 2.0207231044769287


 87%|█████████████████████████████████████████████████████████▏        | 867/1000 [30:30<00:13,  9.72it/s]

training for epoch 867
for epoch 867 learning rate is 3.381391913522733e-05
training_step
0 2.0269559713510366
validation_step
0 2.0629268059363732
for epoch 867 mean loss on train 2.026956081390381
for epoch 867 mean loss on val 2.062926769256592


 87%|█████████████████████████████████████████████████████████▎        | 868/1000 [30:30<00:14,  9.29it/s]

training for epoch 868
for epoch 868 learning rate is 3.381391913522733e-05
training_step
0 2.17197638291579
validation_step
0 2.0226427224966197
for epoch 868 mean loss on train 2.171976327896118
for epoch 868 mean loss on val 2.0226426124572754


 87%|█████████████████████████████████████████████████████████▎        | 869/1000 [30:30<00:14,  8.74it/s]

training for epoch 869
for epoch 869 learning rate is 3.381391913522733e-05
training_step
0 2.014027815598708
validation_step
0 1.9954339540921724
for epoch 869 mean loss on train 2.0140278339385986
for epoch 869 mean loss on val 1.9954339265823364


 87%|█████████████████████████████████████████████████████████▍        | 870/1000 [30:30<00:15,  8.41it/s]

training for epoch 870
for epoch 870 learning rate is 3.381391913522733e-05
training_step
0 2.096612049983098
validation_step
0 2.1220466907207785
for epoch 870 mean loss on train 2.096611976623535
for epoch 870 mean loss on val 2.122046709060669


 87%|█████████████████████████████████████████████████████████▍        | 871/1000 [30:30<00:15,  8.44it/s]

training for epoch 871
for epoch 871 learning rate is 3.381391913522733e-05
training_step
0 1.9776992797851562
validation_step
0 2.0846069042499247
for epoch 871 mean loss on train 1.9776992797851562
for epoch 871 mean loss on val 2.084606885910034


 87%|█████████████████████████████████████████████████████████▌        | 872/1000 [30:30<00:14,  8.58it/s]

training for epoch 872
for epoch 872 learning rate is 3.381391913522733e-05
training_step
0 2.212995382455679
validation_step
0 1.9584068885216346
for epoch 872 mean loss on train 2.2129952907562256
for epoch 872 mean loss on val 1.958406925201416


 87%|█████████████████████████████████████████████████████████▌        | 873/1000 [30:30<00:14,  8.85it/s]

training for epoch 873
for epoch 873 learning rate is 3.381391913522733e-05
training_step
0 2.111907225388747
validation_step
0 2.001993179321289
for epoch 873 mean loss on train 2.1119072437286377
for epoch 873 mean loss on val 2.001993179321289


 87%|█████████████████████████████████████████████████████████▋        | 874/1000 [30:30<00:13,  9.15it/s]

training for epoch 874
for epoch 874 learning rate is 3.381391913522733e-05
training_step
0 2.0500259399414062
validation_step
0 2.016053419846755
for epoch 874 mean loss on train 2.0500259399414062
for epoch 874 mean loss on val 2.0160534381866455
training for epoch 875
for epoch 875 learning rate is 3.381391913522733e-05
training_step
0 1.9760887439434345
validation_step
0 2.02452879685622
for epoch 875 mean loss on train 1.9760887622833252
for epoch 875 mean loss on val 2.024528741836548


 88%|█████████████████████████████████████████████████████████▊        | 876/1000 [30:31<00:13,  9.35it/s]

training for epoch 876
for epoch 876 learning rate is 3.381391913522733e-05
training_step
0 1.9927266927865834
validation_step
0 1.932046890258789
for epoch 876 mean loss on train 1.9927266836166382
for epoch 876 mean loss on val 1.932046890258789
saving for epoch 876


 88%|█████████████████████████████████████████████████████████▉        | 877/1000 [30:31<00:22,  5.41it/s]

training for epoch 877
for epoch 877 learning rate is 3.381391913522733e-05
training_step
0 2.0153667743389425
validation_step
0 2.023298116830679
for epoch 877 mean loss on train 2.015366792678833
for epoch 877 mean loss on val 2.0232980251312256


 88%|█████████████████████████████████████████████████████████▉        | 878/1000 [30:31<00:20,  5.98it/s]

training for epoch 878
for epoch 878 learning rate is 3.381391913522733e-05
training_step
0 2.015266858614408
validation_step
0 2.0545152517465444
for epoch 878 mean loss on train 2.0152668952941895
for epoch 878 mean loss on val 2.0545153617858887


 88%|██████████████████████████████████████████████████████████        | 879/1000 [30:31<00:18,  6.71it/s]

training for epoch 879
for epoch 879 learning rate is 3.381391913522733e-05
training_step
0 2.097932375394381
validation_step
0 1.9837022928091197
for epoch 879 mean loss on train 2.0979323387145996
for epoch 879 mean loss on val 1.983702301979065


 88%|██████████████████████████████████████████████████████████        | 880/1000 [30:31<00:16,  7.25it/s]

training for epoch 880
for epoch 880 learning rate is 3.381391913522733e-05
training_step
0 2.0186183636005106
validation_step
0 2.111466334416316
for epoch 880 mean loss on train 2.01861834526062
for epoch 880 mean loss on val 2.111466407775879
training for epoch 881
for epoch 881 learning rate is 3.381391913522733e-05
training_step
0 2.138636662409856
validation_step
0 2.1191589648907003
for epoch 881 mean loss on train 2.138636589050293
for epoch 881 mean loss on val 2.119158983230591


 88%|██████████████████████████████████████████████████████████▏       | 882/1000 [30:31<00:14,  8.32it/s]

training for epoch 882
for epoch 882 learning rate is 3.0432527221704597e-05
training_step
0 2.004144081702599
validation_step
0 2.1040128561166616
for epoch 882 mean loss on train 2.0041441917419434
for epoch 882 mean loss on val 2.104012966156006
training for epoch 883
for epoch 883 learning rate is 3.0432527221704597e-05
training_step
0 1.9450969696044922
validation_step
0 2.0363763662484975
for epoch 883 mean loss on train 1.9450969696044922
for epoch 883 mean loss on val 2.036376476287842


 88%|██████████████████████████████████████████████████████████▎       | 884/1000 [30:32<00:13,  8.69it/s]

training for epoch 884
for epoch 884 learning rate is 3.0432527221704597e-05
training_step
0 2.0293863736666164
validation_step
0 2.0567654829758863
for epoch 884 mean loss on train 2.029386281967163
for epoch 884 mean loss on val 2.056765556335449


 88%|██████████████████████████████████████████████████████████▍       | 885/1000 [30:32<00:12,  8.85it/s]

training for epoch 885
for epoch 885 learning rate is 3.0432527221704597e-05
training_step
0 2.061743662907527
validation_step
0 2.015775827261118
for epoch 885 mean loss on train 2.06174373626709
for epoch 885 mean loss on val 2.0157759189605713


 89%|██████████████████████████████████████████████████████████▍       | 886/1000 [30:32<00:12,  9.06it/s]

training for epoch 886
for epoch 886 learning rate is 3.0432527221704597e-05
training_step
0 1.999719473031851
validation_step
0 2.1095962524414062
for epoch 886 mean loss on train 1.999719500541687
for epoch 886 mean loss on val 2.1095962524414062


 89%|██████████████████████████████████████████████████████████▌       | 887/1000 [30:32<00:12,  9.28it/s]

training for epoch 887
for epoch 887 learning rate is 3.0432527221704597e-05
training_step
0 1.9607637845552885
validation_step
0 1.9622245201697717
for epoch 887 mean loss on train 1.9607638120651245
for epoch 887 mean loss on val 1.9622244834899902


 89%|██████████████████████████████████████████████████████████▌       | 888/1000 [30:32<00:11,  9.38it/s]

training for epoch 888
for epoch 888 learning rate is 3.0432527221704597e-05
training_step
0 2.0960731506347656
validation_step
0 2.0601529341477613
for epoch 888 mean loss on train 2.0960731506347656
for epoch 888 mean loss on val 2.060153007507324
training for epoch 889
for epoch 889 learning rate is 3.0432527221704597e-05
training_step
0 1.991641117976262
validation_step
0 2.08649415236253
for epoch 889 mean loss on train 1.9916411638259888
for epoch 889 mean loss on val 2.086494207382202


 89%|██████████████████████████████████████████████████████████▋       | 890/1000 [30:32<00:11,  9.63it/s]

training for epoch 890
for epoch 890 learning rate is 3.0432527221704597e-05
training_step
0 2.066897465632512
validation_step
0 2.0819791647104116
for epoch 890 mean loss on train 2.066897392272949
for epoch 890 mean loss on val 2.081979274749756
training for epoch 891
for epoch 891 learning rate is 3.0432527221704597e-05
training_step
0 2.042594176072341
validation_step
0 1.9891681671142578
for epoch 891 mean loss on train 2.0425941944122314
for epoch 891 mean loss on val 1.9891681671142578


 89%|██████████████████████████████████████████████████████████▊       | 892/1000 [30:33<00:10,  9.89it/s]

training for epoch 892
for epoch 892 learning rate is 3.0432527221704597e-05
training_step
0 1.9667606353759766
validation_step
0 1.9429352100078876
for epoch 892 mean loss on train 1.9667606353759766
for epoch 892 mean loss on val 1.9429352283477783


 89%|██████████████████████████████████████████████████████████▉       | 893/1000 [30:33<00:11,  9.64it/s]

training for epoch 893
for epoch 893 learning rate is 3.0432527221704597e-05
training_step
0 2.0670896676870494
validation_step
0 2.055294477022611
for epoch 893 mean loss on train 2.067089557647705
for epoch 893 mean loss on val 2.0552945137023926


 89%|███████████████████████████████████████████████████████████       | 894/1000 [30:33<00:11,  9.49it/s]

training for epoch 894
for epoch 894 learning rate is 3.0432527221704597e-05
training_step
0 1.9910269517164965
validation_step
0 2.0577270801250753
for epoch 894 mean loss on train 1.9910269975662231
for epoch 894 mean loss on val 2.057727098464966


 90%|███████████████████████████████████████████████████████████       | 895/1000 [30:33<00:11,  9.24it/s]

training for epoch 895
for epoch 895 learning rate is 3.0432527221704597e-05
training_step
0 2.0841209705059347
validation_step
0 2.0418579395000753
for epoch 895 mean loss on train 2.084120988845825
for epoch 895 mean loss on val 2.041857957839966


 90%|███████████████████████████████████████████████████████████▏      | 896/1000 [30:33<00:11,  9.12it/s]

training for epoch 896
for epoch 896 learning rate is 3.0432527221704597e-05
training_step
0 1.9811614109919622
validation_step
0 1.9805443103496845
for epoch 896 mean loss on train 1.98116135597229
for epoch 896 mean loss on val 1.9805443286895752


 90%|███████████████████████████████████████████████████████████▏      | 897/1000 [30:33<00:11,  9.10it/s]

training for epoch 897
for epoch 897 learning rate is 3.0432527221704597e-05
training_step
0 2.012287433330829
validation_step
0 2.011712147639348
for epoch 897 mean loss on train 2.0122873783111572
for epoch 897 mean loss on val 2.011712074279785


 90%|███████████████████████████████████████████████████████████▎      | 898/1000 [30:33<00:11,  8.97it/s]

training for epoch 898
for epoch 898 learning rate is 3.0432527221704597e-05
training_step
0 2.0876636505126953
validation_step
0 2.020647635826698
for epoch 898 mean loss on train 2.0876636505126953
for epoch 898 mean loss on val 2.0206475257873535


 90%|███████████████████████████████████████████████████████████▎      | 899/1000 [30:33<00:11,  8.79it/s]

training for epoch 899
for epoch 899 learning rate is 3.0432527221704597e-05
training_step
0 1.9761728140024037
validation_step
0 2.0335184244009166
for epoch 899 mean loss on train 1.9761728048324585
for epoch 899 mean loss on val 2.0335183143615723


 90%|███████████████████████████████████████████████████████████▍      | 900/1000 [30:33<00:11,  8.77it/s]

training for epoch 900
for epoch 900 learning rate is 3.0432527221704597e-05
training_step
0 2.0403366088867188
validation_step
0 2.0866850339449368
for epoch 900 mean loss on train 2.0403366088867188
for epoch 900 mean loss on val 2.0866849422454834


 90%|███████████████████████████████████████████████████████████▍      | 901/1000 [30:34<00:11,  8.78it/s]

training for epoch 901
for epoch 901 learning rate is 3.0432527221704597e-05
training_step
0 1.9596195220947266
validation_step
0 2.0571780571570764
for epoch 901 mean loss on train 1.9596195220947266
for epoch 901 mean loss on val 2.057178020477295
training for epoch 902
for epoch 902 learning rate is 3.0432527221704597e-05
training_step
0 1.9964373661921575
validation_step
0 2.060834151047927
for epoch 902 mean loss on train 1.9964373111724854
for epoch 902 mean loss on val 2.0608341693878174


 90%|███████████████████████████████████████████████████████████▌      | 903/1000 [30:34<00:10,  9.36it/s]

training for epoch 903
for epoch 903 learning rate is 2.7389274499534138e-05
training_step
0 2.1235514420729418
validation_step
0 2.1309984647310696
for epoch 903 mean loss on train 2.123551368713379
for epoch 903 mean loss on val 2.130998373031616


 90%|███████████████████████████████████████████████████████████▋      | 904/1000 [30:34<00:10,  8.95it/s]

training for epoch 904
for epoch 904 learning rate is 2.7389274499534138e-05
training_step
0 2.031364294198843
validation_step
0 2.050360606266902
for epoch 904 mean loss on train 2.0313642024993896
for epoch 904 mean loss on val 2.050360679626465


 90%|███████████████████████████████████████████████████████████▋      | 905/1000 [30:34<00:11,  8.52it/s]

training for epoch 905
for epoch 905 learning rate is 2.7389274499534138e-05
training_step
0 2.1743323986346903
validation_step
0 2.0606949145977316
for epoch 905 mean loss on train 2.1743323802948
for epoch 905 mean loss on val 2.060694932937622


 91%|███████████████████████████████████████████████████████████▊      | 906/1000 [30:34<00:11,  7.91it/s]

training for epoch 906
for epoch 906 learning rate is 2.7389274499534138e-05
training_step
0 2.104745571429913
validation_step
0 1.992678715632512
for epoch 906 mean loss on train 2.104745626449585
for epoch 906 mean loss on val 1.9926787614822388


 91%|███████████████████████████████████████████████████████████▊      | 907/1000 [30:34<00:12,  7.47it/s]

training for epoch 907
for epoch 907 learning rate is 2.7389274499534138e-05
training_step
0 2.0126194587120643
validation_step
0 2.051823689387395
for epoch 907 mean loss on train 2.0126194953918457
for epoch 907 mean loss on val 2.051823616027832


 91%|███████████████████████████████████████████████████████████▉      | 908/1000 [30:34<00:11,  7.85it/s]

training for epoch 908
for epoch 908 learning rate is 2.7389274499534138e-05
training_step
0 2.1333241095909705
validation_step
0 2.0231626950777493
for epoch 908 mean loss on train 2.133324146270752
for epoch 908 mean loss on val 2.023162603378296


 91%|███████████████████████████████████████████████████████████▉      | 909/1000 [30:35<00:11,  7.98it/s]

training for epoch 909
for epoch 909 learning rate is 2.7389274499534138e-05
training_step
0 2.007121599637545
validation_step
0 2.0961293440598707
for epoch 909 mean loss on train 2.0071215629577637
for epoch 909 mean loss on val 2.0961294174194336


 91%|████████████████████████████████████████████████████████████      | 910/1000 [30:35<00:11,  8.13it/s]

training for epoch 910
for epoch 910 learning rate is 2.7389274499534138e-05
training_step
0 2.015883519099309
validation_step
0 1.9690874539888823
for epoch 910 mean loss on train 2.015883445739746
for epoch 910 mean loss on val 1.9690874814987183


 91%|████████████████████████████████████████████████████████████▏     | 911/1000 [30:35<00:11,  8.00it/s]

training for epoch 911
for epoch 911 learning rate is 2.7389274499534138e-05
training_step
0 1.9820656409630408
validation_step
0 2.0863299736609826
for epoch 911 mean loss on train 1.9820656776428223
for epoch 911 mean loss on val 2.086329936981201


 91%|████████████████████████████████████████████████████████████▏     | 912/1000 [30:35<00:10,  8.28it/s]

training for epoch 912
for epoch 912 learning rate is 2.7389274499534138e-05
training_step
0 2.077424709613507
validation_step
0 2.1215889270489035
for epoch 912 mean loss on train 2.0774247646331787
for epoch 912 mean loss on val 2.121588945388794


 91%|████████████████████████████████████████████████████████████▎     | 913/1000 [30:35<00:10,  8.52it/s]

training for epoch 913
for epoch 913 learning rate is 2.7389274499534138e-05
training_step
0 2.0654195638803334
validation_step
0 1.9680225665752704
for epoch 913 mean loss on train 2.0654196739196777
for epoch 913 mean loss on val 1.9680225849151611
training for epoch 914
for epoch 914 learning rate is 2.7389274499534138e-05
training_step
0 1.9716336176945612
validation_step
0 2.106385010939378
for epoch 914 mean loss on train 1.9716336727142334
for epoch 914 mean loss on val 2.1063849925994873


 92%|████████████████████████████████████████████████████████████▍     | 915/1000 [30:35<00:09,  9.16it/s]

training for epoch 915
for epoch 915 learning rate is 2.7389274499534138e-05
training_step
0 1.9768185248741736
validation_step
0 2.006598105797401
for epoch 915 mean loss on train 1.976818561553955
for epoch 915 mean loss on val 2.0065979957580566
training for epoch 916
for epoch 916 learning rate is 2.7389274499534138e-05
training_step
0 2.0101422529954176
validation_step
0 2.073759519136869
for epoch 916 mean loss on train 2.0101423263549805
for epoch 916 mean loss on val 2.0737595558166504


 92%|████████████████████████████████████████████████████████████▌     | 917/1000 [30:35<00:09,  9.14it/s]

training for epoch 917
for epoch 917 learning rate is 2.7389274499534138e-05
training_step
0 1.980908173781175
validation_step
0 2.0799036759596605
for epoch 917 mean loss on train 1.9809081554412842
for epoch 917 mean loss on val 2.0799036026000977


 92%|████████████████████████████████████████████████████████████▌     | 918/1000 [30:36<00:09,  9.01it/s]

training for epoch 918
for epoch 918 learning rate is 2.7389274499534138e-05
training_step
0 2.1765163128192606
validation_step
0 2.1177219977745643
for epoch 918 mean loss on train 2.17651629447937
for epoch 918 mean loss on val 2.1177220344543457


 92%|████████████████████████████████████████████████████████████▋     | 919/1000 [30:36<00:09,  8.82it/s]

training for epoch 919
for epoch 919 learning rate is 2.7389274499534138e-05
training_step
0 2.1205189044658956
validation_step
0 2.0101878826434794
for epoch 919 mean loss on train 2.120518922805786
for epoch 919 mean loss on val 2.010187864303589


 92%|████████████████████████████████████████████████████████████▋     | 920/1000 [30:36<00:09,  8.85it/s]

training for epoch 920
for epoch 920 learning rate is 2.7389274499534138e-05
training_step
0 2.075309019822341
validation_step
0 2.0186437460092397
for epoch 920 mean loss on train 2.0753090381622314
for epoch 920 mean loss on val 2.018643856048584


 92%|████████████████████████████████████████████████████████████▊     | 921/1000 [30:36<00:09,  8.72it/s]

training for epoch 921
for epoch 921 learning rate is 2.7389274499534138e-05
training_step
0 1.9835629096397986
validation_step
0 2.079204265887921
for epoch 921 mean loss on train 1.98356294631958
for epoch 921 mean loss on val 2.0792043209075928


 92%|████████████████████████████████████████████████████████████▊     | 922/1000 [30:36<00:09,  8.58it/s]

training for epoch 922
for epoch 922 learning rate is 2.7389274499534138e-05
training_step
0 2.093722710242638
validation_step
0 2.115908842820388
for epoch 922 mean loss on train 2.0937228202819824
for epoch 922 mean loss on val 2.1159088611602783


 92%|████████████████████████████████████████████████████████████▉     | 923/1000 [30:36<00:09,  8.50it/s]

training for epoch 923
for epoch 923 learning rate is 2.7389274499534138e-05
training_step
0 1.9949268927940955
validation_step
0 2.0030567462627706
for epoch 923 mean loss on train 1.994926929473877
for epoch 923 mean loss on val 2.003056764602661


 92%|████████████████████████████████████████████████████████████▉     | 924/1000 [30:36<00:09,  8.43it/s]

training for epoch 924
for epoch 924 learning rate is 2.4650347049580723e-05
training_step
0 2.0493348928598256
validation_step
0 2.109277138343224
for epoch 924 mean loss on train 2.04933500289917
for epoch 924 mean loss on val 2.1092772483825684


 92%|█████████████████████████████████████████████████████████████     | 925/1000 [30:36<00:08,  8.41it/s]

training for epoch 925
for epoch 925 learning rate is 2.4650347049580723e-05
training_step
0 2.045468110304612
validation_step
0 2.1321478623610277
for epoch 925 mean loss on train 2.0454680919647217
for epoch 925 mean loss on val 2.132147789001465


 93%|█████████████████████████████████████████████████████████████     | 926/1000 [30:36<00:08,  8.56it/s]

training for epoch 926
for epoch 926 learning rate is 2.4650347049580723e-05
training_step
0 2.058868701641376
validation_step
0 2.008472736065204
for epoch 926 mean loss on train 2.058868646621704
for epoch 926 mean loss on val 2.0084726810455322
training for epoch 927
for epoch 927 learning rate is 2.4650347049580723e-05
training_step
0 1.98960935152494
validation_step
0 1.9903165377103365
for epoch 927 mean loss on train 1.9896093606948853
for epoch 927 mean loss on val 1.9903165102005005


 93%|█████████████████████████████████████████████████████████████▏    | 928/1000 [30:37<00:07,  9.26it/s]

training for epoch 928
for epoch 928 learning rate is 2.4650347049580723e-05
training_step
0 2.0702046614426832
validation_step
0 2.0142996861384463
for epoch 928 mean loss on train 2.070204734802246
for epoch 928 mean loss on val 2.0142996311187744


 93%|█████████████████████████████████████████████████████████████▎    | 929/1000 [30:37<00:07,  9.28it/s]

training for epoch 929
for epoch 929 learning rate is 2.4650347049580723e-05
training_step
0 1.9450151003324068
validation_step
0 2.0534032674936147
for epoch 929 mean loss on train 1.9450150728225708
for epoch 929 mean loss on val 2.053403377532959


 93%|█████████████████████████████████████████████████████████████▍    | 930/1000 [30:37<00:07,  9.38it/s]

training for epoch 930
for epoch 930 learning rate is 2.4650347049580723e-05
training_step
0 2.2103346311129055
validation_step
0 2.19023689856896
for epoch 930 mean loss on train 2.210334539413452
for epoch 930 mean loss on val 2.190236806869507


 93%|█████████████████████████████████████████████████████████████▍    | 931/1000 [30:37<00:07,  9.45it/s]

training for epoch 931
for epoch 931 learning rate is 2.4650347049580723e-05
training_step
0 2.0599241990309496
validation_step
0 2.099410423865685
for epoch 931 mean loss on train 2.0599241256713867
for epoch 931 mean loss on val 2.0994105339050293


 93%|█████████████████████████████████████████████████████████████▌    | 932/1000 [30:37<00:07,  9.18it/s]

training for epoch 932
for epoch 932 learning rate is 2.4650347049580723e-05
training_step
0 2.052718529334435
validation_step
0 2.0250698969914365
for epoch 932 mean loss on train 2.0527186393737793
for epoch 932 mean loss on val 2.0250699520111084


 93%|█████████████████████████████████████████████████████████████▌    | 933/1000 [30:37<00:07,  9.03it/s]

training for epoch 933
for epoch 933 learning rate is 2.4650347049580723e-05
training_step
0 2.051942825317383
validation_step
0 2.0360447810246396
for epoch 933 mean loss on train 2.051942825317383
for epoch 933 mean loss on val 2.0360448360443115


 93%|█████████████████████████████████████████████████████████████▋    | 934/1000 [30:37<00:07,  8.87it/s]

training for epoch 934
for epoch 934 learning rate is 2.4650347049580723e-05
training_step
0 2.1011302654559794
validation_step
0 2.1555604201096754
for epoch 934 mean loss on train 2.101130247116089
for epoch 934 mean loss on val 2.1555604934692383


 94%|█████████████████████████████████████████████████████████████▋    | 935/1000 [30:37<00:07,  8.75it/s]

training for epoch 935
for epoch 935 learning rate is 2.4650347049580723e-05
training_step
0 2.09993523817796
validation_step
0 2.0771653101994443
for epoch 935 mean loss on train 2.099935293197632
for epoch 935 mean loss on val 2.077165365219116


 94%|█████████████████████████████████████████████████████████████▊    | 936/1000 [30:38<00:07,  8.97it/s]

training for epoch 936
for epoch 936 learning rate is 2.4650347049580723e-05
training_step
0 2.0323939690223107
validation_step
0 2.0288036052997294
for epoch 936 mean loss on train 2.0323939323425293
for epoch 936 mean loss on val 2.028803586959839


 94%|█████████████████████████████████████████████████████████████▊    | 937/1000 [30:38<00:06,  9.07it/s]

training for epoch 937
for epoch 937 learning rate is 2.4650347049580723e-05
training_step
0 2.0387852008526144
validation_step
0 2.1652091099665713
for epoch 937 mean loss on train 2.038785219192505
for epoch 937 mean loss on val 2.1652090549468994
training for epoch 938
for epoch 938 learning rate is 2.4650347049580723e-05
training_step
0 2.104666929978591
validation_step
0 1.9895123701829176
for epoch 938 mean loss on train 2.1046669483184814
for epoch 938 mean loss on val 1.989512324333191


 94%|█████████████████████████████████████████████████████████████▉    | 939/1000 [30:38<00:06,  9.37it/s]

training for epoch 939
for epoch 939 learning rate is 2.4650347049580723e-05
training_step
0 1.9932655921349158
validation_step
0 2.009995827308068
for epoch 939 mean loss on train 1.9932656288146973
for epoch 939 mean loss on val 2.009995937347412


 94%|██████████████████████████████████████████████████████████████    | 940/1000 [30:38<00:06,  9.33it/s]

training for epoch 940
for epoch 940 learning rate is 2.4650347049580723e-05
training_step
0 2.017364795391376
validation_step
0 2.136407412015475
for epoch 940 mean loss on train 2.017364740371704
for epoch 940 mean loss on val 2.1364073753356934


 94%|██████████████████████████████████████████████████████████████    | 941/1000 [30:38<00:06,  9.44it/s]

training for epoch 941
for epoch 941 learning rate is 2.4650347049580723e-05
training_step
0 2.090466719407302
validation_step
0 1.9908621861384466
for epoch 941 mean loss on train 2.0904667377471924
for epoch 941 mean loss on val 1.9908621311187744


 94%|██████████████████████████████████████████████████████████████▏   | 942/1000 [30:38<00:06,  9.16it/s]

training for epoch 942
for epoch 942 learning rate is 2.4650347049580723e-05
training_step
0 2.1439795860877404
validation_step
0 2.0428906954251804
for epoch 942 mean loss on train 2.143979549407959
for epoch 942 mean loss on val 2.042890787124634


 94%|██████████████████████████████████████████████████████████████▏   | 943/1000 [30:38<00:06,  8.94it/s]

training for epoch 943
for epoch 943 learning rate is 2.4650347049580723e-05
training_step
0 2.075086446908804
validation_step
0 2.077573482806866
for epoch 943 mean loss on train 2.0750863552093506
for epoch 943 mean loss on val 2.077573537826538


 94%|██████████████████████████████████████████████████████████████▎   | 944/1000 [30:38<00:06,  8.81it/s]

training for epoch 944
for epoch 944 learning rate is 2.4650347049580723e-05
training_step
0 2.081327438354492
validation_step
0 1.9870018592247596
for epoch 944 mean loss on train 2.081327438354492
for epoch 944 mean loss on val 1.987001895904541


 94%|██████████████████████████████████████████████████████████████▎   | 945/1000 [30:39<00:06,  8.79it/s]

training for epoch 945
for epoch 945 learning rate is 2.218531234462265e-05
training_step
0 2.1040912041297326
validation_step
0 2.085129664494441
for epoch 945 mean loss on train 2.104091167449951
for epoch 945 mean loss on val 2.085129737854004
training for epoch 946
for epoch 946 learning rate is 2.218531234462265e-05
training_step
0 2.044230681199294
validation_step
0 1.9720439910888672
for epoch 946 mean loss on train 2.0442306995391846
for epoch 946 mean loss on val 1.9720439910888672


 95%|██████████████████████████████████████████████████████████████▌   | 947/1000 [30:39<00:05,  9.12it/s]

training for epoch 947
for epoch 947 learning rate is 2.218531234462265e-05
training_step
0 1.9933838477501502
validation_step
0 2.0139328883244443
for epoch 947 mean loss on train 1.9933838844299316
for epoch 947 mean loss on val 2.013932943344116


 95%|██████████████████████████████████████████████████████████████▌   | 948/1000 [30:39<00:05,  8.97it/s]

training for epoch 948
for epoch 948 learning rate is 2.218531234462265e-05
training_step
0 2.0086895869328427
validation_step
0 2.0623353811410756
for epoch 948 mean loss on train 2.0086896419525146
for epoch 948 mean loss on val 2.06233549118042


 95%|██████████████████████████████████████████████████████████████▋   | 949/1000 [30:39<00:05,  8.98it/s]

training for epoch 949
for epoch 949 learning rate is 2.218531234462265e-05
training_step
0 1.9613623985877404
validation_step
0 2.031771879929763
for epoch 949 mean loss on train 1.961362361907959
for epoch 949 mean loss on val 2.0317718982696533


 95%|██████████████████████████████████████████████████████████████▋   | 950/1000 [30:39<00:05,  9.02it/s]

training for epoch 950
for epoch 950 learning rate is 2.218531234462265e-05
training_step
0 2.0497283935546875
validation_step
0 1.9951590024507964
for epoch 950 mean loss on train 2.0497283935546875
for epoch 950 mean loss on val 1.9951590299606323


 95%|██████████████████████████████████████████████████████████████▊   | 951/1000 [30:39<00:05,  8.86it/s]

training for epoch 951
for epoch 951 learning rate is 2.218531234462265e-05
training_step
0 2.0336073361910305
validation_step
0 2.0462427872877855
for epoch 951 mean loss on train 2.033607244491577
for epoch 951 mean loss on val 2.0462427139282227


 95%|██████████████████████████████████████████████████████████████▊   | 952/1000 [30:39<00:05,  8.93it/s]

training for epoch 952
for epoch 952 learning rate is 2.218531234462265e-05
training_step
0 2.0190494243915262
validation_step
0 2.0385585198035607
for epoch 952 mean loss on train 2.0190494060516357
for epoch 952 mean loss on val 2.0385584831237793
training for epoch 953
for epoch 953 learning rate is 2.218531234462265e-05
training_step
0 2.0647786947397084
validation_step
0 1.9896579155555139
for epoch 953 mean loss on train 2.0647788047790527
for epoch 953 mean loss on val 1.9896578788757324


 95%|██████████████████████████████████████████████████████████████▉   | 954/1000 [30:40<00:05,  9.13it/s]

training for epoch 954
for epoch 954 learning rate is 2.218531234462265e-05
training_step
0 2.0814009446364183
validation_step
0 2.0648166949932394
for epoch 954 mean loss on train 2.0814008712768555
for epoch 954 mean loss on val 2.06481671333313


 96%|███████████████████████████████████████████████████████████████   | 955/1000 [30:40<00:04,  9.06it/s]

training for epoch 955
for epoch 955 learning rate is 2.218531234462265e-05
training_step
0 2.0059789510873647
validation_step
0 2.0167902432955227
for epoch 955 mean loss on train 2.005979061126709
for epoch 955 mean loss on val 2.0167901515960693


 96%|███████████████████████████████████████████████████████████████   | 956/1000 [30:40<00:05,  8.70it/s]

training for epoch 956
for epoch 956 learning rate is 2.218531234462265e-05
training_step
0 2.0316024193396935
validation_step
0 2.183487525353065
for epoch 956 mean loss on train 2.031602382659912
for epoch 956 mean loss on val 2.1834874153137207


 96%|███████████████████████████████████████████████████████████████▏  | 957/1000 [30:40<00:04,  8.91it/s]

training for epoch 957
for epoch 957 learning rate is 2.218531234462265e-05
training_step
0 1.9999564244196966
validation_step
0 1.957539044893705
for epoch 957 mean loss on train 1.9999563694000244
for epoch 957 mean loss on val 1.9575390815734863


 96%|███████████████████████████████████████████████████████████████▏  | 958/1000 [30:40<00:04,  9.08it/s]

training for epoch 958
for epoch 958 learning rate is 2.218531234462265e-05
training_step
0 2.0599055657019982
validation_step
0 2.0200653076171875
for epoch 958 mean loss on train 2.059905529022217
for epoch 958 mean loss on val 2.0200653076171875


 96%|███████████████████████████████████████████████████████████████▎  | 959/1000 [30:40<00:04,  8.79it/s]

training for epoch 959
for epoch 959 learning rate is 2.218531234462265e-05
training_step
0 2.100488075843224
validation_step
0 2.0668549170860877
for epoch 959 mean loss on train 2.1004881858825684
for epoch 959 mean loss on val 2.066854953765869
training for epoch 960
for epoch 960 learning rate is 2.218531234462265e-05
training_step
0 1.9902926224928637
validation_step
0 2.0210514068603516
for epoch 960 mean loss on train 1.9902926683425903
for epoch 960 mean loss on val 2.0210514068603516


 96%|███████████████████████████████████████████████████████████████▍  | 961/1000 [30:40<00:04,  9.32it/s]

training for epoch 961
for epoch 961 learning rate is 2.218531234462265e-05
training_step
0 2.0278004866379957
validation_step
0 2.1573926485501804
for epoch 961 mean loss on train 2.0278005599975586
for epoch 961 mean loss on val 2.157392740249634


 96%|███████████████████████████████████████████████████████████████▍  | 962/1000 [30:40<00:04,  9.41it/s]

training for epoch 962
for epoch 962 learning rate is 2.218531234462265e-05
training_step
0 2.009173026451698
validation_step
0 2.019841120793269
for epoch 962 mean loss on train 2.0091729164123535
for epoch 962 mean loss on val 2.019841194152832


 96%|███████████████████████████████████████████████████████████████▌  | 963/1000 [30:41<00:04,  9.02it/s]

training for epoch 963
for epoch 963 learning rate is 2.218531234462265e-05
training_step
0 2.0205216040978065
validation_step
0 2.1037673950195312
for epoch 963 mean loss on train 2.020521640777588
for epoch 963 mean loss on val 2.1037673950195312


 96%|███████████████████████████████████████████████████████████████▌  | 964/1000 [30:41<00:03,  9.01it/s]

training for epoch 964
for epoch 964 learning rate is 2.218531234462265e-05
training_step
0 1.936141821054312
validation_step
0 2.076753322894757
for epoch 964 mean loss on train 1.936141848564148
for epoch 964 mean loss on val 2.0767533779144287


 96%|███████████████████████████████████████████████████████████████▋  | 965/1000 [30:41<00:03,  8.98it/s]

training for epoch 965
for epoch 965 learning rate is 2.218531234462265e-05
training_step
0 2.016530550443209
validation_step
0 2.0621575575608473
for epoch 965 mean loss on train 2.0165305137634277
for epoch 965 mean loss on val 2.06215763092041


 97%|███████████████████████████████████████████████████████████████▊  | 966/1000 [30:41<00:03,  8.83it/s]

training for epoch 966
for epoch 966 learning rate is 1.9966781110160387e-05
training_step
0 2.0405921936035156
validation_step
0 2.095319601205679
for epoch 966 mean loss on train 2.0405921936035156
for epoch 966 mean loss on val 2.0953195095062256


 97%|███████████████████████████████████████████████████████████████▊  | 967/1000 [30:41<00:03,  8.75it/s]

training for epoch 967
for epoch 967 learning rate is 1.9966781110160387e-05
training_step
0 2.156201729407677
validation_step
0 2.04463870708759
for epoch 967 mean loss on train 2.1562018394470215
for epoch 967 mean loss on val 2.0446386337280273


 97%|███████████████████████████████████████████████████████████████▉  | 968/1000 [30:41<00:03,  8.55it/s]

training for epoch 968
for epoch 968 learning rate is 1.9966781110160387e-05
training_step
0 2.057830076951247
validation_step
0 2.071205872755784
for epoch 968 mean loss on train 2.0578300952911377
for epoch 968 mean loss on val 2.0712058544158936


 97%|███████████████████████████████████████████████████████████████▉  | 969/1000 [30:41<00:03,  8.73it/s]

training for epoch 969
for epoch 969 learning rate is 1.9966781110160387e-05
training_step
0 1.9910593766432543
validation_step
0 2.0290643251859226
for epoch 969 mean loss on train 1.991059422492981
for epoch 969 mean loss on val 2.029064416885376


 97%|████████████████████████████████████████████████████████████████  | 970/1000 [30:41<00:03,  9.07it/s]

training for epoch 970
for epoch 970 learning rate is 1.9966781110160387e-05
training_step
0 1.9642546727107122
validation_step
0 2.091565205500676
for epoch 970 mean loss on train 1.96425461769104
for epoch 970 mean loss on val 2.0915651321411133


 97%|████████████████████████████████████████████████████████████████  | 971/1000 [30:41<00:03,  9.22it/s]

training for epoch 971
for epoch 971 learning rate is 1.9966781110160387e-05
training_step
0 2.043489749615009
validation_step
0 1.9509127690241888
for epoch 971 mean loss on train 2.043489694595337
for epoch 971 mean loss on val 1.9509127140045166
training for epoch 972
for epoch 972 learning rate is 1.9966781110160387e-05
training_step
0 1.9957155080942006
validation_step
0 2.009104215181791
for epoch 972 mean loss on train 1.9957154989242554
for epoch 972 mean loss on val 2.0091042518615723


 97%|████████████████████████████████████████████████████████████████▏ | 973/1000 [30:42<00:02,  9.62it/s]

training for epoch 973
for epoch 973 learning rate is 1.9966781110160387e-05
training_step
0 1.9952816596397986
validation_step
0 2.038289290208083
for epoch 973 mean loss on train 1.99528169631958
for epoch 973 mean loss on val 2.0382893085479736


 97%|████████████████████████████████████████████████████████████████▎ | 974/1000 [30:42<00:02,  9.66it/s]

training for epoch 974
for epoch 974 learning rate is 1.9966781110160387e-05
training_step
0 2.066874284010667
validation_step
0 2.151190244234525
for epoch 974 mean loss on train 2.0668742656707764
for epoch 974 mean loss on val 2.1511902809143066


 98%|████████████████████████████████████████████████████████████████▎ | 975/1000 [30:42<00:02,  9.31it/s]

training for epoch 975
for epoch 975 learning rate is 1.9966781110160387e-05
training_step
0 2.048126220703125
validation_step
0 1.9855330540583684
for epoch 975 mean loss on train 2.048126220703125
for epoch 975 mean loss on val 1.9855329990386963


 98%|████████████████████████████████████████████████████████████████▍ | 976/1000 [30:42<00:02,  9.46it/s]

training for epoch 976
for epoch 976 learning rate is 1.9966781110160387e-05
training_step
0 1.9828937237079327
validation_step
0 2.030606196476863
for epoch 976 mean loss on train 1.982893705368042
for epoch 976 mean loss on val 2.030606269836426


 98%|████████████████████████████████████████████████████████████████▍ | 977/1000 [30:42<00:02,  9.32it/s]

training for epoch 977
for epoch 977 learning rate is 1.9966781110160387e-05
training_step
0 2.1725183633657603
validation_step
0 2.0901737213134766
for epoch 977 mean loss on train 2.172518253326416
for epoch 977 mean loss on val 2.0901737213134766


 98%|████████████████████████████████████████████████████████████████▌ | 978/1000 [30:42<00:02,  9.26it/s]

training for epoch 978
for epoch 978 learning rate is 1.9966781110160387e-05
training_step
0 2.0169620513916016
validation_step
0 2.0684758699857273
for epoch 978 mean loss on train 2.0169620513916016
for epoch 978 mean loss on val 2.0684759616851807


 98%|████████████████████████████████████████████████████████████████▌ | 979/1000 [30:42<00:02,  8.85it/s]

training for epoch 979
for epoch 979 learning rate is 1.9966781110160387e-05
training_step
0 2.066394952627329
validation_step
0 1.9851157848651593
for epoch 979 mean loss on train 2.0663950443267822
for epoch 979 mean loss on val 1.9851157665252686


 98%|████████████████████████████████████████████████████████████████▋ | 980/1000 [30:42<00:02,  8.55it/s]

training for epoch 980
for epoch 980 learning rate is 1.9966781110160387e-05
training_step
0 1.9987591963547926
validation_step
0 2.0313660548283505
for epoch 980 mean loss on train 1.998759150505066
for epoch 980 mean loss on val 2.0313661098480225


 98%|████████████████████████████████████████████████████████████████▋ | 981/1000 [30:43<00:02,  8.58it/s]

training for epoch 981
for epoch 981 learning rate is 1.9966781110160387e-05
training_step
0 2.029749943659856
validation_step
0 2.025705777681791
for epoch 981 mean loss on train 2.029749870300293
for epoch 981 mean loss on val 2.0257058143615723


 98%|████████████████████████████████████████████████████████████████▊ | 982/1000 [30:43<00:02,  8.54it/s]

training for epoch 982
for epoch 982 learning rate is 1.9966781110160387e-05
training_step
0 1.9506502885084887
validation_step
0 2.156097412109375
for epoch 982 mean loss on train 1.9506503343582153
for epoch 982 mean loss on val 2.156097412109375


 98%|████████████████████████████████████████████████████████████████▉ | 983/1000 [30:43<00:01,  8.60it/s]

training for epoch 983
for epoch 983 learning rate is 1.9966781110160387e-05
training_step
0 2.013791744525616
validation_step
0 2.1294410412128153
for epoch 983 mean loss on train 2.013791799545288
for epoch 983 mean loss on val 2.129441022872925


 98%|████████████████████████████████████████████████████████████████▉ | 984/1000 [30:43<00:01,  8.69it/s]

training for epoch 984
for epoch 984 learning rate is 1.9966781110160387e-05
training_step
0 2.0520268953763523
validation_step
0 1.9859525240384615
for epoch 984 mean loss on train 2.0520269870758057
for epoch 984 mean loss on val 1.9859524965286255


 98%|█████████████████████████████████████████████████████████████████ | 985/1000 [30:43<00:01,  8.59it/s]

training for epoch 985
for epoch 985 learning rate is 1.9966781110160387e-05
training_step
0 2.0365144289456882
validation_step
0 2.150940234844501
for epoch 985 mean loss on train 2.0365145206451416
for epoch 985 mean loss on val 2.150940179824829


 99%|█████████████████████████████████████████████████████████████████ | 986/1000 [30:43<00:01,  8.68it/s]

training for epoch 986
for epoch 986 learning rate is 1.9966781110160387e-05
training_step
0 1.990171139056866
validation_step
0 2.133959550123948
for epoch 986 mean loss on train 1.990171194076538
for epoch 986 mean loss on val 2.1339595317840576


 99%|█████████████████████████████████████████████████████████████████▏| 987/1000 [30:43<00:01,  8.58it/s]

training for epoch 987
for epoch 987 learning rate is 1.797010299914435e-05
training_step
0 2.2174368638258715
validation_step
0 2.04818608210637
for epoch 987 mean loss on train 2.2174367904663086
for epoch 987 mean loss on val 2.0481860637664795


 99%|█████████████████████████████████████████████████████████████████▏| 988/1000 [30:43<00:01,  8.81it/s]

training for epoch 988
for epoch 988 learning rate is 1.797010299914435e-05
training_step
0 2.029234666090745
validation_step
0 2.1350540748009315
for epoch 988 mean loss on train 2.0292346477508545
for epoch 988 mean loss on val 2.135054111480713


 99%|█████████████████████████████████████████████████████████████████▎| 989/1000 [30:43<00:01,  8.71it/s]

training for epoch 989
for epoch 989 learning rate is 1.797010299914435e-05
training_step
0 2.001299491295448
validation_step
0 2.092967987060547
for epoch 989 mean loss on train 2.0012993812561035
for epoch 989 mean loss on val 2.092967987060547


 99%|█████████████████████████████████████████████████████████████████▎| 990/1000 [30:44<00:01,  8.98it/s]

training for epoch 990
for epoch 990 learning rate is 1.797010299914435e-05
training_step
0 2.0023202162522535
validation_step
0 2.0174669119027944
for epoch 990 mean loss on train 2.0023202896118164
for epoch 990 mean loss on val 2.0174670219421387
training for epoch 991
for epoch 991 learning rate is 1.797010299914435e-05
training_step
0 2.052747286283053
validation_step
0 2.00922599205604
for epoch 991 mean loss on train 2.0527472496032715
for epoch 991 mean loss on val 2.009226083755493


 99%|█████████████████████████████████████████████████████████████████▍| 992/1000 [30:44<00:00,  9.46it/s]

training for epoch 992
for epoch 992 learning rate is 1.797010299914435e-05
training_step
0 2.0355490170992336
validation_step
0 2.020517055804913
for epoch 992 mean loss on train 2.0355489253997803
for epoch 992 mean loss on val 2.020517110824585


 99%|█████████████████████████████████████████████████████████████████▌| 993/1000 [30:44<00:00,  9.41it/s]

training for epoch 993
for epoch 993 learning rate is 1.797010299914435e-05
training_step
0 2.04326424231896
validation_step
0 1.9970343663142278
for epoch 993 mean loss on train 2.043264150619507
for epoch 993 mean loss on val 1.9970343112945557
training for epoch 994
for epoch 994 learning rate is 1.797010299914435e-05
training_step
0 2.1457892197829027
validation_step
0 2.071165818434495
for epoch 994 mean loss on train 2.14578914642334
for epoch 994 mean loss on val 2.0711658000946045


100%|█████████████████████████████████████████████████████████████████▋| 995/1000 [30:44<00:00,  9.67it/s]

training for epoch 995
for epoch 995 learning rate is 1.797010299914435e-05
training_step
0 2.017881099994366
validation_step
0 2.0201957409198465
for epoch 995 mean loss on train 2.017881155014038
for epoch 995 mean loss on val 2.020195722579956
training for epoch 996
for epoch 996 learning rate is 1.797010299914435e-05
training_step
0 2.090831756591797
validation_step
0 2.022869110107422
for epoch 996 mean loss on train 2.090831756591797
for epoch 996 mean loss on val 2.022869110107422


100%|█████████████████████████████████████████████████████████████████▊| 997/1000 [30:44<00:00,  9.76it/s]

training for epoch 997
for epoch 997 learning rate is 1.797010299914435e-05
training_step
0 2.0668665078970103
validation_step
0 1.9960645528940053
for epoch 997 mean loss on train 2.066866397857666
for epoch 997 mean loss on val 1.99606454372406
training for epoch 998
for epoch 998 learning rate is 1.797010299914435e-05
training_step
0 2.075634296123798
validation_step
0 2.0461813119741588
for epoch 998 mean loss on train 2.075634241104126
for epoch 998 mean loss on val 2.0461812019348145


100%|█████████████████████████████████████████████████████████████████▉| 999/1000 [30:44<00:00,  9.96it/s]

training for epoch 999
for epoch 999 learning rate is 1.797010299914435e-05
training_step
0 2.05783932025616
validation_step
0 2.0840624295748196
for epoch 999 mean loss on train 2.0578393936157227
for epoch 999 mean loss on val 2.084062337875366


100%|█████████████████████████████████████████████████████████████████| 1000/1000 [30:45<00:00,  1.85s/it]


In [8]:
# Sanity check: print the shape of every element of each training batch.
# The loop name `el` is kept because it stays bound as a notebook global.
for el in model_trainer.train_data_loader:
    batch_shapes = [tensor.shape for tensor in el]
    print(batch_shapes)

[torch.Size([15, 2]), torch.Size([15, 6]), torch.Size([15]), torch.Size([15])]


In [9]:
%matplotlib notebook
import matplotlib.pyplot as plt
plt.plot(results["losses"]["train"],"b*")
plt.plot(results["losses"]["val"],"g*")
plt.title("losses")
plt.savefig(f'test.png', bbox_inches='tight')