In [1]:
from datetime import datetime
import json
from collections import Counter

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
import torch
import torch.nn as nn
import torch.functional as F
from torch.utils.data import DataLoader, Dataset
import torchmetrics

from deeprec.torch.trainer import Trainer, set_device
from deeprec import ROOT

In [2]:
with open('../data/metadata.json', 'r') as fp:
    meta = json.load(fp)

meta.keys()

dict_keys(['title_emb_size', 'string_na', 'genres', 'ages', 'occupations', 'user', 'movie', 'city', 'state'])

In [54]:
len(Counter([2, 1, 3]))

3

In [55]:
class Vocab(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass

    def fit(self, x):
        c = Counter(x)
        self.lookup_ = {
            str(v).lower(): k for k, v in enumerate([x[0] for x in sorted(c.items(), key=lambda x: x[1], reverse=True)])
        }

    def transform(self, x):
        return [self.lookup_.get(str(xx).lower(), len(self.lookup_)+1) for xx in x]

In [102]:
state_enc = Vocab()
state_enc.fit(meta['state'])

city_enc = Vocab()
city_enc.fit(meta['city'])

user_enc = Vocab()
user_enc.fit(meta['user'])

movie_enc = Vocab()
movie_enc.fit(meta['movie'])

In [57]:
df = pd.read_parquet('../data/train.parq.gzip').drop('rating', axis=1)
df.head()

Unnamed: 0_level_0,user,movie,hour,day_of_week,month,gender,age,occupation,city,state,...,embed_15,embed_16,embed_17,embed_18,embed_19,embed_20,embed_21,embed_22,embed_23,embed_24
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
956151,6036,3132,1,2,4,1,25,15,Gainesville,FL,...,0.511667,1.46494,-2.46967,-1.196152,-0.7946,-0.09462,2.84776,2.17518,1.03427,-0.75034
956152,6037,3132,3,2,4,1,45,1,Arlington,TX,...,0.511667,1.46494,-2.46967,-1.196152,-0.7946,-0.09462,2.84776,2.17518,1.03427,-0.75034
956149,5960,3132,17,5,4,1,45,0,Slidell,LA,...,0.511667,1.46494,-2.46967,-1.196152,-0.7946,-0.09462,2.84776,2.17518,1.03427,-0.75034
956150,6016,3132,20,2,4,0,45,1,Nashville,TN,...,0.511667,1.46494,-2.46967,-1.196152,-0.7946,-0.09462,2.84776,2.17518,1.03427,-0.75034
956146,5643,3132,6,6,5,1,35,1,Salt Lake City,UT,...,0.511667,1.46494,-2.46967,-1.196152,-0.7946,-0.09462,2.84776,2.17518,1.03427,-0.75034


In [58]:
df = pd.read_parquet('../data/train.parq.gzip', columns=['rating'])
df.head()

Unnamed: 0_level_0,rating
index,Unnamed: 1_level_1
956151,5
956152,4
956149,5
956150,3
956146,4


In [59]:
set(df['rating'])

{1, 2, 3, 4, 5}

In [103]:
class MovieDataset(Dataset):
    def __init__(self, filename, vocabs):
        x = pd.read_parquet(filename).drop('rating', axis=1)
        y = pd.read_parquet(filename, columns=['rating'])

        x['state'] = vocabs['state'].transform(x['state'])
        x['city'] = vocabs['city'].transform(x['city'])
        x['user'] = vocabs['user'].transform(x['user'])
        x['movie'] = vocabs['movie'].transform(x['movie'])

        self.feature_names = x.columns
        self.x = x.to_dict('records')
        self.y = torch.tensor(y.values, dtype=torch.float32)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


In [104]:
encoders = {
    'user': user_enc,
    'movie': movie_enc,
    'city': city_enc,
    'state': state_enc
}

train = MovieDataset('../data/train.parq.gzip', vocabs=encoders)
test = MovieDataset('../data/test.parq.gzip', vocabs=encoders)

In [30]:
print(len(train.feature_names))
train.feature_names

54


Index(['user', 'movie', 'hour', 'day_of_week', 'month', 'gender', 'age',
       'occupation', 'city', 'state', 'year', 'genre_action',
       'genre_adventure', 'genre_animation', 'genre_childrens', 'genre_comedy',
       'genre_crime', 'genre_documentary', 'genre_drama', 'genre_fantasy',
       'genre_filmnoir', 'genre_horror', 'genre_musical', 'genre_mystery',
       'genre_romance', 'genre_scifi', 'genre_thriller', 'genre_war',
       'genre_western', 'embed_0', 'embed_1', 'embed_2', 'embed_3', 'embed_4',
       'embed_5', 'embed_6', 'embed_7', 'embed_8', 'embed_9', 'embed_10',
       'embed_11', 'embed_12', 'embed_13', 'embed_14', 'embed_15', 'embed_16',
       'embed_17', 'embed_18', 'embed_19', 'embed_20', 'embed_21', 'embed_22',
       'embed_23', 'embed_24'],
      dtype='object')

In [31]:
dl = DataLoader(train, 4)
next(iter(dl))

[{'user': tensor([  66, 1566, 1665,   60]),
  'movie': tensor([3132, 3132, 3132, 3132]),
  'hour': tensor([ 1,  3, 17, 20]),
  'day_of_week': tensor([2, 2, 5, 2]),
  'month': tensor([4, 4, 4, 4]),
  'gender': tensor([1, 1, 1, 0]),
  'age': tensor([25, 45, 45, 45]),
  'occupation': tensor([15,  1,  0,  1]),
  'city': tensor([   61,    34, 99999,    38]),
  'state': tensor([    9,     4, 99999, 99999]),
  'year': tensor([1919, 1919, 1919, 1919]),
  'genre_action': tensor([0, 0, 0, 0]),
  'genre_adventure': tensor([0, 0, 0, 0]),
  'genre_animation': tensor([0, 0, 0, 0]),
  'genre_childrens': tensor([0, 0, 0, 0]),
  'genre_comedy': tensor([1, 1, 1, 1]),
  'genre_crime': tensor([0, 0, 0, 0]),
  'genre_documentary': tensor([0, 0, 0, 0]),
  'genre_drama': tensor([0, 0, 0, 0]),
  'genre_fantasy': tensor([0, 0, 0, 0]),
  'genre_filmnoir': tensor([0, 0, 0, 0]),
  'genre_horror': tensor([0, 0, 0, 0]),
  'genre_musical': tensor([0, 0, 0, 0]),
  'genre_mystery': tensor([0, 0, 0, 0]),
  'genre_roman

In [36]:
z = next(iter(dl))

In [37]:
def stack_features(inputs, feat):
    return torch.stack([v for k, v in inputs.items() if feat in k], 1)


stack_features(z[0], 'genre')

tensor([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [137]:
class RecModel(nn.Module):
    def __init__(self, metadata, n_features=54):
        super().__init__()
        self.meta = metadata
        self.embed_dims = {
            'large': 25,
            'med': 7,
            'small': 3
        }
        self.loss_func = nn.MSELoss()

        self.user_embeds = nn.Embedding(
            num_embeddings=len(meta['user'].keys()) + 2,
            embedding_dim=self.embed_dims['large']
        )

        self.city_embeds = nn.Embedding(
            num_embeddings=len(meta['city'].keys()) + 2,
            embedding_dim=self.embed_dims['med']
        )

        self.state_embeds = nn.Embedding(
            num_embeddings=len(meta['state'].keys()) + 2,
            embedding_dim=self.embed_dims['small']
        )

        self.age_embeds = nn.Embedding(
            num_embeddings=max(meta['ages']) + 1,
            embedding_dim=self.embed_dims['small']
        )

        self.occ_embeds = nn.Embedding(
            num_embeddings=max(meta['occupations']) + 1,
            embedding_dim=self.embed_dims['small']
        )

        self.user_model = nn.Sequential(
            nn.LazyLinear(out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU()
        )

        self.movie_embeds = nn.Embedding(
            num_embeddings=len(meta['movie'].keys()) + 2,
            embedding_dim=self.embed_dims['large']
        )

        self.title_embeds = nn.Sequential(
            nn.Linear(in_features=25, out_features=25),
            nn.Softmax()
        )

        self.movie_model = nn.Sequential(
            nn.LazyLinear(out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU()
        )

        self.model = nn.Sequential(
            nn.LazyLinear(out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1)
        )

    def forward(self, x):
        x_user = torch.concat(
            (
                self.user_embeds(x['user']),
                self.city_embeds(x['city']),
                self.state_embeds(x['state']),
                self.age_embeds(x['age']),
                self.occ_embeds(x['occupation']),
                x['gender'].unsqueeze(-1),
                x['hour'].unsqueeze(-1),
                x['day_of_week'].unsqueeze(-1),
                x['month'].unsqueeze(-1)
            ),
            dim=1
        )

        x_movie = torch.concat(
            (
                self.movie_embeds(x['movie']),
                self.title_embeds(stack_features(x, 'embed').float()),
                stack_features(x, 'genre'),
                x['year'].unsqueeze(-1)
            ),
            dim=1
        )
        user_block = self.user_model(x_user)
        movie_block = self.movie_model(x_movie.float())
        x = self.model(torch.mul(user_block, movie_block))
        return x

    def predict(self, x):
        return self(x)

In [None]:
NOW = datetime.now().strftime('%Y%m%d-%H%M')
LOG_DIR = ROOT.joinpath('runs', NOW)
BATCH = 10_000

train_loader = DataLoader(train, batch_size=BATCH, shuffle=True)
test_loader = DataLoader(test, batch_size=BATCH)

device = set_device()
mod = RecModel(metadata=meta)
opt = torch.optim.AdamW(mod.parameters(), lr=0.001)
trainer = Trainer(
    mod, epochs=100, device=device, log_dir=LOG_DIR, checkpoint_file=LOG_DIR.joinpath('model.pt'),
    optimizer=opt, score_funcs={'mse': torchmetrics.MeanSquaredError()}
)
trainer.fit(train_loader, test_loader, verbose=True)

Epoch:   0%|          | 0/100 [00:00<?, ?it/s]
Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:01<01:36,  1.02s/it][A
Batch:   2%|▏         | 2/96 [00:01<01:01,  1.54it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:48,  1.93it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:44,  2.05it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.14it/s][A
Batch:   6%|▋         | 6/96 [00:03<00:41,  2.18it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:47,  1.89it/s][A
Batch:   8%|▊         | 8/96 [00:04<00:46,  1.89it/s][A
Batch:   9%|▉         | 9/96 [00:05<01:03,  1.36it/s][A
Batch:  10%|█         | 10/96 [00:05<00:54,  1.59it/s][A
Batch:  11%|█▏        | 11/96 [00:06<00:47,  1.78it/s][A
Batch:  12%|█▎        | 12/96 [00:06<00:42,  1.97it/s][A
Batch:  14%|█▎        | 13/96 [00:07<00:39,  2.11it/s][A
Batch:  15%|█▍        | 14/96 [00:07<00:43,  1.87it/s][A
Batch:  16%|█▌        | 15/96 [00:08<00:39,  2.05it/s][A
Batch:  17%|█▋        | 16/96 [00:08<00:36,

defaultdict(<class 'list'>, {'epoch': [0], 'train_loss': [1.9797084629535675], 'train_mse': [1.754181146621704], 'valid_loss': [1.5039867401123046], 'valid_mse': [1.4654463529586792]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.13it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.43it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.50it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:43,  2.13it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:38,  2.33it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.41it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.42it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.33it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:37,  2.30it/s][A
Batch:  10%|█         | 10/96 [00:04<00:44,  1.95it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:41,  2.05it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:39,  2.15it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.25it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:35,  2.33it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.40it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.44it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1], 'train_loss': [1.9797084629535675, 1.2134958902994792], 'train_mse': [1.754181146621704, 1.210875153541565], 'valid_loss': [1.5039867401123046, 1.4068276643753053], 'valid_mse': [1.4654463529586792, 1.3711152076721191]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:09,  1.36it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:53,  1.77it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:46,  2.02it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:44,  2.09it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.30it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.45it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:35,  2.53it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:39,  2.22it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:37,  2.32it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.39it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.40it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.48it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.49it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:32,  2.49it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:36,  2.20it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:35,  2.26it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:41,  2.30it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:37,  2.53it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.64it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.60it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.21it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.29it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:38,  2.33it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.34it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:37,  2.33it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.34it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.29it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:41,  2.03it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:38,  2.15it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:36,  2.26it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.36it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.51it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:49,  1.92it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:56,  1.67it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:46,  1.99it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:40,  2.25it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.40it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.53it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:33,  2.63it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.67it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:38,  2.25it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.38it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.49it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.58it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.65it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.65it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.68it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:40,  1.98it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:59,  1.61it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:45,  2.06it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:39,  2.34it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.50it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:36,  2.48it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.51it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.47it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:42,  2.08it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.17it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.25it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.32it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.41it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.48it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.54it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:37,  2.18it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.29it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:46,  2.03it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:40,  2.31it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.17it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.38it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:45,  2.02it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:40,  2.21it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:37,  2.39it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.49it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.59it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.55it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.59it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.22it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.29it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:35,  2.28it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.32it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.32it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:39,  2.41it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.57it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:38,  2.43it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:46,  1.97it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.07it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.16it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:40,  2.18it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.35it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:35,  2.45it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.14it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.26it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:35,  2.35it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:34,  2.42it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:33,  2.46it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.54it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.62it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253, 1.2791754007339478]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:42,  2.21it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:39,  2.38it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.53it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.56it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:35,  2.54it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.57it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:41,  2.15it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.18it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:40,  2.17it/s][A
Batch:  10%|█         | 10/96 [00:04<00:39,  2.19it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.18it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.23it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.24it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:41,  1.99it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:38,  2.12it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:36,  2.22it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253, 1.2791754007339478, 1.3130725622177124]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:50,  1.90it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:45,  2.09it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:41,  2.24it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:46,  1.98it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.21it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.37it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:36,  2.43it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.51it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.58it/s][A
Batch:  10%|█         | 10/96 [00:04<00:32,  2.62it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.24it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:35,  2.36it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.49it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.58it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.56it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.42it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253, 1.2791754007339478, 1.3130725622177124, 1.288860559463501]})



Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:42,  2.24it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:51,  1.83it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.13it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:39,  2.33it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:38,  2.37it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.34it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.32it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.31it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:44,  1.97it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.13it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.27it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.42it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.54it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:31,  2.60it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.67it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:35,  2.26it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253, 1.2791754007339478, 1.3130725622177124, 1.288860559463501, 1.2981456518


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:38,  2.45it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.42it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.50it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.56it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:35,  2.56it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.19it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:37,  2.35it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.40it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.51it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.60it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:31,  2.66it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:31,  2.70it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.19it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:36,  2.25it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.30it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.29it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035, 1.2896851301193237], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2867449522018433, 1.2920000553131104, 1.3098210096359253, 1.27917


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:45,  2.07it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:44,  2.12it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.17it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:49,  1.86it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:45,  2.02it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.17it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.29it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.41it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.49it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.55it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.22it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:35,  2.38it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.46it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:32,  2.54it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.60it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.65it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035, 1.2896851301193237, 1.3710047960281373], 'valid_mse': [1.4654463529586792, 1.3711152076721191, 1.3246128559112549, 1.322115421295166, 1.2


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:04,  1.48it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:47,  1.99it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.19it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:39,  2.36it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:36,  2.49it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.54it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.58it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:41,  2.12it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.17it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.21it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.26it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.26it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.30it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:33,  2.43it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:38,  2.13it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:34,  2.29it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035, 1.2896851301193237, 1.3710047960281373, 1.3261852502822875], 'valid_mse': [1.4654463529586792


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:47,  2.01it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:43,  2.15it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.20it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.41it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.10it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.30it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:37,  2.36it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.43it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.50it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.56it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.65it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.26it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:34,  2.39it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.49it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.56it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.49it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035, 1.2896851301193237, 1.3710047960281373, 1.32618525


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:59,  1.60it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:45,  2.05it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:40,  2.28it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.42it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.30it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.30it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.31it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:45,  1.93it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:42,  2.03it/s][A
Batch:  10%|█         | 10/96 [00:04<00:39,  2.20it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.33it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.46it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.55it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:31,  2.59it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.26it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.41it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.2985729932785035, 1.3109185218811035, 1.2896


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:49,  1.91it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:43,  2.18it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:40,  2.31it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:37,  2.48it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.09it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:40,  2.24it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:37,  2.35it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.43it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.50it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.57it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.63it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:39,  2.15it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.23it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:36,  2.26it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.29it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.34it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135, 1.2831723928451537, 1.3266811847686768, 1.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:52,  1.81it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:51,  1.82it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:56,  1.65it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:49,  1.86it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:44,  2.02it/s][A
Batch:   6%|▋         | 6/96 [00:03<00:41,  2.17it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.33it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.43it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:34,  2.54it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.22it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:36,  2.35it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.48it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.54it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:31,  2.60it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.63it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:35,  2.26it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.2990557670593261, 1.3068964004516601, 1.3262903690338135


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.16it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:39,  2.37it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.48it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.51it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:35,  2.60it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:42,  2.13it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:40,  2.17it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:39,  2.23it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:39,  2.22it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.23it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.24it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:36,  2.29it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:40,  2.04it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:36,  2.22it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.37it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.48it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746121406555], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.347963523864746, 1.3431025743484497, 1.299055767059


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:49,  1.92it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:47,  1.97it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.14it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:48,  1.88it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:47,  1.92it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:43,  2.05it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:39,  2.23it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.32it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:35,  2.43it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.53it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:38,  2.21it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:36,  2.27it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.26it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:35,  2.29it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.29it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:34,  2.31it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746121406555, 0.8727739453315735], 'valid_loss': [1.5039867401123046, 1.4068276643753053, 1.3479635


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:07,  1.40it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:53,  1.75it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:47,  1.97it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:44,  2.08it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.16it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:40,  2.22it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:37,  2.37it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:41,  2.10it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:38,  2.29it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.42it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.51it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.58it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.64it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:30,  2.68it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.31it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.41it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746121406555, 0.8727739453315735, 0.8712725639343262], 'valid_loss': [1.503


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:45,  2.07it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:39,  2.40it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.47it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:44,  2.08it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.27it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.38it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.47it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.42it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.39it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.36it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:42,  2.01it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:41,  2.02it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:38,  2.16it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:35,  2.33it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.38it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.44it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746121406555, 0.8727739453315735, 0.871272563934326


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:24,  1.12it/s][A
Batch:   2%|▏         | 2/96 [00:01<01:01,  1.53it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:51,  1.81it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:44,  2.07it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.23it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.36it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:36,  2.47it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:41,  2.14it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:38,  2.28it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.38it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:34,  2.49it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.57it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.56it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:37,  2.18it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:36,  2.22it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:35,  2.26it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746121406555, 0.87277394533


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:45,  2.07it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:42,  2.21it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:39,  2.35it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:40,  2.26it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:47,  1.91it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:46,  1.93it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:43,  2.03it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.15it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:38,  2.26it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.38it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.41it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:39,  2.12it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.26it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:34,  2.41it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.51it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:30,  2.59it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.8740538954734802, 0.8760746


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:08,  1.39it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:51,  1.84it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:46,  2.01it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:49,  1.86it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.10it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.27it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:37,  2.39it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.48it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:33,  2.59it/s][A
Batch:  10%|█         | 10/96 [00:04<00:32,  2.62it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:39,  2.14it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.22it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.24it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:36,  2.27it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.31it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:33,  2.42it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0.8799905180931091, 0.874


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:06,  1.42it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:51,  1.82it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:51,  1.79it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:47,  1.93it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:45,  2.00it/s][A
Batch:   6%|▋         | 6/96 [00:03<00:41,  2.15it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:45,  1.94it/s][A
Batch:   8%|▊         | 8/96 [00:04<00:41,  2.14it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:38,  2.28it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.39it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:34,  2.50it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.57it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.61it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:36,  2.23it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.39it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:32,  2.48it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634, 0.8814249634742737, 0


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:38,  2.45it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.60it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.57it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:45,  2.01it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.18it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:45,  1.99it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:44,  1.99it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:42,  2.05it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:42,  2.03it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.10it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:43,  1.95it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:41,  2.03it/s][A
Batch:  14%|█▎        | 13/96 [00:06<00:37,  2.21it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:34,  2.35it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.47it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:31,  2.56it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.8838943243026733, 0.880273163318634


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:47,  2.01it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:53,  1.77it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:46,  1.99it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.16it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.16it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.17it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:39,  2.23it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.34it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.13it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.27it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.42it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.53it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.50it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:33,  2.47it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:38,  2.09it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:36,  2.18it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417174339294, 0.88389432430


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:48,  1.97it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:44,  2.12it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.15it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:41,  2.22it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.15it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:47,  1.88it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:44,  2.01it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:42,  2.09it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:39,  2.19it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.37it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.49it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.58it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.24it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:34,  2.39it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.51it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.59it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.8896504640579224, 0.8861417


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:40,  2.32it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:37,  2.47it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.60it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:44,  2.08it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.26it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.30it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.33it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.17it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.12it/s][A
Batch:  10%|█         | 10/96 [00:04<00:39,  2.15it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:44,  1.91it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:41,  2.05it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.23it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:34,  2.38it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.50it/s][A
Batch:  17%|█▋        | 16/96 [00:07<00:31,  2.51it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0.8942691087722778, 0.889


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:01,  1.54it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:46,  2.04it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:40,  2.30it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.49it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:35,  2.58it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.64it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.58it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:42,  2.08it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:40,  2.14it/s][A
Batch:  10%|█         | 10/96 [00:04<00:39,  2.18it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.24it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:38,  2.20it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.33it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:33,  2.45it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:36,  2.21it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.38it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.9012981653213501, 0.8983248472213745, 0


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:54,  1.74it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:46,  2.03it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:39,  2.37it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.53it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.23it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.43it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.56it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:33,  2.64it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.70it/s][A
Batch:  10%|█         | 10/96 [00:03<00:30,  2.78it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:29,  2.85it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.42it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.56it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.70it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:28,  2.83it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:27,  2.90it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.9085344076156616, 0.901298165321350


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<01:03,  1.50it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:48,  1.95it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:46,  2.02it/s][A
Batch:   4%|▍         | 4/96 [00:02<00:44,  2.09it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.10it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.16it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:39,  2.24it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.33it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.13it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.30it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.46it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.50it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.62it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:30,  2.68it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.38it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.58it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542298316956, 0.90853440761


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:43,  2.17it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.41it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.63it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.58it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.20it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.39it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.55it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.49it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.53it/s][A
Batch:  10%|█         | 10/96 [00:03<00:32,  2.62it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.64it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:36,  2.29it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.31it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:35,  2.33it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.41it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.44it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.9346914887428284, 0.9184542


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:46,  2.05it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:50,  1.86it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.17it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.41it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:35,  2.57it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.60it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:32,  2.73it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:31,  2.82it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.38it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.48it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.61it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:30,  2.75it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:29,  2.80it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.82it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:28,  2.85it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.43it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0.9683411121368408, 0.934


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:33,  2.82it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:35,  2.63it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.53it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:37,  2.48it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.44it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:42,  2.13it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:37,  2.38it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.48it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.58it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.57it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:31,  2.66it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:37,  2.24it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.33it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.37it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.40it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.45it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459], 'train_mse': [1.754181146621704, 1.210875153541565, 1.0938191413879395, 1.0170938968658447, 0


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:35,  2.66it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.89it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:40,  2.28it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:37,  2.43it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:35,  2.59it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.47it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.47it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.48it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:35,  2.47it/s][A
Batch:  10%|█         | 10/96 [00:04<00:42,  2.02it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:40,  2.11it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:38,  2.20it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.28it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.37it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.47it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.58it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171], 'train_mse': [1.754181146621704, 1.210875153541565, 1.093819141387939


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.14it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:41,  2.26it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:39,  2.34it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.36it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.44it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:43,  2.06it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.29it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.48it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.63it/s][A
Batch:  10%|█         | 10/96 [00:04<00:30,  2.78it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:29,  2.88it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:28,  2.97it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.50it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.61it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:29,  2.73it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:28,  2.80it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085], 'train_mse': [1.754181146621704, 1.2108751535


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:34,  2.75it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.89it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:44,  2.09it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.18it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.33it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.43it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.42it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.56it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.61it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.31it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.47it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.62it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.72it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:28,  2.88it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:27,  2.96it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:28,  2.79it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644], 'train_mse': [1.75418


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:57,  1.64it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:44,  2.09it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:38,  2.42it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.68it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:33,  2.75it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:32,  2.79it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:30,  2.92it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.40it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.52it/s][A
Batch:  10%|█         | 10/96 [00:03<00:33,  2.58it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.57it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:33,  2.54it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.55it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.59it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:36,  2.24it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.36it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644, 0.8529238527019819


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:41,  2.31it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:39,  2.39it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.62it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.56it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:34,  2.67it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:32,  2.79it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:37,  2.39it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.52it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.63it/s][A
Batch:  10%|█         | 10/96 [00:03<00:33,  2.60it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.54it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:33,  2.52it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:38,  2.13it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:36,  2.22it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.35it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.43it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644, 0.852923852701


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:38,  2.47it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.45it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:50,  1.85it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:46,  1.98it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.08it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:41,  2.19it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:44,  2.01it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.15it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:37,  2.35it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.12it/s][A
Batch:  11%|█▏        | 11/96 [00:05<00:36,  2.35it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.56it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.68it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:33,  2.47it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.45it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.47it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644, 0.85292385


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:41,  2.28it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:35,  2.67it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.61it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.66it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:33,  2.75it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:32,  2.81it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:37,  2.39it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.50it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.56it/s][A
Batch:  10%|█         | 10/96 [00:03<00:33,  2.58it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.58it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.61it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.62it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:37,  2.20it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:34,  2.38it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.50it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644, 0.8529


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:40,  2.32it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.56it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:33,  2.77it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.70it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.33it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.57it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:33,  2.65it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.71it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:31,  2.74it/s][A
Batch:  10%|█         | 10/96 [00:03<00:31,  2.75it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:30,  2.80it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:37,  2.21it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:36,  2.29it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:35,  2.33it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:33,  2.40it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.46it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:38,  2.48it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:47,  1.97it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:44,  2.11it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:41,  2.23it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.26it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:40,  2.24it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:39,  2.25it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.27it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:43,  2.01it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.21it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.36it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.52it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.65it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.68it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:29,  2.76it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.40it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479721527546644


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:46,  2.03it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:43,  2.17it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:41,  2.26it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:37,  2.44it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:36,  2.53it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:33,  2.65it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.59it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.31it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.49it/s][A
Batch:  10%|█         | 10/96 [00:04<00:32,  2.64it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:31,  2.69it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:31,  2.67it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.58it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:37,  2.20it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.30it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.45it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.847972152754


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:38,  2.46it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:35,  2.65it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:33,  2.79it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:40,  2.28it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:36,  2.49it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.53it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.59it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.45it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.52it/s][A
Batch:  10%|█         | 10/96 [00:03<00:32,  2.62it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.35it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.57it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.72it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.68it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:30,  2.64it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.65it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.84797215


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.13it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:52,  1.79it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.12it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:39,  2.33it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.44it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.63it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.55it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.50it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:41,  2.11it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.22it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.30it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.44it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.52it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.58it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:29,  2.73it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.37it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.8479


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:48,  1.98it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:41,  2.25it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:41,  2.23it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:41,  2.23it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:44,  2.02it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.30it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.32it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.36it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.42it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.41it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:40,  2.12it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:36,  2.33it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.51it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.65it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.70it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.64it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:56,  1.68it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:41,  2.24it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.50it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.66it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:33,  2.75it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:32,  2.80it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:31,  2.81it/s][A
Batch:   8%|▊         | 8/96 [00:02<00:30,  2.88it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.39it/s][A
Batch:  10%|█         | 10/96 [00:03<00:33,  2.53it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.65it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:30,  2.74it/s][A
Batch:  14%|█▎        | 13/96 [00:04<00:29,  2.86it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:28,  2.92it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:33,  2.44it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.33it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707298717151085


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.13it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:37,  2.49it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.66it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.68it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:32,  2.78it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.34it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:35,  2.49it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.43it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.38it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.40it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.41it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.46it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.19it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.39it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.57it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:29,  2.68it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.870729871715


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.13it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:43,  2.18it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:49,  1.87it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:44,  2.06it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.21it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.28it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:38,  2.33it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.36it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:36,  2.36it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.11it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.30it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:33,  2.50it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.65it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.74it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:28,  2.86it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:27,  2.87it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.87072987


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:37,  2.54it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:35,  2.63it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:33,  2.77it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:33,  2.77it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:32,  2.83it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:30,  2.93it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:35,  2.48it/s][A
Batch:   8%|▊         | 8/96 [00:02<00:33,  2.60it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.64it/s][A
Batch:  10%|█         | 10/96 [00:03<00:32,  2.64it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:31,  2.71it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:30,  2.74it/s][A
Batch:  14%|█▎        | 13/96 [00:04<00:29,  2.78it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.35it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:33,  2.40it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.39it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.8707


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:43,  2.20it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.47it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.53it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.16it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:38,  2.38it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.56it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:32,  2.70it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.74it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:31,  2.73it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.13it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.24it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:36,  2.31it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.35it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.37it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.48it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.63it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:34,  2.73it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.89it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.57it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.51it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:38,  2.34it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.25it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:46,  1.90it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:43,  2.01it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.15it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.29it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.46it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.61it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.74it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:33,  2.41it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.61it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:29,  2.71it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507417347282171


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:39,  2.43it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:34,  2.73it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:32,  2.88it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:44,  2.08it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.24it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.30it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:38,  2.32it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.34it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:35,  2.44it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.51it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.20it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:36,  2.33it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:34,  2.42it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.56it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.63it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.65it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.850741734728


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:47,  2.01it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:51,  1.81it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.13it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.41it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:35,  2.58it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:33,  2.69it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:32,  2.70it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.37it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:34,  2.51it/s][A
Batch:  10%|█         | 10/96 [00:04<00:33,  2.60it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.62it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:31,  2.66it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.73it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.76it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.38it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.55it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.85074173


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:40,  2.36it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.57it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.72it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.64it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.15it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.36it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.43it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.51it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.58it/s][A
Batch:  10%|█         | 10/96 [00:03<00:32,  2.61it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.22it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:36,  2.28it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.36it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.38it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.43it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.46it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.8507


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:33,  2.85it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:42,  2.23it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.55it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.52it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:36,  2.51it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.42it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:37,  2.39it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:36,  2.41it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.38it/s][A
Batch:  10%|█         | 10/96 [00:04<00:41,  2.08it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.20it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:35,  2.34it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.44it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:33,  2.46it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.48it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:36,  2.18it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:43,  2.18it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.55it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.72it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.67it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.23it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.43it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.55it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.56it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.63it/s][A
Batch:  10%|█         | 10/96 [00:03<00:32,  2.66it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:31,  2.68it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:35,  2.39it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.54it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.69it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:29,  2.77it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:27,  2.86it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486969787627459


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:35,  2.68it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.90it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.68it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.17it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.18it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.28it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.44it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.58it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.67it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.28it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.45it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.42it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:34,  2.41it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:33,  2.43it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.43it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.44it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.848696978762


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:57,  1.66it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:41,  2.27it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:36,  2.57it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.55it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:35,  2.56it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.31it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:43,  2.03it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.17it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:38,  2.27it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.33it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.37it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.44it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.47it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:38,  2.12it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.26it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.39it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.84869697


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:51,  1.84it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:45,  2.08it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:42,  2.20it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:39,  2.31it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:43,  2.10it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.28it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:36,  2.42it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.55it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.67it/s][A
Batch:  10%|█         | 10/96 [00:04<00:30,  2.78it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:29,  2.90it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:33,  2.50it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.66it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.70it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:29,  2.75it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:28,  2.83it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.8486


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:42,  2.25it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:48,  1.95it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:41,  2.27it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:36,  2.50it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:35,  2.58it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.59it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.58it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.51it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:35,  2.48it/s][A
Batch:  10%|█         | 10/96 [00:04<00:39,  2.16it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.34it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:33,  2.54it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.69it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.78it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:27,  2.90it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.36it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:39,  2.40it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:37,  2.50it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:35,  2.65it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:33,  2.75it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:32,  2.80it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.39it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.57it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.68it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.61it/s][A
Batch:  10%|█         | 10/96 [00:03<00:33,  2.55it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.52it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:33,  2.51it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:39,  2.08it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:37,  2.19it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.31it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.40it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566891855249802


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:46,  2.06it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:52,  1.80it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:43,  2.12it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:38,  2.37it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:36,  2.49it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.57it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.59it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.56it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:41,  2.12it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.23it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.31it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.40it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.47it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.53it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.54it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:36,  2.20it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.856689185524


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:39,  2.40it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:36,  2.59it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.69it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:32,  2.81it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:31,  2.85it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:31,  2.89it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:39,  2.25it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.30it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:37,  2.31it/s][A
Batch:  10%|█         | 10/96 [00:04<00:38,  2.23it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.30it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.44it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.62it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:35,  2.30it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:32,  2.48it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:30,  2.65it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.85668918


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:47,  1.98it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:41,  2.24it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:49,  1.89it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:43,  2.13it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.31it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.46it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:33,  2.66it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:33,  2.63it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:33,  2.57it/s][A
Batch:  10%|█         | 10/96 [00:04<00:40,  2.15it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:38,  2.21it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.27it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.37it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.50it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.61it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:29,  2.71it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.8566


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:58,  1.63it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:50,  1.87it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:44,  2.08it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.18it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:42,  2.15it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.25it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:39,  2.26it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:44,  2.00it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:40,  2.14it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.30it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:34,  2.49it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:32,  2.60it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.71it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.77it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.41it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.54it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:34,  2.79it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:33,  2.81it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:34,  2.66it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:33,  2.71it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.31it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.40it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.59it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.69it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:31,  2.75it/s][A
Batch:  10%|█         | 10/96 [00:03<00:30,  2.79it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:29,  2.88it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.44it/s][A
Batch:  14%|█▎        | 13/96 [00:04<00:32,  2.56it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:30,  2.69it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:28,  2.81it/s][A
Batch:  17%|█▋        | 16/96 [00:05<00:27,  2.89it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539101692537466


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:44,  2.12it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:40,  2.33it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:50,  1.82it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:44,  2.06it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:40,  2.26it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.45it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:34,  2.57it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:33,  2.61it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.65it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.28it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.33it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:37,  2.27it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:35,  2.33it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:34,  2.37it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.42it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.51it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.853910169253


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:34,  2.74it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.87it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:31,  2.93it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:33,  2.78it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:36,  2.50it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.51it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:42,  2.09it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:39,  2.22it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:37,  2.32it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.44it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.57it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.56it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:32,  2.57it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.58it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:37,  2.18it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:34,  2.31it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.85391016


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:55,  1.72it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:45,  2.05it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:41,  2.22it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:39,  2.34it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:45,  2.02it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:39,  2.25it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:36,  2.42it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.58it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:32,  2.71it/s][A
Batch:  10%|█         | 10/96 [00:04<00:30,  2.80it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:29,  2.90it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:35,  2.37it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:33,  2.45it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.57it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:30,  2.65it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:29,  2.74it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.8539


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:42,  2.22it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:54,  1.71it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:45,  2.05it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:41,  2.23it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:39,  2.28it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.39it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:35,  2.52it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.19it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.36it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.51it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.62it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:31,  2.68it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:31,  2.61it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:31,  2.59it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:37,  2.13it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:36,  2.22it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:33,  2.86it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:31,  2.97it/s][A
Batch:   3%|▎         | 3/96 [00:00<00:30,  3.08it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:30,  3.03it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:37,  2.39it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:37,  2.42it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.43it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:37,  2.37it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.36it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.29it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.33it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:34,  2.40it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.19it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:34,  2.39it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.55it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:29,  2.70it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572949909915527


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:48,  1.96it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:40,  2.32it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:49,  1.88it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:42,  2.15it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:38,  2.38it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.51it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:34,  2.59it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:34,  2.56it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:39,  2.18it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.26it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:36,  2.35it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:34,  2.40it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:34,  2.44it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:32,  2.51it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:31,  2.55it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:36,  2.18it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.857294990991


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:34,  2.73it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:32,  2.86it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:31,  2.91it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:33,  2.75it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:34,  2.64it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:42,  2.13it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:39,  2.24it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.29it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:38,  2.28it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.40it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.56it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.57it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.23it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:35,  2.34it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:33,  2.40it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.44it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.85729499


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:43,  2.18it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:39,  2.39it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:47,  1.95it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:41,  2.24it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.45it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:34,  2.59it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:33,  2.66it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:32,  2.69it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:31,  2.74it/s][A
Batch:  10%|█         | 10/96 [00:04<00:36,  2.35it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:35,  2.39it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:32,  2.55it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:30,  2.70it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:29,  2.81it/s][A
Batch:  16%|█▌        | 15/96 [00:05<00:27,  2.91it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:26,  2.98it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.8572


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:36,  2.60it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:33,  2.83it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:31,  2.93it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:34,  2.67it/s][A
Batch:   5%|▌         | 5/96 [00:01<00:35,  2.60it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:36,  2.49it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:42,  2.11it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:38,  2.31it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:36,  2.41it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.51it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:32,  2.61it/s][A
Batch:  12%|█▎        | 12/96 [00:04<00:31,  2.67it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:29,  2.77it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:36,  2.24it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:35,  2.31it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:33,  2.38it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098, 0.


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:36,  2.58it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:33,  2.79it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:32,  2.88it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:40,  2.25it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:37,  2.44it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:35,  2.53it/s][A
Batch:   7%|▋         | 7/96 [00:02<00:36,  2.42it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:35,  2.45it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:35,  2.48it/s][A
Batch:  10%|█         | 10/96 [00:04<00:34,  2.50it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:41,  2.07it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:39,  2.14it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:37,  2.23it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:36,  2.26it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:34,  2.38it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:31,  2.50it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.8587259321163098


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:45,  2.07it/s][A
Batch:   2%|▏         | 2/96 [00:01<00:55,  1.70it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:47,  1.95it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:43,  2.12it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:41,  2.20it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:38,  2.36it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:35,  2.49it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:33,  2.61it/s][A
Batch:   9%|▉         | 9/96 [00:04<00:38,  2.27it/s][A
Batch:  10%|█         | 10/96 [00:04<00:35,  2.43it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:33,  2.57it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:31,  2.67it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:29,  2.77it/s][A
Batch:  15%|█▍        | 14/96 [00:05<00:28,  2.85it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:28,  2.87it/s][A
Batch:  17%|█▋        | 16/96 [00:06<00:32,  2.43it/s][A
Batch:  18%|█▊        | 17/96 [

defaultdict(<class 'list'>, {'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80], 'train_loss': [1.9797084629535675, 1.2134958902994792, 1.0961089121798675, 1.019505084802707, 0.970624710743626, 0.9361999065925678, 0.9203691116223732, 0.9102975397060314, 0.9030387476086617, 0.9006111019601425, 0.8957089508573214, 0.8914586945126454, 0.8876819796860218, 0.8860143677641948, 0.8821020666509867, 0.8829322097202142, 0.8817982642600933, 0.8758441414684057, 0.8783296688149372, 0.8755392972379923, 0.8733817531416813, 0.8687647537638744, 0.8692146874964237, 0.8681346314648787, 0.8651720906297365, 0.8635766903559366, 0.864732313901186, 0.8641628064215183, 0.8598204075048367, 0.8591968522717556, 0.8583729968716701, 0.858725932116


Batch:   0%|          | 0/96 [00:00<?, ?it/s][A
Batch:   1%|          | 1/96 [00:00<00:42,  2.22it/s][A
Batch:   2%|▏         | 2/96 [00:00<00:38,  2.43it/s][A
Batch:   3%|▎         | 3/96 [00:01<00:37,  2.47it/s][A
Batch:   4%|▍         | 4/96 [00:01<00:35,  2.60it/s][A
Batch:   5%|▌         | 5/96 [00:02<00:36,  2.48it/s][A
Batch:   6%|▋         | 6/96 [00:02<00:43,  2.06it/s][A
Batch:   7%|▋         | 7/96 [00:03<00:42,  2.09it/s][A
Batch:   8%|▊         | 8/96 [00:03<00:40,  2.15it/s][A
Batch:   9%|▉         | 9/96 [00:03<00:39,  2.21it/s][A
Batch:  10%|█         | 10/96 [00:04<00:37,  2.30it/s][A
Batch:  11%|█▏        | 11/96 [00:04<00:37,  2.27it/s][A
Batch:  12%|█▎        | 12/96 [00:05<00:40,  2.06it/s][A
Batch:  14%|█▎        | 13/96 [00:05<00:40,  2.06it/s][A
Batch:  15%|█▍        | 14/96 [00:06<00:40,  2.05it/s][A
Batch:  16%|█▌        | 15/96 [00:06<00:38,  2.10it/s][A

In [129]:
{e:v for e,v in enumerate(torch.sqrt(torch.tensor(trainer.results['valid_mse'])))}

{0: tensor(1.1409),
 1: tensor(1.1329),
 2: tensor(1.1136),
 3: tensor(1.1165),
 4: tensor(1.1138)}

<hr>

# DEBUGGING

In [62]:
dl = DataLoader(train, 32)
z = next(iter(dl))
z

[{'user': tensor([  66, 1566, 1665,   60,   30, 2355, 1692,   64,  934, 1067, 1065, 2515,
           566, 1637,  862,  129,  219, 1960,   17,   28,  441,  327,   53,  431,
            27,  812, 2028,  803,  137, 2515, 1158,  682]),
  'movie': tensor([3132, 3132, 3132, 3132, 3132, 2821, 3132, 3132, 3132, 3132, 3132, 3132,
          3132, 3132, 3132, 3132, 3132, 3132, 3132, 2823, 3132, 3132, 3132, 3132,
          3132, 3132, 3132, 3132, 3132, 3132, 3132, 2823]),
  'hour': tensor([ 1,  3, 17, 20,  6, 21,  6,  7, 23, 23, 12, 21, 21, 21,  0,  0,  2, 14,
          15, 18,  4, 13, 16, 16, 17,  8, 21,  2,  3,  4,  6,  8]),
  'day_of_week': tensor([2, 2, 5, 2, 6, 2, 0, 5, 3, 3, 2, 5, 4, 5, 4, 5, 0, 3, 3, 0, 6, 3, 0, 1,
          5, 1, 2, 4, 1, 0, 2, 2]),
  'month': tensor([ 4,  4,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
           8,  8,  9,  9,  9,  9,  9, 10, 10, 11, 11, 11, 11, 11]),
  'gender': tensor([1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,

In [86]:
user_embeds = nn.Embedding(
    num_embeddings=max(meta['occupations'])+1,
    embedding_dim=25
)

In [93]:
user_embeds(z[0]['occupation']).shape

torch.Size([32, 25])

In [120]:
z[0]['gender'].dtype

torch.int64

In [121]:
stack_features(z[0], 'genre').float()

tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0.,

In [122]:
meta['movie']

{'2858': 0.003427283697707179,
 '260': 0.002990375011622571,
 '1196': 0.002989375220578899,
 '1210': 0.0028823975789060087,
 '480': 0.0026714416686912435,
 '2028': 0.002652445638861478,
 '589': 0.0026484464746867904,
 '2571': 0.0025894588031101498,
 '1270': 0.002582460265804447,
 '593': 0.0025774613105860873,
 '1580': 0.0025374696688392124,
 '1198': 0.0025134746837910875,
 '608': 0.0025124748927474157,
 '2762': 0.002458486176389135,
 '110': 0.002442489519690385,
 '2396': 0.002368504982458666,
 '1197': 0.0023175156392314005,
 '527': 0.0023035185646199945,
 '1617': 0.0022875219079212445,
 '1265': 0.0022775239974845256,
 '1097': 0.002268525878091479,
 '2628': 0.002249529848261713,
 '2997': 0.0022405317288686664,
 '318': 0.0022265346542572604,
 '858': 0.002222535490082573,
 '356': 0.0021935415498160883,
 '2716': 0.002180544266248354,
 '296': 0.0021705463558116356,
 '1240': 0.0020975616096235885,
 '1': 0.0020765659977064793,
 '1214': 0.00202357707239187,
 '2916': 0.0019955829231690578,
 '45