In [1]:
# Steps for running this notebook on Colab:
# 1. Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

# 2. Install the required packages
!pip3 install transformers
!pip3 install pytorch-crf==0.7.2

import os
def get_root_dir():
    """Return the project root directory.

    Returns the project folder on the mounted Google Drive when that folder
    exists (i.e. when running on Colab), otherwise './' (running locally).
    """
    colab_root = '/content/gdrive/MyDrive/第二次进行实体识别-面向课程_toColab/'
    return colab_root if os.path.exists(colab_root) else './'

# 3. Switch to the project directory via a Python call; this is the equivalent of `cd`, but a bare `!cd` does not work
print("change to path:",get_root_dir())
os.chdir(get_root_dir())

# 4. Double-check the current path
print('current path:')
!pwd
print('ls in current path:')
!ls

# Display the result of every expression in a cell without wrapping it in print()
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'


Mounted at /content/gdrive
Collecting transformers
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 9.8 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 23.4 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 60.5 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 484 kB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 45.0 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses

# 课前准备

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

注意1：
    ner任务，是需要获取句子中每个词的标签，在实现过程中一般是对每个字，基于一定规则进行打标签，具体方式可以参考上图显示

注意2：
    一般ner的标签是：
    1. S-game，所代表的含义是，这单个字为代表game的实体
    2. B-game，所代表的含义是，为这个game实体的开始标志位
    3. E-game，所代表的含义是，为这个game实体的结束的标志位
    4. I-game，所代表的含义是，为这个game实体的中间的标志位
    5. O，所代表的含义是，这个字不是我们待处理的实体

那么现在我们就开始构建相应的数据集

# 数据预处理

In [1]:
from collections import defaultdict
from operator import itemgetter
from tqdm import tqdm
import numpy as np
import random
import torch 
import jieba
import json
import os

import pickle as pk

# Choose the training device: the first CUDA GPU when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    torch.cuda.set_device(0)

# Hyper-parameters for model construction and training.
parameter_copy = dict(
    d_model=768,      # embedding dimension
    hid_dim=300,      # hidden size of the RNN
    epoch=100,        # number of training epochs
    batch_size=100,   # samples per training batch
    n_layers=2,       # two stacked LSTM layers (the original reference apparently uses one)
    dropout=0.1,      # dropout rate, to limit over-fitting
    device=device,    # cpu / gpu selection made above
    lr=0.001,         # learning rate
    momentum=0.99,    # optimizer momentum (mainly relevant for SGD)
)

def build_dataSet(parameter):
    """Build the train/dev NER datasets and the tag / character vocabularies.

    Reads ``data/train.json`` and ``data/dev.json`` (one JSON object per line,
    each with a ``text`` string and a ``label`` mapping of
    ``entity_type -> {entity_text: [[start, end], ...]}`` where ``end`` is
    inclusive) and converts every sample into character-level tags:
    ``S-<type>`` for a one-character entity, ``B-<type>`` / ``I-<type>`` /
    ``E-<type>`` for the begin / inside / end of a longer one, ``O`` elsewhere.

    Args:
        parameter: dict that is updated in place with the keys ``key2ind``,
            ``ind2key``, ``word2ind``, ``ind2word``, ``data_set``,
            ``output_size`` and ``word_size``.

    Returns:
        The same ``parameter`` dict.

    Raises:
        OSError: if one of the data files cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``text`` or ``label``.
    """
    data_set = {}
    # Only the insertion order of these tables matters: it defines the ids.
    key_table = defaultdict(int)
    vocab_table = defaultdict(int)
    # Reserve the first two vocabulary ids: '<PAD>' -> 0 and '<UNK>' -> 1.
    vocab_table['<PAD>'] = 0
    vocab_table['<UNK>'] = 0
    # Register 'O' up front so it always maps to tag id 0, even for empty files.
    key_table['O']
    # The train and dev splits share the vocabularies, so build them together.
    for split in ('train', 'dev'):
        samples = []
        # `with` guarantees the file handle is closed (the original leaked it).
        with open('data/'+split+'.json', 'r', encoding='utf-8') as src:
            for line in src:
                if not line.strip():
                    # Skip blank lines (e.g. a trailing newline at end of file).
                    continue
                record = json.loads(line)
                text = list(record['text'])
                label_new = ['O'] * len(text)
                for entity_type, mentions in record['label'].items():
                    # The entity text itself is ignored; the spans index into `text`.
                    for spans in mentions.values():
                        for span in spans:
                            start, end = span[0], span[1]
                            if not 0 <= start <= end < len(text):
                                # Malformed span: ignoring it keeps label_new
                                # aligned with text and avoids reusing stale tags.
                                continue
                            if start == end:
                                # Single-character entity.
                                tags = ['S-'+entity_type]
                                label_new[start] = tags[0]
                            elif end - start == 1:
                                # Two-character entity: begin + end only.
                                tags = ['B-'+entity_type, 'E-'+entity_type]
                                label_new[start] = tags[0]
                                label_new[end] = tags[1]
                            else:
                                # Longer entity: begin, one or more inside, end.
                                tags = ['B-'+entity_type, 'I-'+entity_type, 'E-'+entity_type]
                                label_new[start] = tags[0]
                                label_new[start+1:end] = [tags[1]] * (end - start - 1)
                                label_new[end] = tags[2]
                            # Register the tags so they receive an id below.
                            for tag in tags:
                                key_table[tag] += 1
                # Register every character of the sample in the vocabulary.
                for char in text:
                    vocab_table[char] += 1
                samples.append([text, label_new])
        data_set[split] = samples
    # Tag <-> id and character <-> id lookups, numbered in insertion order.
    key2ind = {key: idx for idx, key in enumerate(key_table)}
    ind2key = {idx: key for key, idx in key2ind.items()}
    word2ind = {word: idx for idx, word in enumerate(vocab_table)}
    ind2word = {idx: word for word, idx in word2ind.items()}
    parameter['key2ind'] = key2ind
    parameter['ind2key'] = ind2key
    parameter['word2ind'] = word2ind
    parameter['ind2word'] = ind2word
    parameter['data_set'] = data_set
    parameter['output_size'] = len(key2ind)
    parameter['word_size'] = len(word2ind)
    return parameter


def batch_yield(parameter,shuffle = True,isTrain = True):
    """Generate zero-padded mini-batches of character ids and tag ids.

    Args:
        parameter: dict providing ``data_set`` (with ``train`` / ``dev`` lists
            of ``[characters, tags]`` samples), ``word2ind``, ``key2ind``,
            ``batch_size`` and, for training, ``epoch``.
        shuffle: reshuffle the samples at the start of every epoch.
        isTrain: iterate the train split for ``parameter['epoch']`` epochs when
            true, otherwise the dev split for a single epoch.

    Yields:
        ``(inputs, targets, epoch, done)`` tuples:
        * every batch except the last of an epoch: ``(tensor, tensor, None, False)``
        * the last batch of an epoch: ``(tensor, tensor, epoch_index, False)``
        * once, after all epochs: ``(None, None, None, True)``

        Both tensors come from ``list2torch`` and are padded with 0 up to the
        longest sequence in the batch.
    """
    # Shallow copy: shuffling must not reorder the dataset held by the caller.
    data_set = list(parameter['data_set']['train' if isTrain else 'dev'])
    n_epochs = parameter['epoch'] if isTrain else 1
    batch_size = parameter['batch_size']
    word2ind = parameter['word2ind']
    key2ind = parameter['key2ind']
    last_index = len(data_set) - 1
    for epoch in range(n_epochs):
        # Re-randomise the sample order every epoch.
        if shuffle:
            random.shuffle(data_set)
        inputs,targets = [],[]
        for index, items in enumerate(tqdm(data_set)):
            # Map characters / tags to ids. Plain list comprehensions also handle
            # one-character and empty samples, which itemgetter(*seq) does not.
            inputs.append([word2ind[char] for char in items[0]])
            targets.append([key2ind[tag] for tag in items[1]])
            is_last = index == last_index
            if len(inputs) >= batch_size or is_last:
                # Pad every sequence to the longest one in this batch.
                max_len = max(len(seq) for seq in inputs)
                inputs = [seq+[0]*(max_len-len(seq)) for seq in inputs]
                targets = [seq+[0]*(max_len-len(seq)) for seq in targets]
                # The epoch index rides on the final batch of the epoch, so no
                # empty batch is emitted when the data size divides batch_size.
                yield list2torch(inputs),list2torch(targets),(epoch if is_last else None),False
                inputs,targets = [],[]
    # Sentinel telling the consumer that all epochs are finished.
    yield None,None,None,True
            

def list2torch(ins):
    """Convert a (nested) list of numbers into a torch tensor via a NumPy array."""
    as_array = np.array(ins)
    return torch.from_numpy(as_array)

# Prepare the training configuration once and cache it in parameter.pkl,
# so the dataset and vocabularies are not rebuilt on every run.
if not os.path.exists('parameter.pkl'):
    parameter = parameter_copy
    # Build the vocabularies and the datasets.
    parameter = build_dataSet(parameter)
    # `with` closes (and therefore flushes) the cache file deterministically.
    with open('parameter.pkl','wb') as pkl_file:
        pk.dump(parameter,pkl_file)
else:
    # Load the cached configuration. NOTE: unpickling can execute arbitrary
    # code, so only load a parameter.pkl produced by this notebook.
    with open('parameter.pkl','rb') as pkl_file:
        parameter = pk.load(pkl_file)
    # Training hyper-parameters may have changed since the cache was written,
    # so the values defined in this notebook always win.
    parameter.update(parameter_copy)
    for i in parameter_copy.keys():
        print(i,':',parameter[i])
    with open('parameter.pkl','wb') as pkl_file:
        pk.dump(parameter,pkl_file)
    del parameter_copy,i

d_model : 768
hid_dim : 300
epoch : 100
batch_size : 100
n_layers : 2
dropout : 0.1
device : cpu
lr : 0.001
momentum : 0.99


In [3]:
# Smoke-test the batch generator: pull one training batch and display the
# shapes of its input and target tensors.
test_yield = batch_yield(parameter)
ins,target,epoch,keys = next(test_yield)
ins.shape,target.shape

  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

(torch.Size([100, 50]), torch.Size([100, 50]))

In [4]:
# Inspect the tag -> id mapping stored by build_dataSet.
parameter['key2ind']

{'O': 0,
 'B-name': 1,
 'I-name': 2,
 'E-name': 3,
 'B-company': 4,
 'I-company': 5,
 'E-company': 6,
 'B-game': 7,
 'I-game': 8,
 'E-game': 9,
 'B-organization': 10,
 'I-organization': 11,
 'E-organization': 12,
 'B-movie': 13,
 'I-movie': 14,
 'E-movie': 15,
 'B-address': 16,
 'E-address': 17,
 'B-position': 18,
 'I-position': 19,
 'E-position': 20,
 'B-government': 21,
 'I-government': 22,
 'E-government': 23,
 'B-scene': 24,
 'I-scene': 25,
 'E-scene': 26,
 'I-address': 27,
 'B-book': 28,
 'I-book': 29,
 'E-book': 30,
 'S-company': 31,
 'S-address': 32,
 'S-name': 33,
 'S-position': 34}

In [None]:
# Show the first training sample, stored as [characters, tags].
parameter['data_set']['train'][0]

# 基于双向lstm

In [5]:
import torch.nn.functional as F # pytorch 激活函数的类
from torch import nn,optim # 构建模型和优化器

# BiLSTM-based tagger for NER
class bilstm(nn.Module):
    """Sequence tagger: id embedding -> bidirectional LSTM -> linear scoring layer."""

    def __init__(self, parameter):
        """Create the layers from the hyper-parameter dict.

        Reads 'word_size', 'd_model', 'hid_dim', 'n_layers', 'dropout' and
        'output_size' from ``parameter``.
        """
        super().__init__()
        # Characters are embedded directly from their vocabulary id;
        # id 0 is declared as the padding index.
        self.embedding = nn.Embedding(
            parameter['word_size'], parameter['d_model'], padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=parameter['d_model'],
            hidden_size=parameter['hid_dim'],
            num_layers=parameter['n_layers'],
            batch_first=True,
            dropout=parameter['dropout'],
            bidirectional=True,
        )
        # A bidirectional LSTM emits both directions, hence hid_dim * 2 features.
        self.fc = nn.Linear(parameter['hid_dim'] * 2, parameter['output_size'])

    def forward(self, x):
        """Score every position of ``x`` and flatten all leading dimensions.

        Returns a 2-D tensor whose last dimension is ``output_size``.
        """
        embedded = self.embedding(x)
        encoded, _ = self.lstm(embedded)
        scores = self.fc(encoded)
        return scores.view(-1, scores.size(-1))

In [6]:
import os
import shutil
import pickle as pk
from torch.utils.tensorboard import SummaryWriter


# Build the model and move it to the configured device.
model = bilstm(parameter).to(parameter['device'])

# Switch the model to training mode.
model.train()

# Optimizer and loss.
# NOTE(review): lr and momentum are hard-coded here and do not use
# parameter['lr'] / parameter['momentum'] — confirm which values are intended.
optimizer = torch.optim.SGD(model.parameters(),lr=0.1, momentum=0.95, nesterov=True)
criterion = nn.CrossEntropyLoss()

# Batch generator over the training split.
train_yield = batch_yield(parameter)

# Training loop.
loss_cal = []            # batch losses of the current epoch
min_loss = float('inf')  # lowest epoch-average training loss seen so far
for inputs,targets,epoch,keys in train_yield:
    # `keys` is the generator's "all epochs finished" flag.
    if keys:
        break
    out = model(inputs.long().to(parameter['device']))
    # The model returns flattened per-token scores, so flatten the targets too.
    loss = criterion(out, targets.view(-1).long().to(parameter['device']))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_cal.append(loss.item())
    # A non-None epoch marks the last batch of that epoch.
    if epoch is not None:
        epoch_loss = sum(loss_cal)/len(loss_cal)
        # Keep the checkpoint with the lowest average training loss.
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            torch.save(model.state_dict(), 'bilstm.h5')
        print('epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, \
                                               parameter['epoch'],epoch_loss))
        # Start the next epoch with an empty list; the original seeded it with
        # this epoch's last batch loss, which counted that loss twice.
        loss_cal = []


bilstm(
  (embedding): Embedding(3746, 768, padding_idx=0)
  (lstm): LSTM(768, 300, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (fc): Linear(in_features=600, out_features=35, bias=True)
)


  0%|          | 0/10748 [00:00<?, ?it/s][A
  1%|          | 100/10748 [00:00<00:18, 589.61it/s][A
  3%|▎         | 300/10748 [00:00<00:08, 1229.04it/s][A
  6%|▌         | 600/10748 [00:00<00:05, 1698.77it/s][A
  8%|▊         | 900/10748 [00:00<00:05, 1928.63it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2053.74it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2129.61it/s][A
 17%|█▋        | 1800/10748 [00:00<00:04, 2190.33it/s][A
 20%|█▉        | 2100/10748 [00:01<00:03, 2215.43it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2226.34it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2243.24it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2251.89it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2252.28it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2252.08it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2267.05it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2278.95it/s][A
 42%|████▏     | 4500/10748 [00:02<00:02, 2277.54it/s][A
 45%|████▍     | 4800/10748 [00

epoch [1/100], Loss: 1.0517



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2375.21it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2344.03it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2335.02it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2312.73it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2309.70it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2298.05it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2291.88it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2288.39it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2297.18it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2306.33it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2293.37it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2295.71it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.68it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2284.61it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2282.31it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2288.06it/s][A
 47%|████▋     | 5100/10748 [

epoch [2/100], Loss: 0.6444



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2323.67it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2301.40it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2293.62it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2287.92it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2278.16it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2273.72it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2274.02it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2279.38it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2277.28it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2293.91it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2290.44it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2290.96it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.18it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2281.68it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2271.10it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2276.28it/s][A
 47%|████▋     | 5100/10748 [

epoch [3/100], Loss: 0.4691



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2329.41it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2311.99it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.77it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2304.69it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2303.73it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2294.29it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2279.74it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2284.39it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2285.16it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2297.54it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.16it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2273.21it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.34it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2285.70it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2273.85it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2275.95it/s][A
 47%|████▋     | 5100/10748 [

epoch [4/100], Loss: 0.3539



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2351.25it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2316.34it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2313.74it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2279.62it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.87it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.71it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2285.09it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2280.69it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2280.58it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2276.43it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.42it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.02it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2274.69it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2275.71it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2271.31it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2273.38it/s][A
 47%|████▋     | 5100/10748 [

epoch [5/100], Loss: 0.2842



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2314.15it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2283.48it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2282.78it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2290.87it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2269.38it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2276.57it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2272.41it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2278.95it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2283.51it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2294.46it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2302.75it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2296.84it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2295.17it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2302.46it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2297.77it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2301.42it/s][A
 47%|████▋     | 5100/10748 [

epoch [6/100], Loss: 0.2302



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2405.97it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.53it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2294.41it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2287.15it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2277.36it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2279.28it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2284.33it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2296.11it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2290.26it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2289.45it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.99it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2280.92it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.76it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2290.26it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2305.38it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2304.04it/s][A
 47%|████▋     | 5100/10748 [

epoch [7/100], Loss: 0.1864



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2359.22it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2318.38it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2301.49it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2293.31it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2293.05it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2288.71it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2291.73it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2281.21it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2276.41it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2288.13it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2280.71it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2282.97it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2271.66it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2280.61it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2282.23it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2286.76it/s][A
 47%|████▋     | 5100/10748 [

epoch [8/100], Loss: 0.1542



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2320.56it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2275.16it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2282.38it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2288.09it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2279.31it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2277.92it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2287.97it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2288.33it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2263.78it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2278.65it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.78it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2286.14it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2297.77it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2290.50it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2277.22it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2278.73it/s][A
 47%|████▋     | 5100/10748 [

epoch [9/100], Loss: 0.1257



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2339.94it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.72it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2307.15it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2311.58it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2303.55it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2299.10it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2299.26it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2291.77it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2280.79it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2295.30it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2298.76it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2284.37it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2282.83it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2295.45it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2279.69it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2282.44it/s][A
 47%|████▋     | 5100/10748 [

epoch [10/100], Loss: 0.1010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2335.60it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2293.15it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2277.03it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2275.28it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2280.91it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2259.89it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2244.93it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2259.48it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2260.63it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2277.28it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2263.57it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2266.51it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2276.60it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.01it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2281.26it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2293.08it/s][A
 47%|████▋     | 5100/10748 [

epoch [11/100], Loss: 0.0816



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2339.51it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2304.16it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2302.96it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2295.31it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2296.15it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2294.69it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2296.72it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2285.17it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2294.27it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2282.64it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.75it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2266.76it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2273.00it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2281.89it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2293.45it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2290.44it/s][A
 47%|████▋     | 5100/10748 [

epoch [12/100], Loss: 0.0681



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2300.53it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2265.19it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2277.32it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2274.46it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2285.79it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2283.04it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2282.17it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2268.19it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2273.88it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2281.17it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2279.46it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2280.31it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2286.45it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2287.33it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2293.06it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2287.92it/s][A
 47%|████▋     | 5100/10748 [

epoch [13/100], Loss: 0.0540



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2381.77it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2294.25it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2275.20it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2284.66it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2276.72it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2278.56it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2275.63it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2273.68it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2261.40it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2264.84it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2265.76it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.11it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2274.20it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.96it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2276.60it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2277.38it/s][A
 47%|████▋     | 5100/10748 [

epoch [14/100], Loss: 0.0448



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2356.34it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2322.57it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2303.64it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2311.23it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2275.18it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2283.77it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2279.30it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2284.98it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2274.24it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2274.77it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2283.98it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2272.89it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2270.96it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2271.43it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2274.27it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2271.28it/s][A
 47%|████▋     | 5100/10748 [

epoch [15/100], Loss: 0.0361



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2361.79it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2326.08it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2316.17it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2309.86it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2304.27it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2308.90it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2291.43it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2293.22it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2303.07it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2296.32it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2302.64it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2303.97it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2293.71it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.53it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2263.66it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2263.99it/s][A
 47%|████▋     | 5100/10748 [

epoch [16/100], Loss: 0.0294



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2331.16it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.59it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2288.67it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2286.26it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2291.73it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.91it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2289.30it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2283.55it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2288.28it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2280.74it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.37it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2281.13it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2292.34it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2296.34it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2290.43it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2282.92it/s][A
 47%|████▋     | 5100/10748 [

epoch [17/100], Loss: 0.0252



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2326.67it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2292.98it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2307.68it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2311.73it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2284.78it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2276.43it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.60it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2275.89it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2288.22it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2292.05it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.17it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2271.24it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2269.59it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2257.77it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.67it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2270.89it/s][A
 47%|████▋     | 5100/10748 [

epoch [18/100], Loss: 0.0214



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2379.99it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2311.36it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2313.09it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2278.73it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2278.07it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2273.24it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2286.34it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2289.04it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2297.26it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2284.77it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2282.92it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.11it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2265.19it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2268.07it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.99it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2273.10it/s][A
 47%|████▋     | 5100/10748 [

epoch [19/100], Loss: 0.0177



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2379.49it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2305.54it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2292.18it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2276.80it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2283.53it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2284.72it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2294.57it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2297.74it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2281.09it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2263.17it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2264.48it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2266.43it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2275.74it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2279.54it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2274.04it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2264.85it/s][A
 47%|████▋     | 5100/10748 [

epoch [20/100], Loss: 0.0151



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2380.43it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2302.80it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2305.62it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2308.55it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2290.19it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2290.90it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2287.16it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2288.64it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2283.03it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2263.99it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2277.03it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2282.72it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.56it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2282.04it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2291.69it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2296.52it/s][A
 47%|████▋     | 5100/10748 [

epoch [21/100], Loss: 0.0142



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2354.52it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2314.91it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2284.82it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2276.32it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2262.16it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2285.05it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2269.99it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2273.42it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2286.50it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2273.82it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.90it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2286.17it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2279.96it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2281.71it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.28it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2289.29it/s][A
 47%|████▋     | 5100/10748 [

epoch [22/100], Loss: 0.0117



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2326.07it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2312.00it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2297.29it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2299.53it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2285.88it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2296.37it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2291.22it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2295.29it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2294.73it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2295.91it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2293.08it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2293.83it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2289.32it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2285.06it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2279.11it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2284.89it/s][A
 47%|████▋     | 5100/10748 [

epoch [23/100], Loss: 0.0104



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2362.70it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2295.85it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2275.65it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2279.53it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2285.30it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2280.20it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2277.01it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2264.10it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2245.86it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2251.03it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2256.56it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2268.65it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2274.36it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2282.25it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2287.61it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2290.23it/s][A
 47%|████▋     | 5100/10748 [

epoch [24/100], Loss: 0.0102



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2342.27it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2295.90it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2301.86it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2277.77it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2291.02it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2293.96it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.31it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2291.76it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2283.25it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2282.52it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2287.16it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2284.24it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2282.13it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.32it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2266.25it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2272.90it/s][A
 47%|████▋     | 5100/10748 [

epoch [25/100], Loss: 0.0087



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2313.27it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2295.33it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2286.94it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2285.08it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2281.99it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2277.56it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2271.38it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2282.80it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2294.44it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2305.08it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2283.05it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2279.20it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2289.51it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2286.29it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2283.79it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2276.63it/s][A
 47%|████▋     | 5100/10748 [

epoch [26/100], Loss: 0.0072



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2347.22it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2298.72it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2284.25it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2282.51it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.82it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2284.15it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2289.07it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2286.67it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2292.80it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2298.01it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.69it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2262.43it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2269.99it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2270.95it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2274.63it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2286.57it/s][A
 47%|████▋     | 5100/10748 [

epoch [27/100], Loss: 0.0062



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2347.93it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2321.87it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2304.01it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2296.68it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2297.91it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.76it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.93it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2287.62it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2288.72it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2277.47it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.71it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2272.98it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2289.74it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2285.45it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.61it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2289.09it/s][A
 47%|████▋     | 5100/10748 [

epoch [28/100], Loss: 0.0059



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2302.69it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2253.13it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2267.22it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2281.14it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.14it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2293.27it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2283.39it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2278.92it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2286.67it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2275.14it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2272.72it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2264.90it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2258.18it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2266.56it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.82it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2270.76it/s][A
 47%|████▋     | 5100/10748 [

epoch [29/100], Loss: 0.0052



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2354.58it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.34it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2306.58it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2296.23it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2294.47it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2290.71it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2285.53it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2290.55it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2262.55it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2280.11it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2263.66it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2277.32it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2284.02it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2281.79it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2281.11it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2275.07it/s][A
 47%|████▋     | 5100/10748 [

epoch [30/100], Loss: 0.0046



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2324.60it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2246.89it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2253.67it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2247.08it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2266.88it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2280.75it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2283.20it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2288.98it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2292.15it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2285.55it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.11it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2274.81it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2272.45it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2284.47it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2287.45it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2272.42it/s][A
 47%|████▋     | 5100/10748 [

epoch [31/100], Loss: 0.0039



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2366.92it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2294.20it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2274.14it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2287.34it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2284.08it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2290.78it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.29it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2283.65it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2276.47it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2274.86it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2274.08it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2274.32it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2272.21it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2279.99it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2271.51it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2287.35it/s][A
 47%|████▋     | 5100/10748 [

epoch [32/100], Loss: 0.0037



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2363.04it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2293.43it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2284.74it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2268.01it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2248.30it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2253.76it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2259.12it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.65it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2285.75it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2285.88it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2289.59it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2284.24it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2284.50it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.50it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2254.52it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2263.42it/s][A
 47%|████▋     | 5100/10748 [

epoch [33/100], Loss: 0.0035



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2328.97it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2277.60it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2296.35it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2300.28it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2300.82it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2264.71it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2260.83it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.67it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2286.73it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2275.28it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.52it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2285.06it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.89it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2271.30it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2284.84it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2284.14it/s][A
 47%|████▋     | 5100/10748 [

epoch [34/100], Loss: 0.0036



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2367.47it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2307.52it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2290.33it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2243.20it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2249.15it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2271.39it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2266.32it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2257.06it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2272.10it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2273.13it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2277.47it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.75it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2272.98it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2277.97it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2290.55it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2274.94it/s][A
 47%|████▋     | 5100/10748 [

epoch [35/100], Loss: 0.0033



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2316.24it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2319.16it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.54it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2303.52it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2301.43it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2296.04it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2304.41it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2309.33it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2308.55it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2291.93it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2297.53it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2286.58it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2295.25it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2290.17it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2274.31it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2278.41it/s][A
 47%|████▋     | 5100/10748 [

epoch [36/100], Loss: 0.0027



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2373.67it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2285.14it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2276.60it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2291.42it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2299.06it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2305.51it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.23it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2268.93it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2282.00it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2284.26it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.88it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2294.00it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2292.68it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2284.51it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2291.38it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2294.55it/s][A
 47%|████▋     | 5100/10748 [

epoch [37/100], Loss: 0.0025



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2264.60it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2269.29it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2274.50it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2264.17it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2273.32it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2264.73it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2272.53it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2272.54it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2272.34it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2283.71it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2288.72it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2278.42it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2292.48it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2302.64it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2287.06it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2285.77it/s][A
 47%|████▋     | 5100/10748 [

epoch [38/100], Loss: 0.0023



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2331.37it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2285.99it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2284.01it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2269.23it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2277.39it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2285.32it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2264.65it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2260.52it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2265.20it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2279.12it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.84it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2259.14it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2254.89it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2255.66it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2271.73it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2272.76it/s][A
 47%|████▋     | 5100/10748 [

epoch [39/100], Loss: 0.0022



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2327.65it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2295.55it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2288.30it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2283.08it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2278.55it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.01it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2300.44it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2286.43it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2276.53it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2264.28it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2260.54it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2266.95it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2262.54it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2266.85it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2275.22it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2274.83it/s][A
 47%|████▋     | 5100/10748 [

epoch [40/100], Loss: 0.0020



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2377.01it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2314.95it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2310.05it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2284.93it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2296.89it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2279.82it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2262.23it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2278.02it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2288.82it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2296.25it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2282.10it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2280.81it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2284.64it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2278.40it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2283.13it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2282.11it/s][A
 47%|████▋     | 5100/10748 [

epoch [41/100], Loss: 0.0020



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2316.83it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2313.66it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2312.37it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2298.31it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2267.89it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2269.40it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2286.98it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2291.42it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2293.27it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2305.28it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.07it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.19it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2273.20it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2267.09it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2270.12it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2264.27it/s][A
 47%|████▋     | 5100/10748 [

epoch [42/100], Loss: 0.0020



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2363.12it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2312.36it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.78it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2302.91it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2289.88it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.95it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2289.80it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2286.13it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2275.43it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2279.76it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2256.48it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2254.26it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2265.45it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.29it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2282.16it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2292.92it/s][A
 47%|████▋     | 5100/10748 [

epoch [43/100], Loss: 0.0018



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2334.64it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2268.66it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2269.64it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2286.15it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2275.66it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2289.54it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2294.26it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2268.04it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2261.74it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2269.93it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2267.33it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2269.31it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2269.18it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2279.10it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2292.77it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2288.25it/s][A
 47%|████▋     | 5100/10748 [

epoch [44/100], Loss: 0.0017



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2336.95it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.24it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.64it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2321.44it/s][A
 14%|█▍        | 1500/10748 [00:00<00:03, 2321.06it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2322.66it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2312.31it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2291.95it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2296.33it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2283.82it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.38it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2285.29it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2275.79it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2264.20it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2270.52it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2270.00it/s][A
 47%|████▋     | 5100/10748 [

epoch [45/100], Loss: 0.0043



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2351.16it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2320.37it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2288.91it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2243.54it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2223.78it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2237.98it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2262.52it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2263.54it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2275.37it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2274.34it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2279.26it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2282.07it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2275.31it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2266.35it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2264.42it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2261.16it/s][A
 47%|████▋     | 5100/10748 [

epoch [46/100], Loss: 0.0100



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2358.41it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2302.68it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2305.82it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2300.21it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2293.13it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2284.40it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2285.71it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2282.42it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2280.18it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2288.84it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2279.26it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2282.38it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2268.17it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2267.29it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.75it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2274.26it/s][A
 47%|████▋     | 5100/10748 [

epoch [47/100], Loss: 0.0189



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2367.06it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2332.78it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2282.51it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2288.04it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.15it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2279.22it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2272.29it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2289.73it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2279.49it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2294.08it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2275.81it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2278.07it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2277.71it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2259.90it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2261.53it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2270.41it/s][A
 47%|████▋     | 5100/10748 [

epoch [48/100], Loss: 0.0186



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2337.28it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2267.64it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2279.48it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2260.82it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2263.56it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2264.22it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2265.12it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2268.81it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2268.71it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2266.81it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2277.24it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.47it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2271.57it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.73it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2285.17it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2284.87it/s][A
 47%|████▋     | 5100/10748 [

epoch [49/100], Loss: 0.0127



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2329.23it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2293.03it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2298.52it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2281.34it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2283.80it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2280.06it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2280.02it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2282.10it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2291.61it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2290.75it/s][A
 30%|███       | 3261/10748 [00:01<00:03, 2371.15it/s][A
 33%|███▎      | 3500/10748 [00:01<00:03, 2199.06it/s][A
 35%|███▌      | 3800/10748 [00:01<00:03, 2225.94it/s][A
 38%|███▊      | 4100/10748 [00:01<00:02, 2230.43it/s][A
 41%|████      | 4400/10748 [00:01<00:02, 2236.05it/s][A
 44%|████▎     | 4700/10748 [00:02<00:02, 2242.63it/s][A
 47%|████▋     | 5000/10748 [

epoch [50/100], Loss: 0.0066



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2348.32it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2325.87it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.79it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2301.60it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2302.62it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.76it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2300.22it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2277.63it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2274.42it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2285.18it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.31it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2290.17it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2295.28it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2286.09it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2284.00it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2293.30it/s][A
 47%|████▋     | 5100/10748 [

epoch [51/100], Loss: 0.0041



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2340.17it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2317.65it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2303.03it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2291.53it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2288.20it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2293.80it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.93it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2278.58it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2284.01it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2282.32it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2289.44it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2289.48it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2269.50it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2257.66it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2255.67it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2266.74it/s][A
 47%|████▋     | 5100/10748 [

epoch [52/100], Loss: 0.0027



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2348.72it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2311.05it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2307.59it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2307.85it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2305.04it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2305.16it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2297.49it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2302.10it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2291.48it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2288.09it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2289.76it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2291.68it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2286.60it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2290.06it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2282.38it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2279.98it/s][A
 47%|████▋     | 5100/10748 [

epoch [53/100], Loss: 0.0020



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2392.06it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2319.37it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2299.22it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2287.19it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.03it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2293.96it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.15it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2276.07it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2279.44it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2286.19it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2292.63it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2290.85it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2297.00it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2300.22it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2292.03it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2292.73it/s][A
 47%|████▋     | 5100/10748 [

epoch [54/100], Loss: 0.0016



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2364.28it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2302.73it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2289.63it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2280.25it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2282.72it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2281.29it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.89it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2283.48it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2292.87it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2296.23it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2284.76it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2277.01it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2257.72it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2271.98it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2285.70it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2287.96it/s][A
 47%|████▋     | 5100/10748 [

epoch [55/100], Loss: 0.0013



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2356.35it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2297.56it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2294.21it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2283.78it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2286.38it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2286.83it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2286.34it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2279.99it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2267.25it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2265.52it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2279.16it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2289.43it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2288.05it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2280.71it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2280.07it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2266.10it/s][A
 47%|████▋     | 5100/10748 [

epoch [56/100], Loss: 0.0012



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2289.81it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2272.92it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2271.49it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2286.76it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2280.37it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2299.18it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.13it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2284.20it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2297.31it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2299.64it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2305.08it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2295.70it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.96it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.36it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.98it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2258.98it/s][A
 47%|████▋     | 5100/10748 [

epoch [57/100], Loss: 0.0011



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2379.72it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2343.07it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2294.58it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2280.98it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2259.17it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2240.83it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2253.87it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2264.29it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2274.02it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2272.02it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2288.12it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.74it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2285.41it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2278.00it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2277.97it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2272.43it/s][A
 47%|████▋     | 5100/10748 [

epoch [58/100], Loss: 0.0011



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2316.69it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2287.16it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2289.43it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2289.00it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2289.82it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2296.62it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.62it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2294.47it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2297.82it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2293.08it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2283.16it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2277.82it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.30it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2282.13it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2273.65it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2286.06it/s][A
 47%|████▋     | 5100/10748 [

epoch [59/100], Loss: 0.0011



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2362.10it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2331.62it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2304.11it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2309.91it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2299.14it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2273.56it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2279.74it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2277.69it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2277.44it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2282.16it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2289.85it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2288.50it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2277.64it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2283.94it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2280.52it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2290.62it/s][A
 47%|████▋     | 5100/10748 [

epoch [60/100], Loss: 0.0010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2348.24it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2344.33it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2335.51it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2329.78it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2309.21it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2282.37it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2279.42it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2286.96it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2277.13it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2271.41it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2278.74it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2272.21it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2267.78it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2270.62it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2253.69it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2259.23it/s][A
 47%|████▋     | 5100/10748 [

epoch [61/100], Loss: 0.0010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2387.87it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2322.34it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2308.12it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2292.48it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2289.75it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2276.65it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2239.74it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2243.59it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2242.19it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2230.42it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2249.42it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2266.66it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2285.56it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2289.60it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2288.16it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2289.21it/s][A
 47%|████▋     | 5100/10748 [

epoch [62/100], Loss: 0.0010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2407.52it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2340.28it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2328.52it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2315.93it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2259.47it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2258.81it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2259.61it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2260.08it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2267.84it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2275.34it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2283.38it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2292.04it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2282.15it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2293.83it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2283.61it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2277.88it/s][A
 47%|████▋     | 5100/10748 [

epoch [63/100], Loss: 0.0009



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2278.61it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2277.41it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2252.86it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2269.78it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2268.80it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2267.56it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2273.27it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2279.73it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2271.18it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2280.62it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2283.62it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2283.72it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2290.16it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2280.71it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2281.76it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2293.71it/s][A
 47%|████▋     | 5100/10748 [

epoch [64/100], Loss: 0.0008



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2364.29it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2329.49it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2306.21it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2307.99it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2306.98it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2298.76it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2286.23it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2264.95it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2281.72it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2267.21it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2263.21it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2276.24it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2289.15it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2283.85it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2275.53it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2259.56it/s][A
 47%|████▋     | 5100/10748 [

epoch [65/100], Loss: 0.0010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2302.48it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2243.80it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2185.06it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2213.39it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2229.35it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2255.29it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2241.84it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2247.83it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2265.42it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2262.85it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.09it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2265.63it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2240.17it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2243.35it/s][A
 42%|████▏     | 4500/10748 [00:02<00:02, 2262.09it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2265.56it/s][A
 47%|████▋     | 5100/10748 [

epoch [66/100], Loss: 0.0010



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2343.30it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2293.03it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2289.59it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2283.84it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2282.52it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2275.38it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2278.06it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2266.68it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2280.23it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2274.62it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2276.25it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2275.29it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2242.37it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2256.85it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2264.10it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2257.05it/s][A
 47%|████▋     | 5100/10748 [

epoch [67/100], Loss: 0.0008



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2334.04it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2298.04it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2296.24it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2281.06it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2268.33it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2264.06it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2259.19it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2237.85it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2259.53it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2277.83it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2293.52it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2287.77it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2279.34it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2272.11it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2263.24it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2265.56it/s][A
 47%|████▋     | 5100/10748 [

epoch [68/100], Loss: 0.0008



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2353.11it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.19it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2283.48it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2280.26it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2280.56it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2283.33it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2259.63it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2258.15it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2265.14it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2260.15it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2272.68it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2271.05it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2268.21it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2264.28it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2264.12it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2266.26it/s][A
 47%|████▋     | 5100/10748 [

epoch [69/100], Loss: 0.0009



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2344.93it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2296.83it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2307.14it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2307.60it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2300.68it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2291.48it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2292.94it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2276.63it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2270.11it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2269.37it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2274.77it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2270.84it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.32it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2285.05it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2278.72it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2277.10it/s][A
 47%|████▋     | 5100/10748 [

epoch [70/100], Loss: 0.0008



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2362.44it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2294.53it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2266.33it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2271.07it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2283.46it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2288.05it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.99it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2287.12it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2284.87it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2294.31it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2295.40it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2303.59it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2300.57it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2282.16it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2282.72it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2285.26it/s][A
 47%|████▋     | 5100/10748 [

epoch [71/100], Loss: 0.0007



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2308.83it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2299.70it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2289.99it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2277.41it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2265.94it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2256.40it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2268.94it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2271.18it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2275.77it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2286.62it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2270.14it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2283.86it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2278.22it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2278.73it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2290.72it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2282.50it/s][A
 47%|████▋     | 5100/10748 [

epoch [72/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2340.48it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2273.09it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2265.20it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2262.60it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2266.23it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2267.05it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2274.94it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2291.67it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2282.21it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2291.25it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2300.48it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2294.96it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2286.57it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2283.39it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2287.05it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2294.96it/s][A
 47%|████▋     | 5100/10748 [

epoch [73/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2321.45it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2313.06it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2254.37it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2268.12it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2251.76it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2252.08it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2249.97it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2237.09it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2249.81it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2255.64it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2254.73it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2245.64it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2254.36it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2263.26it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2277.15it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2285.69it/s][A
 47%|████▋     | 5100/10748 [

epoch [74/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2357.66it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2323.80it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2293.87it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2266.82it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2262.69it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2269.50it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2261.90it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2260.62it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2268.44it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2257.77it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2262.24it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2283.88it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2284.05it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.92it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2270.91it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2280.59it/s][A
 47%|████▋     | 5100/10748 [

epoch [75/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2319.34it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2278.21it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2269.66it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2271.23it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2276.01it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2271.12it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2263.03it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2265.56it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2245.16it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2257.03it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2270.92it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2267.63it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2260.74it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2262.36it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2258.66it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2269.21it/s][A
 47%|████▋     | 5100/10748 [

epoch [76/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2373.68it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2310.40it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2304.43it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2289.66it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2268.14it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2277.53it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2266.89it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2234.00it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2231.06it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2237.79it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2251.58it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2262.94it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2261.49it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.16it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.89it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2268.20it/s][A
 47%|████▋     | 5100/10748 [

epoch [77/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2326.14it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2293.28it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2287.45it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2280.81it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2293.00it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2303.08it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2308.98it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2302.56it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2302.06it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2296.27it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2295.71it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2293.30it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2292.54it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2275.96it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.87it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2271.42it/s][A
 47%|████▋     | 5100/10748 [

epoch [78/100], Loss: 0.0006



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2371.83it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2315.48it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2312.63it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2318.70it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2299.90it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2283.55it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2274.70it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2281.24it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2271.12it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2271.63it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2280.58it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2278.28it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2265.98it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2267.05it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.50it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2284.16it/s][A
 47%|████▋     | 5100/10748 [

epoch [79/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2338.28it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2295.75it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2273.32it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2277.61it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2283.41it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2265.14it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2268.28it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2282.68it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2270.31it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2285.97it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2285.97it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2275.28it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2284.18it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2286.87it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2285.97it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2281.71it/s][A
 47%|████▋     | 5100/10748 [

epoch [80/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2398.03it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2340.13it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2318.23it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2294.54it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2279.50it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2278.42it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2284.70it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2276.72it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2282.49it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2287.50it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2288.58it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2292.59it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2286.33it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2292.80it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2299.43it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2299.71it/s][A
 47%|████▋     | 5100/10748 [

epoch [81/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2310.41it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2307.40it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2297.04it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2281.81it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2266.00it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2265.01it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2267.99it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2264.64it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2273.73it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2279.62it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2277.43it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2275.62it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2282.96it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2279.79it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2257.22it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2264.91it/s][A
 47%|████▋     | 5100/10748 [

epoch [82/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2368.45it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2289.75it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2290.32it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2290.14it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2290.94it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2296.69it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2277.23it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2259.42it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2267.11it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2262.29it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2263.09it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2279.27it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2283.73it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.54it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2289.27it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2290.13it/s][A
 47%|████▋     | 5100/10748 [

epoch [83/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2315.52it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2284.87it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2286.33it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2293.83it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2287.42it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2270.87it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2260.30it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2243.81it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2255.85it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2257.14it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2255.47it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2274.02it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.26it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2293.51it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2289.67it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2294.73it/s][A
 47%|████▋     | 5100/10748 [

epoch [84/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2332.69it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2303.20it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2280.10it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2268.76it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2266.26it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2278.67it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2271.35it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.45it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2262.94it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2279.18it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2267.30it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2267.90it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2270.17it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2262.66it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2267.34it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2277.80it/s][A
 47%|████▋     | 5100/10748 [

epoch [85/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2314.64it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2308.86it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2292.17it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2296.52it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2295.10it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2285.84it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2272.03it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2258.66it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2254.45it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2265.62it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2276.10it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2280.88it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2272.89it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2276.23it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2285.23it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2279.53it/s][A
 47%|████▋     | 5100/10748 [

epoch [86/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2357.79it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2311.66it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2223.22it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2239.41it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2218.85it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2242.63it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2239.17it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2243.40it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2255.20it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2271.48it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2287.60it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2289.11it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2293.31it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2285.25it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2272.34it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2285.99it/s][A
 47%|████▋     | 5100/10748 [

epoch [87/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2328.58it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2297.90it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2303.66it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2286.95it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2286.83it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2284.02it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2291.04it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.39it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2285.59it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2278.22it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2282.80it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2279.99it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2259.78it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2267.62it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2270.93it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2275.28it/s][A
 47%|████▋     | 5100/10748 [

epoch [88/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2380.45it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2316.74it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2287.17it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2272.13it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2270.10it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2265.56it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2271.89it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.06it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2282.78it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2267.63it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2277.05it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2275.11it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2266.46it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2275.58it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2284.73it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2279.24it/s][A
 47%|████▋     | 5100/10748 [

epoch [89/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2352.73it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2309.25it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2296.53it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2300.57it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2292.63it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2286.05it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2287.60it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2274.99it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2280.85it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2270.69it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2256.32it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2269.38it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.86it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2283.35it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2279.13it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2274.86it/s][A
 47%|████▋     | 5100/10748 [

epoch [90/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2326.27it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2287.41it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2286.51it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2293.14it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2292.92it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2295.87it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2290.82it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2289.69it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2281.55it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2273.64it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.19it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2264.82it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2272.33it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2290.47it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2291.20it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2280.83it/s][A
 47%|████▋     | 5100/10748 [

epoch [91/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2347.22it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2305.76it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2317.10it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2313.26it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2305.44it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2280.16it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2289.43it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2299.62it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2288.94it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2283.32it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2294.36it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2295.45it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2277.22it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2261.85it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2276.48it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2258.84it/s][A
 47%|████▋     | 5100/10748 [

epoch [92/100], Loss: 0.0005



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2351.80it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2308.86it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2298.38it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2288.56it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2274.99it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2283.69it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2281.58it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2269.48it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2263.01it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2273.59it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2276.41it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2289.86it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2286.37it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2280.81it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2286.80it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2283.27it/s][A
 47%|████▋     | 5100/10748 [

epoch [93/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2340.31it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2286.62it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2290.84it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2280.28it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2280.48it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2281.45it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2273.85it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2275.39it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2260.54it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2261.73it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2271.40it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2263.77it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2257.65it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2271.44it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2257.54it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2260.41it/s][A
 47%|████▋     | 5100/10748 [

epoch [94/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2349.86it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2307.71it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2273.32it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2283.59it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2279.90it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2288.13it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2282.19it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2298.19it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2295.85it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2303.30it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2294.91it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2278.14it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2280.79it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2274.15it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2283.99it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2286.25it/s][A
 47%|████▋     | 5100/10748 [

epoch [95/100], Loss: 0.0003



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2394.56it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2319.32it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2296.43it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2302.27it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2286.92it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2288.75it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2283.07it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2277.70it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2289.98it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2300.50it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2298.00it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2294.02it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2296.00it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2298.68it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2301.33it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2289.74it/s][A
 47%|████▋     | 5100/10748 [

epoch [96/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2397.01it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2305.63it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2300.31it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2265.15it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2240.74it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2247.51it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2269.09it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2277.40it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2281.59it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2283.80it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2282.19it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2295.30it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2281.69it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2273.93it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2270.77it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2276.04it/s][A
 47%|████▋     | 5100/10748 [

epoch [97/100], Loss: 0.0004



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2387.83it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2299.69it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2293.91it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2273.26it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2269.03it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2278.32it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2273.79it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2271.48it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2271.01it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2264.52it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2262.43it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2273.33it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2281.94it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2278.40it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2274.30it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2279.44it/s][A
 47%|████▋     | 5100/10748 [

epoch [98/100], Loss: 0.0003



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2296.23it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2283.57it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2269.02it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2272.00it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2282.87it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2280.45it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2274.20it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2263.52it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2265.59it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2265.89it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2264.92it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2273.57it/s][A
 36%|███▋      | 3900/10748 [00:01<00:02, 2282.84it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2282.32it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2280.27it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2280.49it/s][A
 47%|████▋     | 5100/10748 [

epoch [99/100], Loss: 0.0003



  0%|          | 0/10748 [00:00<?, ?it/s][A
  3%|▎         | 300/10748 [00:00<00:04, 2303.39it/s][A
  6%|▌         | 600/10748 [00:00<00:04, 2270.67it/s][A
  8%|▊         | 900/10748 [00:00<00:04, 2267.58it/s][A
 11%|█         | 1200/10748 [00:00<00:04, 2270.61it/s][A
 14%|█▍        | 1500/10748 [00:00<00:04, 2270.62it/s][A
 17%|█▋        | 1800/10748 [00:00<00:03, 2278.60it/s][A
 20%|█▉        | 2100/10748 [00:00<00:03, 2265.85it/s][A
 22%|██▏       | 2400/10748 [00:01<00:03, 2272.67it/s][A
 25%|██▌       | 2700/10748 [00:01<00:03, 2279.97it/s][A
 28%|██▊       | 3000/10748 [00:01<00:03, 2270.44it/s][A
 31%|███       | 3300/10748 [00:01<00:03, 2246.58it/s][A
 33%|███▎      | 3600/10748 [00:01<00:03, 2264.23it/s][A
 36%|███▋      | 3900/10748 [00:01<00:03, 2274.82it/s][A
 39%|███▉      | 4200/10748 [00:01<00:02, 2269.99it/s][A
 42%|████▏     | 4500/10748 [00:01<00:02, 2279.34it/s][A
 45%|████▍     | 4800/10748 [00:02<00:02, 2288.64it/s][A
 47%|████▋     | 5100/10748 [

epoch [100/100], Loss: 0.0003





# 基于双向lstm+CRF

In [None]:
import torch.nn.functional as F # pytorch 激活函数的类
from torch import nn,optim # 构建模型和优化器
from torchcrf import CRF


# 构建基于bilstm+crf实现ner
class bilstm_crf(nn.Module):
    """Bidirectional LSTM tagger with an attached CRF layer for NER.

    ``forward`` only produces the per-token score vectors; the ``crf``
    submodule is exposed as an attribute so that the caller can feed those
    scores to it (e.g. to compute the training loss).

    Args:
        parameter: mapping providing the hyper-parameters ``'word_size'``,
            ``'d_model'``, ``'hid_dim'``, ``'n_layers'``, ``'dropout'`` and
            ``'output_size'``.
    """

    def __init__(self, parameter):
        super().__init__()

        # Token-id lookup table; index 0 is reserved as the padding index.
        vocab_size, emb_dim = parameter['word_size'], parameter['d_model']
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)

        # Batch-first bidirectional LSTM encoder over the embedded sequence.
        lstm_units = parameter['hid_dim']
        self.lstm = nn.LSTM(
            input_size=emb_dim,
            hidden_size=lstm_units,
            num_layers=parameter['n_layers'],
            bidirectional=True,
            batch_first=True,
            dropout=parameter['dropout'],
        )

        # Forward and backward states are concatenated, hence 2 * lstm_units.
        n_tags = parameter['output_size']
        self.fc = nn.Linear(2 * lstm_units, n_tags)

        # CRF over the tag scores; not used in forward(), applied by the caller.
        self.crf = CRF(n_tags, batch_first=True)

    def forward(self, x):
        """Return per-token tag scores for a batch of token ids.

        Args:
            x: integer tensor of token ids (assumed batch-first,
                i.e. ``(batch, seq_len)`` -- TODO confirm against the caller).

        Returns:
            Tensor whose last dimension has size ``output_size``.
        """
        embedded = self.embedding(x)
        # Final hidden/cell states are not needed, only the per-step outputs.
        encoded, _ = self.lstm(embedded)
        return self.fc(encoded)

In [None]:
import os
import shutil
import pickle as pk
from torch.utils.tensorboard import SummaryWriter


# Build the model and move it to the configured device.
model = bilstm_crf(parameter).to(parameter['device'])

# Switch the model to training mode.
model.train()

# Optimizer: SGD with Nesterov momentum.
# (Alternative tried previously: Adam with lr=parameter['lr'], weight_decay=0.01.)
optimizer = torch.optim.SGD(model.parameters(), lr=0.00005, momentum=0.95, nesterov=True)

# Learning-rate schedule: multiply the lr by 0.9 every 10 scheduler steps
# (the scheduler is stepped once per epoch below).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)

# Batch generator. From its use below it yields (inputs, targets, epoch, keys):
# `epoch` is non-None on the batch that closes an epoch and `keys` is truthy
# once training data is exhausted -- presumably; verify against batch_yield.
train_yield = batch_yield(parameter)

# Training loop.
loss_cal = []            # per-batch losses of the epoch currently in progress
min_loss = float('inf')  # lowest epoch-average loss seen so far
while True:
    inputs, targets, epoch, keys = next(train_yield)
    if keys:
        break

    out = model(inputs.long().to(parameter['device']))
    # The CRF layer returns the log-likelihood of the gold tags, so its
    # negation is the loss to minimise.
    # NOTE(review): no mask is passed, so any padded positions are scored as
    # real tokens -- confirm whether batch_yield pads and add a mask if so.
    loss = -model.crf(out, targets.long().to(parameter['device']))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_cal.append(loss.item())

    if epoch is not None:
        # End of an epoch: report the mean batch loss and checkpoint on improvement.
        epoch_loss = sum(loss_cal) / len(loss_cal)
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            torch.save(model.state_dict(), 'bilstm_crf.h5')
        print('epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, \
                                               parameter['epoch'],epoch_loss))
        # Start the next epoch with an empty accumulator so that this epoch's
        # last batch is not counted again in the next average.
        loss_cal = []
        scheduler.step()


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.59it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:19, 534.18it/s]

epoch [1/100], Loss: 3310.1270


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.79it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [2/100], Loss: 945.8785


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.26it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 612.56it/s]

epoch [3/100], Loss: 568.3059


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 550.21it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [4/100], Loss: 421.6440


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 543.80it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 606.59it/s]

epoch [5/100], Loss: 332.4437


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.74it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [6/100], Loss: 265.2279


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 546.27it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 604.00it/s]

epoch [7/100], Loss: 211.9677


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.90it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [8/100], Loss: 172.5639


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 548.82it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 604.36it/s]

epoch [9/100], Loss: 137.0499


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 552.32it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 616.92it/s]

epoch [10/100], Loss: 110.4967


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.67it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 607.46it/s]

epoch [11/100], Loss: 86.3848


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 562.04it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [12/100], Loss: 70.5258


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.99it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [13/100], Loss: 60.7880


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.83it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [14/100], Loss: 52.8391


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.60it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 599.37it/s]

epoch [15/100], Loss: 45.8495


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.36it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [16/100], Loss: 37.5853


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.22it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 573.40it/s]

epoch [17/100], Loss: 33.9748


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 547.87it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 606.50it/s]

epoch [18/100], Loss: 32.3178


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 551.10it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 572.35it/s]

epoch [19/100], Loss: 26.8575


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.75it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [20/100], Loss: 22.8522


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 543.41it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 589.77it/s]

epoch [21/100], Loss: 20.1551


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 546.52it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 604.59it/s]

epoch [22/100], Loss: 21.0407


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 552.09it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 587.24it/s]

epoch [23/100], Loss: 20.1733


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 561.06it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [24/100], Loss: 18.3727


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 546.06it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 577.86it/s]

epoch [25/100], Loss: 15.7001


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 551.88it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [26/100], Loss: 17.1244


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.94it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 591.21it/s]

epoch [27/100], Loss: 16.1325


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.31it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 601.41it/s]

epoch [28/100], Loss: 14.1012


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.04it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 610.69it/s]

epoch [29/100], Loss: 12.5347


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 547.17it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 601.46it/s]

epoch [30/100], Loss: 11.2677


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.99it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [31/100], Loss: 10.3089


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 555.31it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 602.58it/s]

epoch [32/100], Loss: 9.1061


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.09it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 599.39it/s]

epoch [33/100], Loss: 9.2686


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.17it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 607.68it/s]

epoch [34/100], Loss: 8.2703


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.78it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 606.88it/s]

epoch [35/100], Loss: 8.0974


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.02it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 592.78it/s]

epoch [36/100], Loss: 7.6383


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 545.02it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [37/100], Loss: 6.9660


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.77it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [38/100], Loss: 8.3725


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:18<00:00, 570.13it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [39/100], Loss: 6.8889


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 547.72it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 588.49it/s]

epoch [40/100], Loss: 5.7368


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.79it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 602.64it/s]

epoch [41/100], Loss: 5.8219


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.06it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [42/100], Loss: 5.4784


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 551.10it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 620.61it/s]

epoch [43/100], Loss: 5.8280


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 552.25it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 593.65it/s]

epoch [44/100], Loss: 5.2383


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 551.41it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:19, 552.45it/s]

epoch [45/100], Loss: 4.5247


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 552.42it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [46/100], Loss: 4.7619


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.44it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [47/100], Loss: 5.0268


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 549.72it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [48/100], Loss: 4.5702


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.92it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [49/100], Loss: 4.0506


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 544.36it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 612.28it/s]

epoch [50/100], Loss: 3.9747


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:18<00:00, 566.85it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 573.75it/s]

epoch [51/100], Loss: 3.6528


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:18<00:00, 566.78it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 605.36it/s]

epoch [52/100], Loss: 3.8242


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 565.14it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [53/100], Loss: 3.4696


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.49it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 584.56it/s]

epoch [54/100], Loss: 3.4050


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 541.87it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [55/100], Loss: 3.3332


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.86it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 602.95it/s]

epoch [56/100], Loss: 3.6641


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 563.90it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [57/100], Loss: 3.1048


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.28it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 609.29it/s]

epoch [58/100], Loss: 3.0770


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.23it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:19, 550.31it/s]

epoch [59/100], Loss: 2.9419


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 550.72it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 615.77it/s]

epoch [60/100], Loss: 2.7535


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 552.00it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [61/100], Loss: 2.8441


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 548.30it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 607.68it/s]

epoch [62/100], Loss: 2.7974


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 555.44it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 580.15it/s]

epoch [63/100], Loss: 2.9439


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 561.21it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 613.83it/s]

epoch [64/100], Loss: 2.6525


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.24it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [65/100], Loss: 2.5621


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 551.83it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 598.75it/s]

epoch [66/100], Loss: 2.3355


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 559.63it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [67/100], Loss: 2.6173


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.38it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [68/100], Loss: 2.5438


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.81it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 604.69it/s]

epoch [69/100], Loss: 2.4093


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.72it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [70/100], Loss: 2.4099


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 549.45it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [71/100], Loss: 2.4447


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 549.34it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 614.44it/s]

epoch [72/100], Loss: 2.1743


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 554.88it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [73/100], Loss: 2.2345


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.54it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 610.05it/s]

epoch [74/100], Loss: 2.2251


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 563.19it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [75/100], Loss: 2.1140


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.09it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 615.31it/s]

epoch [76/100], Loss: 2.1768


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 549.07it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 598.83it/s]

epoch [77/100], Loss: 2.1434


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 562.42it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [78/100], Loss: 2.0958


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.86it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [79/100], Loss: 1.8684


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 549.46it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [80/100], Loss: 2.0949


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.79it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 602.19it/s]

epoch [81/100], Loss: 1.7868


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 561.99it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 609.67it/s]

epoch [82/100], Loss: 1.8721


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.16it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 604.53it/s]

epoch [83/100], Loss: 1.7435


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 561.69it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 613.85it/s]

epoch [84/100], Loss: 1.7943


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.86it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 599.09it/s]

epoch [85/100], Loss: 1.7375


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:18<00:00, 571.89it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 606.74it/s]

epoch [86/100], Loss: 1.7620


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 565.28it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 598.98it/s]

epoch [87/100], Loss: 1.7210


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.47it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [88/100], Loss: 1.7424


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 560.73it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [89/100], Loss: 1.6349


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 548.33it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 586.17it/s]

epoch [90/100], Loss: 1.6981


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.53it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [91/100], Loss: 1.6422


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.97it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [92/100], Loss: 1.7439


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 562.09it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [93/100], Loss: 1.7348


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 543.86it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [94/100], Loss: 1.7032


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.31it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 605.88it/s]

epoch [95/100], Loss: 1.4579


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 553.60it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [96/100], Loss: 1.6124


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 558.65it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:17, 605.82it/s]

epoch [97/100], Loss: 1.5664


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 555.56it/s]
  0%|                                                                                        | 0/10748 [00:00<?, ?it/s]

epoch [98/100], Loss: 1.6461


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 557.72it/s]
  1%|▋                                                                            | 100/10748 [00:00<00:18, 589.17it/s]

epoch [99/100], Loss: 1.5255


100%|███████████████████████████████████████████████████████████████████████████| 10748/10748 [00:19<00:00, 556.13it/s]


epoch [100/100], Loss: 1.5273
