In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Initialization: importing the packages that we will use
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu' # Google colab offers time limited use of GPU for free

# Training parameters 
BATCH_SIZE = 64

In [3]:
# For loading the data
from PIL import Image
import os
import os.path

import torchvision
import torch.utils.data
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
import random

In [4]:
# For constructing the network
import torch.optim as optim
import torch.utils.data
import torch.nn as nn
import torchvision.models as models
import torch.utils.data
import torch.backends.cudnn as cudnn

In [5]:
from tqdm import tqdm

In [6]:
# Solve the imshow dead kernel problem
import os    
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [7]:
'''
Start loading the data
'''
print('================== START LOADING DATA ==================')



In [8]:
path_drive = '/content/drive/My Drive/'

In [9]:
!cp /content/drive/MyDrive/Task4/pretrain_features.csv /content
!cp /content/drive/MyDrive/Task4/pretrain_labels.csv /content
!cp /content/drive/MyDrive/Task4/test_features.csv /content
!cp /content/drive/MyDrive/Task4/train_features.csv /content
!cp /content/drive/MyDrive/Task4/train_labels.csv /content

In [10]:
# Read the pretraining tables from the current working directory
# (the earlier `!cp` cell copies the CSVs into /content).
r1 = pd.read_csv('pretrain_features.csv')  # features
r2 = pd.read_csv('pretrain_labels.csv')  # labels


print(r1.shape)
print(r2.shape)

# Join labels and features on their shared 'Id' column. pd.merge defaults to an
# inner join; r2 is the left frame, so the label column precedes the features.
all_data_st = pd.merge(r2, r1, on='Id')
print(all_data_st.shape)
print(all_data_st.head(10))

# # Output to csv
# all_data_st.to_csv("training_set.csv", index=None)

(50000, 1002)
(50000, 2)
(50000, 1003)
   Id  lumo_energy                                             smiles  \
0   0    -3.111521  c1occ2c1c1ccc3cscc3c1c1ncc3cc(ccc3c21)-c1cccc2...   
1   1    -3.219118  C1C=c2c(cc3ncc4c5[SiH2]C=Cc5oc4c3c2=C1)-c1scc2...   
2   2    -3.114145  C1C=c2c3cccnc3c3c4c[nH]cc4c4cc(cnc4c3c2=C1)-c1...   
3   3    -3.161867  [SiH2]1C=Cc2c1csc2-c1cnc2c(c1)c1ccccc1c1cc3ccc...   
4   4    -3.687744        c1occ2c1c(cc1[se]c3ccncc3c21)-c1cccc2nsnc12   
5   5    -2.791261  [SiH2]1C=Cc2[nH]c3c(oc4cc(sc34)-c3scc4cc[se]c3...   
6   6    -3.688235          c1ccc(nc1)-c1cc2ncc3c4cnccc4sc3c2c2nsnc12   
7   7    -3.243368  C1C=c2c3cccnc3c3c4cocc4c4C=C(Cc4c3c2=C1)c1scc2...   
8   8    -3.508069  c1cc2csc(-c3cc4c5cscc5c5c6occc6c6cscc6c5c4c4ns...   
9   9    -3.440629  [SiH2]1C=c2c3cc(oc3c3c4cocc4c4ccc5cscc5c4c3c2=...   

   feature_0000  feature_0001  feature_0002  feature_0003  feature_0004  \
0           0.0           0.0           0.0           0.0           0.0   
1      

In [11]:
# all_data_st = all_data_st.sample(frac=1)
# print(all_data_st.head(10))
# Drop the identifier and SMILES-string columns so only numeric columns remain:
# column 0 is the target (lumo_energy) and the rest are the feature columns.
all_data_st_noidsmiles = all_data_st.drop(['Id', 'smiles'], axis=1)
print(all_data_st_noidsmiles.head(10))
# Same table as a NumPy array and as a torch tensor (dtype follows the frame).
all_data_array = np.array(all_data_st_noidsmiles)
all_data_tensor = torch.tensor(all_data_array)
print(all_data_tensor)

   lumo_energy  feature_0000  feature_0001  feature_0002  feature_0003  \
0    -3.111521           0.0           0.0           0.0           0.0   
1    -3.219118           0.0           0.0           0.0           0.0   
2    -3.114145           0.0           0.0           0.0           0.0   
3    -3.161867           0.0           0.0           0.0           0.0   
4    -3.687744           0.0           0.0           0.0           0.0   
5    -2.791261           0.0           0.0           0.0           1.0   
6    -3.688235           0.0           0.0           0.0           1.0   
7    -3.243368           0.0           0.0           0.0           0.0   
8    -3.508069           0.0           0.0           0.0           0.0   
9    -3.440629           0.0           0.0           0.0           0.0   

   feature_0004  feature_0005  feature_0006  feature_0007  feature_0008  ...  \
0           0.0           0.0           0.0           0.0           0.0  ...   
1           1.0          

In [12]:
torch.tensor(all_data_st_noidsmiles.iloc[0][1:], dtype=torch.double)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 1., 

In [13]:
# For reproducibility: seed every random number generator used in this notebook
# with one shared value and make cuDNN pick deterministic algorithms.
SEED = 1998

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # all CUDA devices, not only the current one

# `cudnn` is torch.backends.cudnn, so each flag only needs to be set once.
cudnn.benchmark = False
cudnn.deterministic = True

In [14]:
# # Get the feature of the training set
# def get_features(filename):
#     features = pd.read_csv('./'+filename)
#     print(features.head(12))
#     # train = train.sample(frac=1).reset_index(drop=True)
#     # print(train.head())
#     return features

# # Get the labels of the training set
# def get_labels(filename):
#     labels = pd.read_csv('./'+filename)
#     print(labels.head(12))
#     # train = train.sample(frac=1).reset_index(drop=True)
#     # print(train.head())
#     return labels

# # Get the features of the testing set
# def get_test_feature(filename):
#     test_features = pd.read_csv('./'+filename)
#     print(test_features.head(12))
#     # train = train.sample(frac=1).reset_index(drop=True)
#     # print(train.head())
#     return test_features

# pretrain_features = get_features('pretrain_features.csv')
# pretrain_labels = get_labels('pretrain_labels.csv')
# train_features = get_features('train_features.csv')
# train_labels = get_labels('train_labels.csv')
# test_features = get_test_feature('test_features.csv')
# print(pretrain_features.shape)
# print(pretrain_labels.shape)
# print(train_features.shape)
# print(train_labels.shape)
# print(test_features.shape)

In [15]:
'''
Start constructing the network
'''
print('================== START CONSTRUCTING NETWORK ==================')



In [16]:
class FeatureExtract(nn.Module):
    """Fully connected regressor: input -> 512 -> 256 -> 128 -> 64 -> 1.

    Every hidden layer is followed by a ReLU; the final prediction layer is
    linear. The layer attribute names are part of the saved state dict and are
    used elsewhere in the notebook to select layers by name, so they are fixed.

    Keyword Args:
        input_shape: number of input features (required; KeyError if missing).
    """

    def __init__(self, **kwargs):
        super().__init__()
        input_width = kwargs["input_shape"]
        self.encoder_hidden_layer_1 = nn.Linear(input_width, 512)
        self.encoder_hidden_layer_2 = nn.Linear(512, 256)
        self.encoder_hidden_layer_3 = nn.Linear(256, 128)
        self.encoder_hidden_layer_4 = nn.Linear(128, 64)
        self.prediction_layer = nn.Linear(64, 1)

    def forward(self, features):
        """Return the scalar prediction (last dimension of size 1) for `features`."""
        hidden = features
        encoder_stack = (
            self.encoder_hidden_layer_1,
            self.encoder_hidden_layer_2,
            self.encoder_hidden_layer_3,
            self.encoder_hidden_layer_4,
        )
        for encoder_layer in encoder_stack:
            hidden = torch.relu(encoder_layer(hidden))
        return self.prediction_layer(hidden)

In [17]:
model = FeatureExtract(input_shape=1000).to(device)

In [18]:
# Construct the loss and optimizer
criterion = nn.MSELoss()

optimizer = optim.SGD(model.parameters(),
                            lr=0.0005,
                            momentum=0.9,
                            weight_decay=2e-3,#The value used in the paper is 1e-3
                            nesterov=True)

In [20]:
len(all_data_st_noidsmiles)

50000

In [21]:
# Randomly pick 95% of the rows for training; the fixed random_state makes the
# split repeatable across runs.
training = all_data_st_noidsmiles.sample(frac=0.95, random_state=1998)

# The validation set is every row whose index label was not sampled above.
validation = all_data_st_noidsmiles.drop(training.index)

print(len(training))
print(len(validation))

47500
2500


In [24]:
# Smoke test: push the first training row through the model and compute its
# loss. Column 0 is the target; columns 1: are the features.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.tensor(training.iloc[0][1:], dtype=torch.float).to(device)
result = model(input)
loss = criterion(result, torch.tensor(training.iloc[0][0:1], dtype=torch.float).to(device))
print(loss)

tensor(15.2968, device='cuda:0', grad_fn=<MseLossBackward0>)


In [25]:
def val(model, criterion, valset, datapoint):
  """Print and return the mean per-sample loss of `model` on `valset`.

  The model's train/eval mode is not changed here; the caller is expected to
  switch it.

  Args:
    model: torch module, called on one 1-D feature tensor at a time.
    criterion: loss, called as criterion(prediction, target).
    valset: pandas DataFrame of numeric columns; column 0 is the target and the
      remaining columns are the features.
    datapoint: label printed in front of the result (the training loop passes
      the number of training samples seen so far).

  Returns:
    The mean loss as a float, or nan when `valset` has no rows.
  """
  torch.cuda.empty_cache()

  # Run on whatever device the model lives on instead of a notebook global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Convert the frame once; per-row pandas lookups dominate the runtime otherwise.
  samples = torch.tensor(valset.to_numpy(dtype='float32')).to(target_device)
  n_samples = samples.shape[0]

  validation_loss_sum = 0.0
  # No gradients are needed for evaluation, so do not build autograd graphs.
  with torch.no_grad():
    for row in samples:
      output = model(row[1:])
      validation_loss_sum += criterion(output, row[0:1]).item()

  mean_loss = validation_loss_sum / n_samples if n_samples else float('nan')
  print(f'{datapoint} Validation Loss: {mean_loss:.3f}')

  torch.cuda.empty_cache()
  return mean_loss


In [26]:
def train(model, criterion, optimizer, epochs, training_set, validation_set,
          checkpoint_dir='/content/drive/My Drive/Task4/test1'):
  """Train `model` with one optimizer step per sample and checkpoint every epoch.

  Args:
    model: torch module, called on one 1-D feature tensor at a time.
    criterion: loss, called as criterion(prediction, target).
    optimizer: optimizer over the parameters to update.
    epochs: number of passes over `training_set`.
    training_set: pandas DataFrame of numeric columns; column 0 is the target
      and the remaining columns are the features.
    validation_set: DataFrame with the same layout, handed to `val` after every
      5000 training samples.
    checkpoint_dir: directory that receives `model_epoch<N>.pt` after each
      epoch; created if missing. Defaults to the notebook's Drive folder.

  Raises:
    ValueError: if `training_set` has no rows.
  """
  # Empty the cache of CUDA
  torch.cuda.empty_cache()

  print('================== START TRAINING ==================')

  # Run on whatever device the model lives on instead of a notebook global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Convert the whole frame once and keep it on the device; a per-sample
  # pandas row lookup plus host-to-device copy is far slower than the update.
  samples = torch.tensor(training_set.to_numpy(dtype='float32')).to(target_device)
  n_samples = samples.shape[0]
  if n_samples == 0:
    raise ValueError('training_set is empty')

  # Fail before training (not after the first epoch) if the folder is unusable.
  os.makedirs(checkpoint_dir, exist_ok=True)

  # Change to train mode
  model.train()

  for epoch in range(epochs):
    running_loss = 0
    for data_point in range(n_samples):
      row = samples[data_point]

      # Forward pass on the features, loss against the target in column 0
      result = model(row[1:])
      loss = criterion(result, row[0:1])
      if ((data_point+1) % 100) == 0:
        print(f'training {data_point+1} loss: {loss}')

      # Zero the gradient, back prop and update
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      running_loss += loss.item()

      # Periodic validation; `val` expects the model in evaluation mode
      if ((data_point+1) % 5000) == 0:
        model.eval()
        val(model, criterion, validation_set, data_point+1)
        model.train()

    print(f'[{epoch + 1}] average loss per epoch: {running_loss / n_samples:.3f}')

    save_path = os.path.join(checkpoint_dir, f'model_epoch{epoch+1}.pt')
    torch.save({'epoch': epoch+1, 'model_state_dict': model.state_dict()}, save_path)
    print(f'Saved model checkpoint to {save_path}')

In [27]:
train(model, criterion, optimizer, 5, training, validation)

training 100 loss: 0.006629479117691517
training 200 loss: 0.1727408468723297
training 300 loss: 0.17348702251911163
training 400 loss: 0.05562751740217209
training 500 loss: 0.3524382710456848
training 600 loss: 0.05999352037906647
training 700 loss: 0.0006808076286688447
training 800 loss: 0.0036711804568767548
training 900 loss: 0.010571641847491264
training 1000 loss: 0.06037874147295952
training 1100 loss: 0.07002365589141846
training 1200 loss: 0.0031184267718344927
training 1300 loss: 0.004192217253148556
training 1400 loss: 0.00508978171274066
training 1500 loss: 0.015186920762062073
training 1600 loss: 0.006078209728002548
training 1700 loss: 0.010147289372980595
training 1800 loss: 0.06858671456575394
training 1900 loss: 0.0006138374446891248
training 2000 loss: 0.001844224170781672
training 2100 loss: 0.0010619936510920525
training 2200 loss: 0.015589496120810509
training 2300 loss: 0.00087946024723351
training 2400 loss: 0.10230690240859985
training 2500 loss: 0.00788085907

In [28]:
# Read the small labelled set used for fine-tuning (target: homo_lumo_gap).
feature_gap = pd.read_csv('train_features.csv')  # features
label_gap = pd.read_csv('train_labels.csv')  # labels


print(feature_gap.shape)
print(label_gap.shape)

# Join labels and features on 'Id' (inner join by default); labels are the left
# frame, so the target column precedes the features.
train_gap_data = pd.merge(label_gap, feature_gap, on='Id')
print(train_gap_data.shape)
print(train_gap_data.head(10))

# Same layout as the pretraining table: column 0 = target, columns 1: = features.
train_gap_data_noidsmiles = train_gap_data.drop(['Id', 'smiles'], axis=1)
print(train_gap_data_noidsmiles.head(10))

(100, 1002)
(100, 2)
(100, 1003)
      Id  homo_lumo_gap                                             smiles  \
0  50000       2.052872    C1C=c2c3ccoc3c3c4ccccc4c(cc3c2=C1)-c1scc2ccsc12   
1  50001       1.325530  c1cc([se]c1-c1sc(-c2cccc3nsnc23)c2nccnc12)-c1c...   
2  50002       1.837294  [SiH2]1C=CC=C1c1cc2cnc3c(sc4ccc5c[nH]cc5c34)c2...   
3  50003       1.388601  C1C=c2ccc3c4cocc4c4c([se]c5cc(-c6cccs6)c6nsnc6...   
4  50004       0.991851  C1c(ccc1-c1sc(-c2nccc3nsnc23)c2ccoc12)-c1scc2c...   
5  50005       1.181848  c1c[nH]c(c1)-c1sc(-c2ccc(-c3scc4[se]ccc34)c3ns...   
6  50006       1.469864  C1C(=Cc2c1c1cnc3ccc4=C[SiH2]C=c4c3c1c1c[nH]cc2...   
7  50007       1.780535  c1c[nH]c(c1)-c1ccc([nH]1)-c1sc(-c2scc3ccoc23)c...   
8  50008       2.959695        c1cc2oc3c(ccc4cc(cnc34)-c3cccc4ccccc34)c2s1   
9  50009       2.224978  c1cc2csc(-c3cc4cc5c6c[nH]cc6c6cc[se]c6c5cc4[nH...   

   feature_0000  feature_0001  feature_0002  feature_0003  feature_0004  \
0           0.0           0.0    

In [39]:
# Freeze every parameter except those of encoder layers 2-4 and the prediction
# layer. Each name is printed once, and a second time if it stays trainable.
for name, param in model.named_parameters():
  print(name)
  trainable = any(
      layer_tag in name
      for layer_tag in ('encoder_hidden_layer_2', 'encoder_hidden_layer_3',
                        'encoder_hidden_layer_4', 'prediction_layer')
  )
  if trainable:
    print(name)
  param.requires_grad = trainable

encoder_hidden_layer_1.weight
encoder_hidden_layer_1.bias
encoder_hidden_layer_2.weight
encoder_hidden_layer_2.weight
encoder_hidden_layer_2.bias
encoder_hidden_layer_2.bias
encoder_hidden_layer_3.weight
encoder_hidden_layer_3.weight
encoder_hidden_layer_3.bias
encoder_hidden_layer_3.bias
encoder_hidden_layer_4.weight
encoder_hidden_layer_4.weight
encoder_hidden_layer_4.bias
encoder_hidden_layer_4.bias
prediction_layer.weight
prediction_layer.weight
prediction_layer.bias
prediction_layer.bias


In [40]:
# Loss for the fine-tuning (transfer learning) stage.
criterion_gap = nn.MSELoss()

# Only parameters whose requires_grad flag is True when this cell runs are
# handed to the optimizer. The learning rate is 10x smaller than in pretraining.
optimizer_gap = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=0.00005,
                            momentum=0.9,
                            weight_decay=2e-3,  # the value used in the paper is 1e-3
                            nesterov=True)

def train_gap(model, criterion, optimizer, training_set_gap, log_weights=True):
  """Run one fine-tuning pass over `training_set_gap`, one step per sample.

  Args:
    model: torch module, called on one 1-D feature tensor at a time. When
      `log_weights` is true it must expose encoder_hidden_layer_1..4 and
      prediction_layer.
    criterion: loss, called as criterion(prediction, target).
    optimizer: optimizer over the parameters to update.
    training_set_gap: pandas DataFrame of numeric columns; column 0 is the
      target and the remaining columns are the features.
    log_weights: print the five weight matrices before and after the pass
      (the original behaviour). Pass False when calling this in a loop to keep
      the cell output readable.

  Returns:
    The same `model` object, updated in place.

  Raises:
    ValueError: if `training_set_gap` has no rows.
  """
  def _print_weights():
    # Dump the weight matrices so frozen and updated layers can be compared.
    print("model.layer1.weight", model.encoder_hidden_layer_1.weight)
    print("model.layer2.weight", model.encoder_hidden_layer_2.weight)
    print("model.layer3.weight", model.encoder_hidden_layer_3.weight)
    print("model.layer4.weight", model.encoder_hidden_layer_4.weight)
    print("model.output.weight", model.prediction_layer.weight)

  # Empty the cache of CUDA
  torch.cuda.empty_cache()

  # Run on whatever device the model lives on instead of a notebook global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Convert the frame once rather than one pandas row lookup per sample.
  samples = torch.tensor(training_set_gap.to_numpy(dtype='float32')).to(target_device)
  n_samples = samples.shape[0]
  if n_samples == 0:
    raise ValueError('training_set_gap is empty')

  if log_weights:
    _print_weights()
  print('================== START TRANSFER LEARNING ==================')

  # Change to train mode
  model.train()

  running_loss = 0
  for row in samples:
    # Forward pass on the features, loss against the target in column 0
    result = model(row[1:])
    loss = criterion(result, row[0:1])

    # Zero the gradient, back prop and update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  print(f'average loss per epoch: {running_loss / n_samples:.3f}')

  if log_weights:
    _print_weights()

  return model

In [None]:
train_gap(model, criterion_gap, optimizer_gap, train_gap_data_noidsmiles)

average loss per epoch: 0.061


FeatureExtract(
  (encoder_hidden_layer_1): Linear(in_features=1000, out_features=512, bias=True)
  (encoder_hidden_layer_2): Linear(in_features=512, out_features=256, bias=True)
  (encoder_hidden_layer_3): Linear(in_features=256, out_features=128, bias=True)
  (encoder_hidden_layer_4): Linear(in_features=128, out_features=64, bias=True)
  (prediction_layer): Linear(in_features=64, out_features=1, bias=True)
)

In [47]:
for i in range(900):
  train_gap(model, criterion_gap, optimizer_gap, train_gap_data_noidsmiles)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        [ 0.0009,  0.0036,  0.0054,  ...,  0.0042,  0.0039, -0.0030]],
       device='cuda:0', requires_grad=True)
model.layer3.weight Parameter containing:
tensor([[ 0.0002, -0.0012,  0.0001,  ...,  0.0008,  0.0062,  0.0052],
        [-0.0017, -0.0045, -0.0015,  ..., -0.0026, -0.0019,  0.0030],
        [-0.0024,  0.0015, -0.0002,  ...,  0.0048, -0.0040,  0.0013],
        ...,
        [-0.0028,  0.0012, -0.0042,  ...,  0.0032, -0.0013, -0.0025],
        [-0.0024, -0.0009,  0.0090,  ..., -0.0057, -0.0236, -0.0048],
        [ 0.0014,  0.0032, -0.0048,  ...,  0.0049,  0.0002, -0.0042]],
       device='cuda:0', requires_grad=True)
model.layer4.weight Parameter containing:
tensor([[-5.7237e-03, -3.9983e-03,  2.8990e-04,  ..., -4.6358e-03,
         -4.8249e-03, -5.5626e-03],
        [-3.2827e-03, -5.1221e-03,  1.6641e-03,  ...,  6.1138e-03,
         -7.0742e-04, -1.3342e-03],
        [-2.5899e-03,  7.1646e-05, -6.2731e-03,  ...

In [53]:
model(torch.tensor(train_gap_data_noidsmiles.iloc[97][1:], dtype=torch.float).to(device))

tensor([2.0767], device='cuda:0', grad_fn=<AddBackward0>)

In [None]:
# Spot-check one fine-tuning sample (row 98) without tracking gradients.
model.eval()
with torch.no_grad():
  result = model(torch.tensor(train_gap_data_noidsmiles.iloc[98][1:], dtype=torch.float).to(device))
  print(result)
# Loss for that sample. This uses the pretraining `criterion`; both it and
# `criterion_gap` are nn.MSELoss instances.
criterion(result, torch.tensor(train_gap_data_noidsmiles.iloc[98][0:1], dtype=torch.float).to(device))

tensor([1.0400], device='cuda:0')


tensor(0.3399, device='cuda:0')

In [54]:
# Read the unlabelled test table from the working directory.
test_feature = pd.read_csv('test_features.csv')
# Keep the ids separately; they become the first column of the submission.
test_id = test_feature['Id']
print(test_id.head(10))
# Only the feature columns remain (there is no target column in this frame).
test_feature_noidsmiles = test_feature.drop(['Id', 'smiles'], axis=1)
print(test_feature_noidsmiles.head(10))

0    50100
1    50101
2    50102
3    50103
4    50104
5    50105
6    50106
7    50107
8    50108
9    50109
Name: Id, dtype: int64
   feature_0000  feature_0001  feature_0002  feature_0003  feature_0004  \
0           0.0           0.0           0.0           1.0           1.0   
1           0.0           0.0           0.0           0.0           0.0   
2           0.0           0.0           0.0           0.0           0.0   
3           0.0           0.0           0.0           0.0           1.0   
4           0.0           0.0           0.0           1.0           0.0   
5           0.0           0.0           0.0           1.0           0.0   
6           0.0           0.0           0.0           0.0           1.0   
7           0.0           0.0           0.0           1.0           0.0   
8           0.0           0.0           0.0           0.0           0.0   
9           0.0           0.0           0.0           0.0           0.0   

   feature_0005  feature_0006  feature_00

In [None]:
# Scratch check of the submission layout: build an id column and a prediction
# column separately, join them side by side and write the result as CSV.
y = []
y.append(10)
y.append(11)
y.append(12)
df = pd.DataFrame(y, columns=['y'])
print(df)
# NOTE(review): `id` shadows the Python builtin of the same name.
id = []
id.append(0)
id.append(1)
id.append(2)
id_df = pd.DataFrame(id, columns=['id'])
print(id_df)
# axis=1 places the frames next to each other, aligned on the row index.
det = pd.concat([id_df, df], join = 'outer', axis = 1)
print(det)
det.to_csv('/content/drive/MyDrive/test.csv', index=False, header=True)

    y
0  10
1  11
2  12
   id
0   0
1   1
2   2
   id   y
0   0  10
1   1  11
2   2  12


In [55]:
def predict(model, test_features, test_id, epoch_run,
            output_dir='/content/drive/MyDrive/Task4/test1'):
  """Predict one value per row of `test_features` and write a submission CSV.

  Args:
    model: torch module returning a single-element tensor per 1-D input.
    test_features: pandas DataFrame holding only numeric feature columns.
    test_id: pandas Series of ids, one per row of `test_features`; its name
      becomes the first CSV column.
    epoch_run: value inserted into the file name `submission_epoch<epoch_run>.csv`.
    output_dir: directory that receives the CSV; created if missing. Defaults to
      the notebook's Drive folder.

  Returns:
    The DataFrame that was written (id column followed by column 'y').

  Raises:
    ValueError: if `test_id` and `test_features` have different lengths.
  """
  if len(test_id) != len(test_features):
    raise ValueError('test_id and test_features must have the same number of rows')

  model.eval()

  # Run on whatever device the model lives on instead of a notebook global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Iterate over the `test_features` argument; the original looped over the
  # length of an unrelated notebook-level variable.
  inputs = torch.tensor(test_features.to_numpy(dtype='float32')).to(target_device)
  with torch.no_grad():
    y = [model(row).item() for row in inputs]

  output_df = pd.DataFrame(y, columns=['y'])

  # Pair ids and predictions by position; concat would otherwise align on the
  # index labels and produce NaN rows for a non-default index.
  ids = test_id.reset_index(drop=True)
  prediction = pd.concat([ids, output_df], join='outer', axis=1)

  os.makedirs(output_dir, exist_ok=True)
  prediction.to_csv(os.path.join(output_dir, 'submission_epoch{0}.csv'.format(epoch_run)), index=False, header=True)

  return prediction




In [56]:
predict(model, test_feature_noidsmiles, test_id, 5)

In [None]:
def train(model, criterion, optimizer, epochs, trainloader, valloader, testloader):
  """Train a triplet-embedding model; checkpoint and predict after every epoch.

  NOTE(review): this definition reuses the name `train`. Once this cell runs it
  replaces the regression `train(..., training_set, validation_set)` defined
  earlier in this notebook.

  Args:
    model: called as model(anchor, positive, negative); returns three embeddings.
    criterion: called as criterion(embedded_a, embedded_p, embedded_n).
    optimizer: optimizer over the parameters to update.
    epochs: number of passes over `trainloader`.
    trainloader: sized iterable of (anchor, positive, negative) tensor batches.
    valloader: handed to `val_accuracy` together with an iterator over it.
    testloader: iterable of batches of three tensors, scored after each epoch.

  Returns:
    The trained model.

  `val_accuracy` is not defined in this cell and must already exist in the
  notebook namespace. Checkpoints and submission files are written to the
  hard-coded Drive folder `test9`.
  """
  # Local import so the function does not depend on a notebook-level `F` alias.
  import torch.nn.functional as F

  # Use the device the model lives on rather than assuming CUDA.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Create an iterator object for valloader, for selecting a random batch from val set for validation
  valloader_iterator = iter(valloader)

  # Empty the cache of CUDA
  torch.cuda.empty_cache()

  print('================== START TRAINING ==================')
  # Change to train mode
  model.train()
  for epoch in range(epochs):
    running_loss = 0
    for batch_idx, (data0, data1, data2) in enumerate(tqdm(trainloader)):
      # Tensors are used directly (no autograd.Variable wrapper is needed);
      # .to() is a no-op when a batch is already on the target device.
      anchor = data0.to(target_device)
      positive = data1.to(target_device)
      negative = data2.to(target_device)

      # Calculate the output of three networks
      embedded_a, embedded_p, embedded_n = model(anchor, positive, negative)

      # Calculate the loss
      loss = criterion(embedded_a, embedded_p, embedded_n)
      print("mini Batch {0} Loss: {1}".format(batch_idx+1, loss.data))

      # Zero the gradient
      optimizer.zero_grad()

      # Back prop and update
      loss.backward()
      optimizer.step()

      # Accumulate for the per-epoch average
      running_loss += loss.item()

      if batch_idx % 300 == 0 and batch_idx != 0:
        print("Training Batch: {0} | Training Loss: {1}".format(batch_idx+1, loss.data))

        # Validation in evaluation mode, then back to train mode
        model.eval()

        mean_accuracy = val_accuracy(model, valloader, valloader_iterator)
        print(mean_accuracy)

        model.train()

        # Empty the cache of CUDA
        torch.cuda.empty_cache()

    # The parameter is `trainloader`; the original referenced an undefined `train_loader`.
    print(f'[{epoch + 1}] average loss per epoch: {running_loss / len(trainloader):.3f}')

    # Save a checkpoint of the model (the stored 'epoch' value is zero-based)
    save_path = f'/content/drive/My Drive/test9/model_epoch{epoch+1}.pt'
    torch.save({'epoch': epoch, 'model_state_dict': model.state_dict()}, save_path)
    print(f'Saved model checkpoint to {save_path}')

    # Validation in evaluation mode
    model.eval()

    mean_accuracy = val_accuracy(model, valloader, valloader_iterator)
    print(mean_accuracy)

    # Prediction on the test triplets
    print('================== START PREDICTION ==================')

    pred_test = []

    # Predict labels 1 or 0 for each test triplet
    for batch_idx_predict_after_epoch, (data1, data2, data3) in enumerate(tqdm(testloader)):

      data1 = data1.to(target_device)
      data2 = data2.to(target_device)
      data3 = data3.to(target_device)

      with torch.no_grad():
        # compute the three embeddings
        embedded_x, embedded_y, embedded_z = model(data1, data2, data3)

      dist_a = F.pairwise_distance(embedded_x, embedded_y, 2)
      dist_b = F.pairwise_distance(embedded_x, embedded_z, 2)

      # 1 when the first candidate is at least as close to the anchor as the second
      pred_test.append(1*(dist_a <= dist_b))

      print('batch id predict after epoch: ', batch_idx_predict_after_epoch+1)

    # Flatten the per-batch predictions into one 1-D array
    pred_test_np = []
    for batch_pred in pred_test:
      pred_test_np += list(batch_pred.cpu().detach().numpy())
    predicted_labels = np.hstack(pred_test_np)
    print(predicted_labels)

    # Write submission file
    df = pd.DataFrame(predicted_labels)
    df.to_csv('/content/drive/MyDrive/test9/submission_epoch{0}.txt'.format(epoch+1), index=False, header=None) #write CSV

    # Change back to train mode
    model.train()

    # Empty the cache of CUDA
    torch.cuda.empty_cache()

  print('Finished Training')
  return model

In [None]:
# !pip install hiddenlayer



In [None]:
# ''' Used to visualize the network structure '''
# import hiddenlayer as hl

# transforms = [hl.transforms.Prune('Constant')] # Removes Constant nodes from graph.

# graph = hl.build_graph(model, torch.zeros([10000]).to(device), transforms)
# graph.theme = hl.graph.THEMES['blue'].copy()
# graph.save('layers', format='png')

In [None]:
class AE(nn.Module):
    """Symmetric fully connected autoencoder with a 128-unit code.

    Layout: input -> 128 -> 128 (code) -> 128 -> input, with a ReLU after every
    layer, including the reconstruction output.

    Keyword Args:
        input_shape: number of input (and reconstructed) features (required).
    """

    def __init__(self, **kwargs):
        super().__init__()
        n_features = kwargs["input_shape"]
        self.encoder_hidden_layer = nn.Linear(n_features, 128)
        self.encoder_output_layer = nn.Linear(128, 128)
        self.decoder_hidden_layer = nn.Linear(128, 128)
        self.decoder_output_layer = nn.Linear(128, n_features)

    def forward(self, features):
        """Return the non-negative reconstruction of `features`."""
        signal = features
        layer_chain = (
            self.encoder_hidden_layer,
            self.encoder_output_layer,
            self.decoder_hidden_layer,
            self.decoder_output_layer,
        )
        for layer in layer_chain:
            signal = torch.relu(layer(signal))
        return signal

In [None]:
model_autoencoder = AE(input_shape=784).to(device)

In [None]:
# Smoke test for the autoencoder: a random 784-feature input must come back
# with the same size. The original called `model` (the 1000-input regressor)
# here, which does not match the recorded torch.Size([784]) output.
random_data = torch.rand((784)).to(device)
result = model_autoencoder(random_data)
print(result.size())

torch.Size([784])


In [None]:
# Render the autoencoder's layer graph to 'layers.png' with hiddenlayer.
# NOTE(review): the `import hiddenlayer as hl` line and the hl-specific
# `transforms` list are commented out in the cell above, so `hl` is undefined
# here and `transforms` presumably still refers to torchvision.transforms —
# restore that cell before running this one.
graph = hl.build_graph(model_autoencoder, torch.rand([784]).to(device), transforms)
graph.theme = hl.graph.THEMES['blue'].copy()
graph.save('layers', format='png')

In [None]:
# Function of spliting the dataset into training set and validation set
def split_huge_file(file, out1, out2, percentage):
    """Split a text file line by line into two files at random.

    Each line of `file` goes to `out1` with probability `percentage` and to
    `out2` otherwise, so the split sizes are only approximate. The draw uses
    the module-level `random` generator; seed it beforehand for a repeatable
    split.

    Args:
        file: path of the UTF-8 text file to split.
        out1: path that receives roughly `percentage` of the lines.
        out2: path that receives the remaining lines.
        percentage: probability in [0, 1] that a line goes to `out1`.
    """
    # All three files use UTF-8 so lines read from the input can always be
    # written back, whatever the platform's default encoding is.
    with open(file, 'r', encoding="utf-8") as fin, \
         open(out1, 'w', encoding="utf-8") as foutBig, \
         open(out2, 'w', encoding="utf-8") as foutSmall:

        for line in fin:
            destination = foutBig if random.random() < percentage else foutSmall
            destination.write(line)

In [None]:
path = '/content/drive/My Drive/'
split_huge_file(os.path.join(path, f'train_triplets.txt'), 'train_triplets_splits.txt', 'val_triplets_splits.txt', percentage=0.9)

In [None]:
torch.cuda.empty_cache()

In [None]:
!nvidia-smi

Wed May 11 04:42:46 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Image loader helper function
def default_image_loader(path):
    """Open the image file at `path` and return it converted to RGB mode."""
    # convert() returns a new, fully loaded image, so the file opened by
    # Image.open can be closed as soon as the conversion is done.
    with Image.open(path) as img:
        return img.convert('RGB')

In [None]:
im = Image.open(r"/content/drive/My Drive/food/00003.jpg")

In [None]:
# display(im)

In [None]:
data = np.asarray(im)
data.shape

(329, 468, 3)

In [None]:
# im.resize((354,242))

In [None]:
class TripletImageLoader(torch.utils.data.Dataset):
    """Dataset yielding (anchor, positive, negative) image triplets."""

    def __init__(self, base_path, triplets_file_name, transform=None, loader=default_image_loader):
        """Read the triplet list and remember how to load the images.

        Args:
            base_path: Directory that contains the images; the image with
                id ``X`` is read from ``<base_path>/X.jpg``.
            triplets_file_name: Text file with one triplet per line, written
                as three whitespace-separated image ids. For example, the
                triplet "00723 00478 02630" denotes that the dish in image
                "00723.jpg" is more similar in taste to the dish in image
                "00478.jpg" than to the dish in image "02630.jpg" according
                to a human annotator.
            transform: Optional callable applied to every loaded image.
            loader: Callable that maps an image path to an image object.

        Raises:
            IndexError: If a non-blank line holds fewer than three fields.
        """
        self.base_path = base_path
        triplets = []
        # `with` releases the file handle as soon as the list has been read.
        with open(triplets_file_name) as triplets_file:
            for line in triplets_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                triplets.append((fields[0], fields[1], fields[2]))  # anchor, positive, negative
        self.triplets = triplets
        self.transform = transform
        self.loader = loader

    def _load(self, image_id):
        """Load one image by id and apply the optional transform."""
        img = self.loader(os.path.join(self.base_path, f'{image_id}.jpg'))
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, index):
        """Return the (anchor, positive, negative) images of triplet `index`."""
        anchor_id, positive_id, negative_id = self.triplets[index]
        return self._load(anchor_id), self._load(positive_id), self._load(negative_id)

    def __len__(self):
        """Number of triplets in the dataset."""
        return len(self.triplets)

In [None]:
torch.cuda.is_available()

True

In [None]:
device

'cuda'

In [None]:
# Copy the test_triplets dataset to the working directory
!cp /content/drive/MyDrive/test_triplets.txt /content

In [None]:
# Dataset and Transformations

############# Datasets and Dataloaders ################
def _make_transform(augment=False):
    """Build the preprocessing pipeline for one split.

    Args:
        augment: When True, add the random augmentation used for training.

    Returns:
        A `transforms.Compose` that turns a PIL image into a normalized tensor.
    """
    steps = [
        transforms.ToTensor(),  # PIL image -> float tensor with values in [0, 1]
        transforms.Resize(IMAGE_SIZE),  # IMAGE_SIZE is defined elsewhere in the notebook
    ]
    if augment:
        # We want the network to be robust to geometric transformations that
        # leave the image semantically invariant. Other options that were tried
        # and are currently disabled: RandomVerticalFlip(p=0.5),
        # RandomRotation(45), ColorJitter(brightness=0.2, contrast=0.2).
        steps.append(transforms.RandomHorizontalFlip(p=0.5))
    # (mean, mean, mean), (std, std, std):
    # output[channel] = (input[channel] - mean[channel]) / std[channel],
    # which maps the [0, 1] tensor to the range [-1, 1].
    steps.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
    return transforms.Compose(steps)


transform_train = _make_transform(augment=True)
# Validation and test use the same deterministic pipeline (no augmentation).
transform_val = _make_transform()
transform_test = _make_transform()


path = '/content/drive/MyDrive/food'
train_dataset = TripletImageLoader(path, 'train_triplets_splits.txt', transform=transform_train)
val_dataset = TripletImageLoader(path, 'val_triplets_splits.txt', transform=transform_val)
test_dataset = TripletImageLoader(path, 'test_triplets.txt', transform=transform_test)

In [None]:
len(train_dataset), len(val_dataset), len(test_dataset)

(58933, 582, 59544)

In [None]:
test_dataset[0]

(tensor([[[ 0.6848,  0.6188,  0.6295,  ...,  0.6845,  0.6174,  0.5918],
          [ 0.7634,  0.6081,  0.6502,  ...,  0.7868,  0.7373,  0.6763],
          [ 0.7259,  0.7257,  0.6671,  ...,  0.7282,  0.7121,  0.6763],
          ...,
          [ 0.1368,  0.1779,  0.1858,  ...,  0.5491,  0.5174,  0.5192],
          [ 0.1719,  0.1433,  0.1679,  ...,  0.5418,  0.5576,  0.5111],
          [ 0.2298,  0.0134,  0.0636,  ...,  0.6480,  0.7295,  0.7176]],
 
         [[ 0.3083,  0.2423,  0.2557,  ...,  0.2424,  0.3027,  0.3446],
          [ 0.3778,  0.2235,  0.2622,  ...,  0.3537,  0.3888,  0.3711],
          [ 0.3208,  0.3215,  0.2648,  ...,  0.3097,  0.3167,  0.2928],
          ...,
          [-0.2550, -0.2139, -0.2107,  ...,  0.0953,  0.1322,  0.1737],
          [-0.2046, -0.2332, -0.2138,  ...,  0.0879,  0.1528,  0.1360],
          [-0.1141, -0.3449, -0.3147,  ...,  0.1256,  0.0728, -0.0351]],
 
         [[ 0.1512,  0.0762,  0.0626,  ..., -0.0076, -0.0635, -0.0872],
          [ 0.2222,  0.0503,

In [None]:
# One loader per split. Only the training loader shuffles, so validation and
# test batches come out in file order. num_workers=0 keeps data loading in
# the main process.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=0)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)

val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

In [None]:
len(train_loader), len(val_loader), len(test_loader)

(921, 10, 931)

In [None]:
type(train_loader)

torch.utils.data.dataloader.DataLoader

In [None]:
# Visualization of Dataset
import matplotlib.pyplot as plt
import numpy as np

# helper to display a normalized image tensor
def imshow(img):
    """Show an image tensor in a new matplotlib figure.

    Args:
        img: Tensor laid out channels-first; it is rescaled with
            ``img / 2 + 0.5`` (the inverse of a mean 0.5 / std 0.5
            normalization) before being drawn.
    """
    unnormalized = img / 2 + 0.5     # unnormalize
    channels_last = unnormalized.permute(1, 2, 0)  # matplotlib wants H x W x C
    plt.figure()
    plt.imshow(channels_last)
    plt.show()


# get some random training images
# Use the built-in next(): the loader iterator's `.next()` method is not part
# of the Python iterator protocol and is not available on newer PyTorch
# releases, whereas next(iterator) works on every version.
dataiter = iter(train_loader)
images_anchor, images_positive, images_negative = next(dataiter)

# show images
# imshow(torchvision.utils.make_grid(images_anchor))
# imshow(torchvision.utils.make_grid(images_positive))
# imshow(torchvision.utils.make_grid(images_negative))

In [None]:
images_anchor

tensor([[[[ 0.6884,  0.6038,  0.7176,  ...,  0.8562,  0.6681,  0.6141],
          [ 0.6604,  0.6287,  0.6395,  ...,  0.7792,  0.7544,  0.7447],
          [ 0.6743,  0.6843,  0.6265,  ...,  0.7189,  0.7146,  0.7013],
          ...,
          [ 0.4680,  0.4406,  0.4628,  ...,  0.7125,  0.7397,  0.8157],
          [ 0.3393,  0.3476,  0.4362,  ...,  0.7232,  0.7844,  0.8184],
          [ 0.3865,  0.4128,  0.5302,  ...,  0.6692,  0.7686,  0.7089]],

         [[ 0.1080,  0.0235,  0.1323,  ...,  0.5122,  0.3442,  0.3048],
          [ 0.0800,  0.0474,  0.0532,  ...,  0.4193,  0.4160,  0.4125],
          [ 0.0898,  0.0983,  0.0382,  ...,  0.3534,  0.3613,  0.3512],
          ...,
          [ 0.4019,  0.3458,  0.3217,  ...,  0.6948,  0.7417,  0.8163],
          [ 0.3007,  0.2792,  0.3206,  ...,  0.6954,  0.7830,  0.8350],
          [ 0.3882,  0.3781,  0.4382,  ...,  0.6367,  0.7788,  0.7444]],

         [[-0.2449, -0.3295, -0.2058,  ...,  0.4415,  0.2583,  0.1909],
          [-0.2730, -0.3027, -

In [None]:
'''
Data loaded
'''
print('================== DATA LOADED ==================')



In [None]:
'''
Construct a triplet net: three CNN branches with shared weights, each ending in a replacement fully connected layer with a 1024-dimensional output, run in parallel.
The 1024-dimensional outputs are used to compute the Euclidean distances from which the triplet margin loss is constructed.
'''
#########################NET##############################

#The CNNs with shared weights and the fully connected layer replaced
def FeatureExtractNET(**kwargs):
    """Build the embedding network on top of a pretrained ResNet-18.

    Keyword arguments are accepted but not used.

    Returns:
        An `EmbeddingNet` wrapping the pretrained ResNet-18 backbone.
    """
    backbone = models.resnet18(pretrained=True)
    embedding_net = EmbeddingNet(backbone)
    return embedding_net

#The CNN used by Triplet Net: the given backbone followed by a new fully connected layer
class EmbeddingNet(nn.Module):
    """Backbone CNN whose final linear layer is replaced by a 1024-d projection.

    The backbone is the model passed in by FeatureExtractNET(). Its last child
    module (the original fully connected layer) is dropped and a new fully
    connected layer with a 1024-dimensional output is attached instead.
    """

    def __init__(self, resnet):
        """Assemble the feature extractor and the projection head.

        Args:
            resnet: Backbone module. Its last child is discarded, and
                ``resnet.fc.in_features`` gives the input width of the new head.
        """
        super().__init__()

        # Keep every backbone layer except the final linear classifier.
        backbone_layers = list(resnet.children())
        self.features = nn.Sequential(*backbone_layers[:-1])

        # Project the backbone features (512 for ResNet-18) to 1024 dimensions.
        # (BatchNorm1d(1024) / Dropout(0.3) after the linear layer are
        # currently disabled.)
        self.fc1 = nn.Sequential(nn.Linear(resnet.fc.in_features, 1024))

    def forward(self, x):
        """Return the 1024-dimensional embedding for a batch of images."""
        feature_maps = self.features(x)
        # Collapse everything but the batch dimension before the linear head.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc1(flat)
    
#The overall network consisting of three embedding nets with shared weights and three 1024 dimension outputs
class TripletNet(nn.Module):
    """Applies one shared embedding network to all three images of a triplet."""

    def __init__(self, embeddingnet):
        """Store the embedding network that is shared by the three branches."""
        super().__init__()
        self.embeddingnet = embeddingnet

    def forward(self, a, p, n):
        """Embed anchor `a`, positive `p` and negative `n` with the same weights.

        Returns:
            Tuple ``(embedded_a, embedded_p, embedded_n)``.
        """
        embed = self.embeddingnet
        # Three separate passes, in anchor / positive / negative order.
        return embed(a), embed(p), embed(n)

In [None]:
net = TripletNet(FeatureExtractNET())

# Move the net to the device chosen at the top of the notebook ('cuda' when a
# GPU is available, otherwise 'cpu'). With a GPU this is the same as .cuda();
# without one the cell no longer crashes.
print("==> Initialize CUDA support for TripletNet model ...")
net = torch.nn.DataParallel(net).to(device)
# cudnn.benchmark = True
net

==> Initialize CUDA support for TripletNet model ...


DataParallel(
  (module): TripletNet(
    (embeddingnet): EmbeddingNet(
      (features): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (4): Sequential(
          (0): BasicBlock(
            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu): ReLU(inplace=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (1): BasicBlock(
            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1)

In [None]:
torch.cuda.empty_cache()

In [None]:
# batch = next(iter(train_loader))
# batch[0], batch[1], batch[2] = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()
# net(batch[0],batch[1],batch[2])[0].size()

In [None]:
torch.cuda.empty_cache()

In [None]:
# from torch.autograd import Variable
# for epoch in range(1):

#         running_loss = 0.0
#         loss_train = 0.0
#         for batch_idx, (data1, data2, data3) in enumerate(train_loader):

# #             if is_gpu:
# #                 data1, data2, data3 = data1.cuda(), data2.cuda(), data3.cuda()

#             # wrap in torch.autograd.Variable
#             data1, data2, data3 = Variable(
#                 data1), Variable(data2), Variable(data3)
#             print('anchor', data1.size())
#             print('positive', data2.size())
#             print('negative', data3.size())

#             # compute output and loss
#             embedded_a, embedded_p, embedded_n = net(data1, data2, data3)
#             loss = criterion(embedded_a, embedded_p, embedded_n)
#             print(loss)

#             # compute gradient and do optimizer step
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             # print the loss
#             running_loss += loss.data

# #             loss_train_cls = torch.sum(
# #                 1 * (criterion_val(embedded_a, embedded_p,
# #                                    embedded_n) > 0)) / train_batch_size  # CHANGED, MAY NEED TO REVERT BACK

# #             loss_train += loss_train_cls.data

#             if batch_idx % 30 == 0:
#                 print("mini Batch Loss: {}".format(loss.data))

In [None]:
'''
Network constructed
'''
print('================== NETWORK CONSTRUCTED ==================')



In [None]:
'''Used to test the accuracy function'''
# import torch
# import torch.nn as nn
# pdist = nn.PairwiseDistance(p=2)
# input1 = torch.randn(64, 1024)
# input2 = torch.randn(64, 1024)
# input3 = torch.randn(64, 1024)
# print(input1.size())
# print(input2.size())
# print(input3.size())
# dist1 = pdist(input1, input2)
# dist2 = pdist(input1, input3)
# print(dist1.size())
# print(dist2.size())
# pred = dist1 - dist2
# print(pred.size())
# sum = 0
# for i in range(pred.size()[0]):
#   if pred[i] < 0:
#     sum+=1
# print(sum/pred.size()[0])
# print((pred < 0).sum()*1.0/pred.size()[0])

'Used to test the accuracy function'

In [None]:
# import random
# for i in range(10):
#   a = random.randint(0,10)
#   print(a)

In [None]:
import torch.nn.functional as F
from torch.autograd import Variable

def accuracy(dista, distb):
    """Fraction of triplets whose first distance is strictly the smaller one.

    Args:
        dista: 1-D tensor of anchor-positive distances (expected to be small).
        distb: 1-D tensor of anchor-negative distances, same length as `dista`.

    Returns:
        0-dim CPU tensor holding ``count(dista < distb) / len(dista)``.
    """
    pred = (dista - distb).detach().cpu()
    return (pred < 0).sum() * 1.0 / dista.size()[0]


def val_accuracy(trainednet, valloader, valloader_iter):
    """Return the triplet accuracy of `trainednet` on one validation batch.

    Args:
        trainednet: Module mapping ``(anchor, positive, negative)`` batches to
            their three embeddings.
        valloader: Iterable of validation batches, used to restart iteration
            when `valloader_iter` is exhausted.
        valloader_iter: Iterator over `valloader` supplying the next batch.
            NOTE: this function cannot rebind the caller's iterator. Once it
            is exhausted, every later call restarts from the first batch of
            `valloader`; callers that want to keep cycling should create a
            fresh iterator themselves.

    Returns:
        0-dim tensor with the fraction of triplets in the batch for which the
        anchor is closer to the positive than to the negative.
    """
    try:
        data1, data2, data3 = next(valloader_iter)
    except StopIteration:
        data1, data2, data3 = next(iter(valloader))

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(trainednet.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data1, data2, data3 = data1.to(target_device), data2.to(target_device), data3.to(target_device)

    # no_grad() alone does not stop BatchNorm from updating its running
    # statistics, so evaluate in eval mode and restore the previous mode after.
    was_training = trainednet.training
    trainednet.eval()
    try:
        with torch.no_grad():
            # compute embeddings and the two distances of every triplet
            embedded_x, embedded_y, embedded_z = trainednet(data1, data2, data3)
            dist_a = F.pairwise_distance(embedded_x, embedded_y, 2)
            dist_b = F.pairwise_distance(embedded_x, embedded_z, 2)
            print('dist a: {0}, dist b: {1}'.format(dist_a, dist_b))
            batch_accuracy = accuracy(dist_a, dist_b)
            print('random batch accuracy: {0} '.format(batch_accuracy))
    finally:
        trainednet.train(was_training)

    return batch_accuracy

In [None]:
# torch.cuda.empty_cache()
# batch = next(iter(train_loader))
# batch[0], batch[1], batch[2] = batch[0].cuda(), batch[1].cuda(), batch[2].cuda()
# embedded_x, embedded_y, embedded_z = net(batch[0],batch[1],batch[2])
# dist_a = F.pairwise_distance(embedded_x, embedded_y, 2)
# dist_b = F.pairwise_distance(embedded_x, embedded_z, 2)
# accuracy = accuracy(dist_a, dist_b)
# print(accuracy)

In [None]:
# Launch training and keep the value returned by `train`.
# NOTE(review): `train`, `criterion` and `optimizer` are not defined in the
# cells visible above this one; make sure their defining cells run first so
# that Restart & Run All works -- confirm.
# The literal 1 is presumably the number of epochs -- TODO confirm against `train`.
trained_net = train(net, criterion, optimizer, 1, train_loader, val_loader, test_loader)



  0%|          | 0/921 [00:00<?, ?it/s]

mini Batch 1 Loss: 4.577956676483154


  0%|          | 1/921 [00:42<10:58:57, 42.98s/it]

mini Batch 2 Loss: 4.295666694641113


  0%|          | 2/921 [01:23<10:36:24, 41.55s/it]

mini Batch 3 Loss: 4.576473236083984


  0%|          | 3/921 [02:03<10:24:03, 40.79s/it]

mini Batch 4 Loss: 4.245948791503906


  0%|          | 4/921 [02:44<10:22:16, 40.72s/it]

mini Batch 5 Loss: 4.7447099685668945


  1%|          | 5/921 [03:19<9:54:07, 38.92s/it] 

mini Batch 6 Loss: 4.502058506011963


  1%|          | 6/921 [03:55<9:35:34, 37.74s/it]

mini Batch 7 Loss: 4.7680463790893555


  1%|          | 7/921 [04:33<9:35:46, 37.80s/it]

mini Batch 8 Loss: 4.822842597961426


  1%|          | 8/921 [05:07<9:19:32, 36.77s/it]

mini Batch 9 Loss: 4.5449442863464355


  1%|          | 9/921 [05:42<9:08:18, 36.07s/it]

mini Batch 10 Loss: 4.905109882354736


  1%|          | 10/921 [06:16<9:00:02, 35.57s/it]

mini Batch 11 Loss: 4.675559043884277


  1%|          | 11/921 [06:44<8:22:36, 33.14s/it]

mini Batch 12 Loss: 4.543739318847656


  1%|▏         | 12/921 [07:11<7:56:16, 31.44s/it]

mini Batch 13 Loss: 4.637313365936279


  1%|▏         | 13/921 [07:38<7:34:58, 30.06s/it]

mini Batch 14 Loss: 4.495257377624512


  2%|▏         | 14/921 [08:04<7:15:24, 28.80s/it]

mini Batch 15 Loss: 4.703888893127441


  2%|▏         | 15/921 [08:29<6:55:38, 27.53s/it]

mini Batch 16 Loss: 4.526910781860352


  2%|▏         | 16/921 [08:53<6:42:46, 26.70s/it]

mini Batch 17 Loss: 4.522108554840088


  2%|▏         | 17/921 [09:17<6:28:15, 25.77s/it]

mini Batch 18 Loss: 4.813433647155762


  2%|▏         | 18/921 [09:41<6:18:21, 25.14s/it]

mini Batch 19 Loss: 4.636017799377441


  2%|▏         | 19/921 [09:58<5:43:44, 22.86s/it]

mini Batch 20 Loss: 4.73684549331665


  2%|▏         | 20/921 [10:20<5:36:06, 22.38s/it]

mini Batch 21 Loss: 4.779278755187988


  2%|▏         | 21/921 [10:41<5:29:32, 21.97s/it]

mini Batch 22 Loss: 4.469632148742676


  2%|▏         | 22/921 [10:58<5:10:36, 20.73s/it]

mini Batch 23 Loss: 4.714325904846191


  2%|▏         | 23/921 [11:18<5:04:48, 20.37s/it]

mini Batch 24 Loss: 4.227836608886719


  3%|▎         | 24/921 [11:36<4:54:53, 19.73s/it]

mini Batch 25 Loss: 4.15363883972168


  3%|▎         | 25/921 [11:57<4:57:58, 19.95s/it]

mini Batch 26 Loss: 4.271585464477539


  3%|▎         | 26/921 [12:15<4:48:16, 19.33s/it]

mini Batch 27 Loss: 4.177492141723633


  3%|▎         | 27/921 [12:31<4:34:56, 18.45s/it]

mini Batch 28 Loss: 4.24307918548584


  3%|▎         | 28/921 [12:45<4:12:51, 16.99s/it]

mini Batch 29 Loss: 4.411628723144531


  3%|▎         | 29/921 [13:04<4:21:34, 17.60s/it]

mini Batch 30 Loss: 4.407473564147949


  3%|▎         | 30/921 [13:20<4:15:17, 17.19s/it]

mini Batch 31 Loss: 4.521507263183594


  3%|▎         | 31/921 [13:35<4:08:04, 16.72s/it]

mini Batch 32 Loss: 4.399228096008301


  3%|▎         | 32/921 [13:48<3:49:04, 15.46s/it]

mini Batch 33 Loss: 4.634552001953125


  4%|▎         | 33/921 [13:58<3:24:21, 13.81s/it]

mini Batch 34 Loss: 4.205892562866211


  4%|▎         | 34/921 [14:12<3:27:31, 14.04s/it]

mini Batch 35 Loss: 4.702113151550293


  4%|▍         | 35/921 [14:27<3:28:48, 14.14s/it]

mini Batch 36 Loss: 4.723673343658447


  4%|▍         | 36/921 [14:39<3:20:35, 13.60s/it]

mini Batch 37 Loss: 3.7579641342163086


  4%|▍         | 37/921 [14:50<3:07:21, 12.72s/it]

mini Batch 38 Loss: 4.412712097167969


  4%|▍         | 38/921 [14:59<2:51:31, 11.66s/it]

mini Batch 39 Loss: 4.002339839935303


  4%|▍         | 39/921 [15:12<2:55:23, 11.93s/it]

mini Batch 40 Loss: 4.61607027053833


  4%|▍         | 40/921 [15:24<2:55:37, 11.96s/it]

mini Batch 41 Loss: 4.634289741516113


  4%|▍         | 41/921 [15:35<2:51:58, 11.73s/it]

mini Batch 42 Loss: 4.677764892578125


  5%|▍         | 42/921 [15:44<2:39:52, 10.91s/it]

mini Batch 43 Loss: 3.973055124282837


  5%|▍         | 43/921 [15:53<2:30:46, 10.30s/it]

mini Batch 44 Loss: 4.782166481018066


  5%|▍         | 44/921 [16:02<2:25:09,  9.93s/it]

mini Batch 45 Loss: 3.82766056060791


  5%|▍         | 45/921 [16:16<2:42:02, 11.10s/it]

mini Batch 46 Loss: 4.069517135620117


  5%|▍         | 46/921 [16:26<2:38:03, 10.84s/it]

mini Batch 47 Loss: 4.000639915466309


  5%|▌         | 47/921 [16:35<2:30:30, 10.33s/it]

mini Batch 48 Loss: 4.060513973236084


  5%|▌         | 48/921 [16:43<2:20:44,  9.67s/it]

mini Batch 49 Loss: 4.304469108581543


  5%|▌         | 49/921 [16:53<2:20:33,  9.67s/it]

mini Batch 50 Loss: 4.426229476928711


  5%|▌         | 50/921 [17:02<2:19:22,  9.60s/it]

mini Batch 51 Loss: 4.402433395385742


  6%|▌         | 51/921 [17:13<2:22:39,  9.84s/it]

mini Batch 52 Loss: 4.308222770690918


  6%|▌         | 52/921 [17:19<2:07:14,  8.79s/it]

mini Batch 53 Loss: 4.409813404083252


  6%|▌         | 53/921 [17:30<2:16:35,  9.44s/it]

mini Batch 54 Loss: 3.889352321624756


  6%|▌         | 54/921 [17:39<2:13:11,  9.22s/it]

mini Batch 55 Loss: 3.6559386253356934


  6%|▌         | 55/921 [17:44<1:56:01,  8.04s/it]

mini Batch 56 Loss: 3.882993221282959


  6%|▌         | 56/921 [17:51<1:51:25,  7.73s/it]

mini Batch 57 Loss: 3.8151731491088867


  6%|▌         | 57/921 [18:00<1:56:32,  8.09s/it]

mini Batch 58 Loss: 4.020481109619141


  6%|▋         | 58/921 [18:07<1:53:45,  7.91s/it]

mini Batch 59 Loss: 3.963303327560425


  6%|▋         | 59/921 [18:15<1:54:22,  7.96s/it]

mini Batch 60 Loss: 3.43123197555542


  7%|▋         | 60/921 [18:23<1:53:01,  7.88s/it]

mini Batch 61 Loss: 3.6491308212280273


  7%|▋         | 61/921 [18:31<1:53:58,  7.95s/it]

mini Batch 62 Loss: 3.2912278175354004


  7%|▋         | 62/921 [18:36<1:42:17,  7.15s/it]

mini Batch 63 Loss: 4.30610466003418


  7%|▋         | 63/921 [18:43<1:39:17,  6.94s/it]

mini Batch 64 Loss: 3.8958754539489746


  7%|▋         | 64/921 [18:50<1:38:05,  6.87s/it]

mini Batch 65 Loss: 3.8177666664123535


  7%|▋         | 65/921 [18:56<1:37:03,  6.80s/it]

mini Batch 66 Loss: 3.6224608421325684


  7%|▋         | 66/921 [19:02<1:30:17,  6.34s/it]

mini Batch 67 Loss: 3.8265116214752197


  7%|▋         | 67/921 [19:06<1:21:49,  5.75s/it]

mini Batch 68 Loss: 3.561000108718872


  7%|▋         | 68/921 [19:13<1:27:00,  6.12s/it]

mini Batch 69 Loss: 3.660714626312256


  7%|▋         | 69/921 [19:19<1:28:35,  6.24s/it]

mini Batch 70 Loss: 4.450490951538086


  8%|▊         | 70/921 [19:24<1:21:15,  5.73s/it]

mini Batch 71 Loss: 4.100796222686768


  8%|▊         | 71/921 [19:28<1:16:10,  5.38s/it]

mini Batch 72 Loss: 3.7530677318573


  8%|▊         | 72/921 [19:34<1:14:39,  5.28s/it]

mini Batch 73 Loss: 4.132476329803467


  8%|▊         | 73/921 [19:39<1:14:46,  5.29s/it]

mini Batch 74 Loss: 4.03544282913208


  8%|▊         | 74/921 [19:45<1:17:02,  5.46s/it]

mini Batch 75 Loss: 3.4225001335144043


  8%|▊         | 75/921 [19:51<1:18:43,  5.58s/it]

mini Batch 76 Loss: 3.9144082069396973


  8%|▊         | 76/921 [19:55<1:14:41,  5.30s/it]

mini Batch 77 Loss: 3.8980226516723633


  8%|▊         | 77/921 [20:00<1:11:24,  5.08s/it]

mini Batch 78 Loss: 2.8729348182678223


  8%|▊         | 78/921 [20:06<1:15:15,  5.36s/it]

mini Batch 79 Loss: 3.7692723274230957


  9%|▊         | 79/921 [20:11<1:15:47,  5.40s/it]

mini Batch 80 Loss: 3.7334775924682617


  9%|▊         | 80/921 [20:15<1:08:55,  4.92s/it]

mini Batch 81 Loss: 3.5148305892944336


  9%|▉         | 81/921 [20:20<1:07:04,  4.79s/it]

mini Batch 82 Loss: 3.048290491104126


  9%|▉         | 82/921 [20:23<1:00:54,  4.36s/it]

mini Batch 83 Loss: 3.222562313079834


  9%|▉         | 83/921 [20:27<59:54,  4.29s/it]  

mini Batch 84 Loss: 3.6185572147369385


  9%|▉         | 84/921 [20:32<1:04:28,  4.62s/it]

mini Batch 85 Loss: 3.2893483638763428


  9%|▉         | 85/921 [20:37<1:04:13,  4.61s/it]

mini Batch 86 Loss: 3.479970693588257


  9%|▉         | 86/921 [20:41<59:51,  4.30s/it]  

mini Batch 87 Loss: 3.315471887588501


  9%|▉         | 87/921 [20:44<54:29,  3.92s/it]

mini Batch 88 Loss: 3.93784236907959


 10%|▉         | 88/921 [20:47<50:45,  3.66s/it]

mini Batch 89 Loss: 3.785296678543091


 10%|▉         | 89/921 [20:51<51:51,  3.74s/it]

mini Batch 90 Loss: 4.405688285827637


 10%|▉         | 90/921 [20:55<54:36,  3.94s/it]

mini Batch 91 Loss: 3.745541572570801


 10%|▉         | 91/921 [20:59<55:26,  4.01s/it]

mini Batch 92 Loss: 4.266786575317383


 10%|▉         | 92/921 [21:04<56:46,  4.11s/it]

mini Batch 93 Loss: 3.6296660900115967


 10%|█         | 93/921 [21:08<56:13,  4.07s/it]

mini Batch 94 Loss: 3.3604483604431152


 10%|█         | 94/921 [21:11<53:51,  3.91s/it]

mini Batch 95 Loss: 3.9797000885009766


 10%|█         | 95/921 [21:15<52:07,  3.79s/it]

mini Batch 96 Loss: 3.2269439697265625


 10%|█         | 96/921 [21:18<51:30,  3.75s/it]

mini Batch 97 Loss: 3.4888856410980225


 11%|█         | 97/921 [21:22<50:56,  3.71s/it]

mini Batch 98 Loss: 3.5289111137390137


 11%|█         | 98/921 [21:25<47:51,  3.49s/it]

mini Batch 99 Loss: 3.132528305053711


 11%|█         | 99/921 [21:28<48:21,  3.53s/it]

mini Batch 100 Loss: 3.462822914123535


 11%|█         | 100/921 [21:32<47:42,  3.49s/it]

mini Batch 101 Loss: 3.8173084259033203


 11%|█         | 101/921 [21:35<45:37,  3.34s/it]

mini Batch 102 Loss: 3.461411237716675


 11%|█         | 102/921 [21:38<45:18,  3.32s/it]

mini Batch 103 Loss: 2.959843635559082


 11%|█         | 103/921 [21:42<46:36,  3.42s/it]

mini Batch 104 Loss: 3.5624947547912598


 11%|█▏        | 104/921 [21:45<44:19,  3.25s/it]

mini Batch 105 Loss: 3.693429946899414


 11%|█▏        | 105/921 [21:48<46:20,  3.41s/it]

mini Batch 106 Loss: 3.4628570079803467


 12%|█▏        | 106/921 [21:51<42:05,  3.10s/it]

mini Batch 107 Loss: 3.0760724544525146


 12%|█▏        | 107/921 [21:54<41:38,  3.07s/it]

mini Batch 108 Loss: 3.055321216583252


 12%|█▏        | 108/921 [21:56<40:07,  2.96s/it]

mini Batch 109 Loss: 3.0794296264648438


 12%|█▏        | 109/921 [22:00<40:55,  3.02s/it]

mini Batch 110 Loss: 3.6278092861175537


 12%|█▏        | 110/921 [22:03<41:46,  3.09s/it]

mini Batch 111 Loss: 3.348193645477295


 12%|█▏        | 111/921 [22:07<44:44,  3.31s/it]

mini Batch 112 Loss: 3.5192198753356934


 12%|█▏        | 112/921 [22:10<45:18,  3.36s/it]

mini Batch 113 Loss: 2.7058746814727783


 12%|█▏        | 113/921 [22:13<44:44,  3.32s/it]

mini Batch 114 Loss: 2.9337759017944336


 12%|█▏        | 114/921 [22:16<43:33,  3.24s/it]

mini Batch 115 Loss: 3.4520599842071533


 12%|█▏        | 115/921 [22:20<43:01,  3.20s/it]

mini Batch 116 Loss: 3.518089771270752


 13%|█▎        | 116/921 [22:22<41:35,  3.10s/it]

mini Batch 117 Loss: 3.1233417987823486


 13%|█▎        | 117/921 [22:26<44:41,  3.34s/it]

mini Batch 118 Loss: 2.965437650680542


 13%|█▎        | 118/921 [22:29<42:52,  3.20s/it]

mini Batch 119 Loss: 2.8931527137756348


 13%|█▎        | 119/921 [22:32<40:04,  3.00s/it]

mini Batch 120 Loss: 3.3376195430755615


 13%|█▎        | 120/921 [22:35<40:00,  3.00s/it]

mini Batch 121 Loss: 3.353379964828491


 13%|█▎        | 121/921 [22:39<43:37,  3.27s/it]

mini Batch 122 Loss: 4.280056476593018


 13%|█▎        | 122/921 [22:42<42:04,  3.16s/it]

mini Batch 123 Loss: 3.2694759368896484


 13%|█▎        | 123/921 [22:45<41:15,  3.10s/it]

mini Batch 124 Loss: 2.9568569660186768


 13%|█▎        | 124/921 [22:47<40:02,  3.01s/it]

mini Batch 125 Loss: 3.5385220050811768


 14%|█▎        | 125/921 [22:50<39:34,  2.98s/it]

mini Batch 126 Loss: 4.589137077331543


 14%|█▎        | 126/921 [22:53<38:51,  2.93s/it]

mini Batch 127 Loss: 3.6920366287231445


 14%|█▍        | 127/921 [22:55<36:56,  2.79s/it]

mini Batch 128 Loss: 3.860515594482422


 14%|█▍        | 128/921 [22:58<36:21,  2.75s/it]

mini Batch 129 Loss: 3.4087390899658203


 14%|█▍        | 129/921 [23:01<36:34,  2.77s/it]

mini Batch 130 Loss: 3.30956768989563


 14%|█▍        | 130/921 [23:04<35:56,  2.73s/it]

mini Batch 131 Loss: 3.107409954071045


 14%|█▍        | 131/921 [23:06<34:48,  2.64s/it]

mini Batch 132 Loss: 2.7721080780029297


 14%|█▍        | 132/921 [23:08<33:44,  2.57s/it]

mini Batch 133 Loss: 3.891716718673706


 14%|█▍        | 133/921 [23:11<33:24,  2.54s/it]

mini Batch 134 Loss: 3.7352328300476074


 15%|█▍        | 134/921 [23:13<33:07,  2.53s/it]

mini Batch 135 Loss: 2.6187729835510254


 15%|█▍        | 135/921 [23:16<33:56,  2.59s/it]

mini Batch 136 Loss: 3.136824131011963


 15%|█▍        | 136/921 [23:18<32:51,  2.51s/it]

mini Batch 137 Loss: 3.0992703437805176


 15%|█▍        | 137/921 [23:21<33:21,  2.55s/it]

mini Batch 138 Loss: 3.6401894092559814


 15%|█▍        | 138/921 [23:24<33:46,  2.59s/it]

mini Batch 139 Loss: 3.5362205505371094


 15%|█▌        | 139/921 [23:26<33:16,  2.55s/it]

mini Batch 140 Loss: 3.5941338539123535


 15%|█▌        | 140/921 [23:30<36:59,  2.84s/it]

mini Batch 141 Loss: 3.1442301273345947


 15%|█▌        | 141/921 [23:32<35:38,  2.74s/it]

mini Batch 142 Loss: 3.5010573863983154


 15%|█▌        | 142/921 [23:34<33:24,  2.57s/it]

mini Batch 143 Loss: 3.5789403915405273


 16%|█▌        | 143/921 [23:37<33:08,  2.56s/it]

mini Batch 144 Loss: 2.7984585762023926


 16%|█▌        | 144/921 [23:40<34:20,  2.65s/it]

mini Batch 145 Loss: 2.9135682582855225


 16%|█▌        | 145/921 [23:42<33:49,  2.62s/it]

mini Batch 146 Loss: 3.2671546936035156


 16%|█▌        | 146/921 [23:45<32:41,  2.53s/it]

mini Batch 147 Loss: 3.749579668045044


 16%|█▌        | 147/921 [23:47<32:29,  2.52s/it]

mini Batch 148 Loss: 3.463634490966797


 16%|█▌        | 148/921 [23:50<32:17,  2.51s/it]

mini Batch 149 Loss: 3.3082337379455566


 16%|█▌        | 149/921 [23:52<33:17,  2.59s/it]

mini Batch 150 Loss: 3.9792799949645996


 16%|█▋        | 150/921 [23:55<31:29,  2.45s/it]

mini Batch 151 Loss: 3.9914357662200928


 16%|█▋        | 151/921 [23:57<31:23,  2.45s/it]

mini Batch 152 Loss: 3.7021079063415527


 17%|█▋        | 152/921 [24:00<32:21,  2.52s/it]

mini Batch 153 Loss: 2.558927059173584


 17%|█▋        | 153/921 [24:02<31:33,  2.47s/it]

mini Batch 154 Loss: 3.2986392974853516


 17%|█▋        | 154/921 [24:05<34:29,  2.70s/it]

mini Batch 155 Loss: 2.7452688217163086


 17%|█▋        | 155/921 [24:08<32:44,  2.56s/it]

mini Batch 156 Loss: 3.6780853271484375


 17%|█▋        | 156/921 [24:11<34:46,  2.73s/it]

mini Batch 157 Loss: 3.4499576091766357


 17%|█▋        | 157/921 [24:13<32:54,  2.58s/it]

mini Batch 158 Loss: 3.3409650325775146


 17%|█▋        | 158/921 [24:15<32:06,  2.52s/it]

mini Batch 159 Loss: 3.8878884315490723


 17%|█▋        | 159/921 [24:18<32:17,  2.54s/it]

mini Batch 160 Loss: 2.8579256534576416


 17%|█▋        | 160/921 [24:20<31:49,  2.51s/it]

mini Batch 161 Loss: 3.4289560317993164


 17%|█▋        | 161/921 [24:23<30:49,  2.43s/it]

mini Batch 162 Loss: 3.725266933441162


 18%|█▊        | 162/921 [24:25<30:52,  2.44s/it]

mini Batch 163 Loss: 4.2526679039001465


 18%|█▊        | 163/921 [24:27<30:14,  2.39s/it]

mini Batch 164 Loss: 3.25984787940979


 18%|█▊        | 164/921 [24:29<29:17,  2.32s/it]

mini Batch 165 Loss: 3.436659097671509


 18%|█▊        | 165/921 [24:32<30:49,  2.45s/it]

mini Batch 166 Loss: 3.4472928047180176


 18%|█▊        | 166/921 [24:34<30:02,  2.39s/it]

mini Batch 167 Loss: 3.247138023376465


 18%|█▊        | 167/921 [24:37<30:26,  2.42s/it]

mini Batch 168 Loss: 2.4959278106689453


 18%|█▊        | 168/921 [24:39<29:16,  2.33s/it]

mini Batch 169 Loss: 2.9243483543395996


 18%|█▊        | 169/921 [24:41<28:57,  2.31s/it]

mini Batch 170 Loss: 3.8001821041107178


 18%|█▊        | 170/921 [24:44<30:34,  2.44s/it]

mini Batch 171 Loss: 2.976076126098633


 19%|█▊        | 171/921 [24:46<29:55,  2.39s/it]

mini Batch 172 Loss: 3.020503282546997


 19%|█▊        | 172/921 [24:49<29:06,  2.33s/it]

mini Batch 173 Loss: 3.640380859375


 19%|█▉        | 173/921 [24:51<29:45,  2.39s/it]

mini Batch 174 Loss: 3.5494165420532227


 19%|█▉        | 174/921 [24:54<29:59,  2.41s/it]

mini Batch 175 Loss: 2.2843666076660156


 19%|█▉        | 175/921 [24:56<30:40,  2.47s/it]

mini Batch 176 Loss: 3.281114339828491


 19%|█▉        | 176/921 [24:58<29:40,  2.39s/it]

mini Batch 177 Loss: 3.4943668842315674


 19%|█▉        | 177/921 [25:01<30:57,  2.50s/it]

mini Batch 178 Loss: 3.322695255279541


 19%|█▉        | 178/921 [25:03<29:53,  2.41s/it]

mini Batch 179 Loss: 2.448610782623291


 19%|█▉        | 179/921 [25:06<29:44,  2.40s/it]

mini Batch 180 Loss: 4.020915508270264


 20%|█▉        | 180/921 [25:08<28:49,  2.33s/it]

mini Batch 181 Loss: 3.4034626483917236


 20%|█▉        | 181/921 [25:10<29:44,  2.41s/it]

mini Batch 182 Loss: 2.965223789215088


 20%|█▉        | 182/921 [25:13<28:50,  2.34s/it]

mini Batch 183 Loss: 2.932486057281494


 20%|█▉        | 183/921 [25:15<28:38,  2.33s/it]

mini Batch 184 Loss: 3.7024648189544678


 20%|█▉        | 184/921 [25:17<28:43,  2.34s/it]

mini Batch 185 Loss: 3.371516466140747


 20%|██        | 185/921 [25:20<28:31,  2.33s/it]

mini Batch 186 Loss: 3.207469940185547


 20%|██        | 186/921 [25:22<28:14,  2.31s/it]

mini Batch 187 Loss: 3.108297348022461


 20%|██        | 187/921 [25:24<28:08,  2.30s/it]

mini Batch 188 Loss: 3.030947685241699


 20%|██        | 188/921 [25:26<27:48,  2.28s/it]

mini Batch 189 Loss: 3.3123645782470703


 21%|██        | 189/921 [25:29<28:29,  2.34s/it]

mini Batch 190 Loss: 3.4401793479919434


 21%|██        | 190/921 [25:31<29:00,  2.38s/it]

mini Batch 191 Loss: 3.6560730934143066


 21%|██        | 191/921 [25:34<28:35,  2.35s/it]

mini Batch 192 Loss: 2.2197704315185547


 21%|██        | 192/921 [25:36<29:26,  2.42s/it]

mini Batch 193 Loss: 3.309600591659546


 21%|██        | 193/921 [25:39<31:13,  2.57s/it]

mini Batch 194 Loss: 3.7958085536956787


 21%|██        | 194/921 [25:42<31:10,  2.57s/it]

mini Batch 195 Loss: 3.646332263946533


 21%|██        | 195/921 [25:44<30:11,  2.50s/it]

mini Batch 196 Loss: 3.902076482772827


 21%|██▏       | 196/921 [25:46<29:01,  2.40s/it]

mini Batch 197 Loss: 3.453652858734131


 21%|██▏       | 197/921 [25:49<28:41,  2.38s/it]

mini Batch 198 Loss: 3.568187713623047


 21%|██▏       | 198/921 [25:51<28:57,  2.40s/it]

mini Batch 199 Loss: 2.5994482040405273


 22%|██▏       | 199/921 [25:53<28:27,  2.37s/it]

mini Batch 200 Loss: 3.030174970626831


 22%|██▏       | 200/921 [25:55<27:50,  2.32s/it]

mini Batch 201 Loss: 3.8665339946746826


 22%|██▏       | 201/921 [25:58<27:38,  2.30s/it]

mini Batch 202 Loss: 2.9105403423309326


 22%|██▏       | 202/921 [26:00<28:29,  2.38s/it]

mini Batch 203 Loss: 3.9709367752075195


 22%|██▏       | 203/921 [26:03<29:29,  2.47s/it]

mini Batch 204 Loss: 2.9225449562072754


 22%|██▏       | 204/921 [26:05<28:25,  2.38s/it]

mini Batch 205 Loss: 3.928217887878418


 22%|██▏       | 205/921 [26:08<28:43,  2.41s/it]

mini Batch 206 Loss: 3.1796746253967285


 22%|██▏       | 206/921 [26:10<27:47,  2.33s/it]

mini Batch 207 Loss: 3.6593427658081055


 22%|██▏       | 207/921 [26:12<27:29,  2.31s/it]

mini Batch 208 Loss: 3.2384250164031982


 23%|██▎       | 208/921 [26:14<27:27,  2.31s/it]

mini Batch 209 Loss: 4.148115634918213


 23%|██▎       | 209/921 [26:17<27:22,  2.31s/it]

mini Batch 210 Loss: 3.261077880859375


 23%|██▎       | 210/921 [26:19<27:37,  2.33s/it]

mini Batch 211 Loss: 3.5539681911468506


 23%|██▎       | 211/921 [26:21<27:29,  2.32s/it]

mini Batch 212 Loss: 3.5713772773742676


 23%|██▎       | 212/921 [26:24<27:18,  2.31s/it]

mini Batch 213 Loss: 3.366415500640869


 23%|██▎       | 213/921 [26:26<27:56,  2.37s/it]

mini Batch 214 Loss: 2.8526668548583984


 23%|██▎       | 214/921 [26:28<27:04,  2.30s/it]

mini Batch 215 Loss: 3.4995851516723633


 23%|██▎       | 215/921 [26:30<26:50,  2.28s/it]

mini Batch 216 Loss: 3.107961893081665


 23%|██▎       | 216/921 [26:33<26:30,  2.26s/it]

mini Batch 217 Loss: 3.5415751934051514


 24%|██▎       | 217/921 [26:35<26:33,  2.26s/it]

mini Batch 218 Loss: 2.846789836883545


 24%|██▎       | 218/921 [26:37<26:17,  2.24s/it]

mini Batch 219 Loss: 3.486933708190918


 24%|██▍       | 219/921 [26:40<27:00,  2.31s/it]

mini Batch 220 Loss: 3.5918397903442383


 24%|██▍       | 220/921 [26:42<26:29,  2.27s/it]

mini Batch 221 Loss: 2.953474521636963


 24%|██▍       | 221/921 [26:44<27:08,  2.33s/it]

mini Batch 222 Loss: 3.1002683639526367


 24%|██▍       | 222/921 [26:46<26:32,  2.28s/it]

mini Batch 223 Loss: 3.18076491355896


 24%|██▍       | 223/921 [26:49<26:35,  2.29s/it]

mini Batch 224 Loss: 3.770408868789673


 24%|██▍       | 224/921 [26:51<27:07,  2.34s/it]

mini Batch 225 Loss: 2.707871198654175


 24%|██▍       | 225/921 [26:53<26:46,  2.31s/it]

mini Batch 226 Loss: 3.8868167400360107


 25%|██▍       | 226/921 [26:56<26:24,  2.28s/it]

mini Batch 227 Loss: 3.006560802459717


 25%|██▍       | 227/921 [26:58<26:29,  2.29s/it]

mini Batch 228 Loss: 3.6763696670532227


 25%|██▍       | 228/921 [27:00<26:22,  2.28s/it]

mini Batch 229 Loss: 2.7856547832489014


 25%|██▍       | 229/921 [27:03<26:24,  2.29s/it]

mini Batch 230 Loss: 3.3263025283813477


 25%|██▍       | 230/921 [27:05<25:56,  2.25s/it]

mini Batch 231 Loss: 2.9950461387634277


 25%|██▌       | 231/921 [27:07<25:53,  2.25s/it]

mini Batch 232 Loss: 3.578641414642334


 25%|██▌       | 232/921 [27:09<25:39,  2.23s/it]

mini Batch 233 Loss: 2.3718581199645996


 25%|██▌       | 233/921 [27:11<25:47,  2.25s/it]

mini Batch 234 Loss: 3.4424307346343994


 25%|██▌       | 234/921 [27:14<25:28,  2.23s/it]

mini Batch 235 Loss: 3.650972366333008


 26%|██▌       | 235/921 [27:16<25:42,  2.25s/it]

mini Batch 236 Loss: 2.7819571495056152


 26%|██▌       | 236/921 [27:18<26:17,  2.30s/it]

mini Batch 237 Loss: 2.768601894378662


 26%|██▌       | 237/921 [27:21<26:06,  2.29s/it]

mini Batch 238 Loss: 3.430253744125366


 26%|██▌       | 238/921 [27:23<25:39,  2.25s/it]

mini Batch 239 Loss: 3.093632698059082


 26%|██▌       | 239/921 [27:25<25:47,  2.27s/it]

mini Batch 240 Loss: 2.3999500274658203


 26%|██▌       | 240/921 [27:27<25:20,  2.23s/it]

mini Batch 241 Loss: 3.1852564811706543


 26%|██▌       | 241/921 [27:29<25:27,  2.25s/it]

mini Batch 242 Loss: 3.5059404373168945


 26%|██▋       | 242/921 [27:32<25:13,  2.23s/it]

mini Batch 243 Loss: 3.212421417236328


 26%|██▋       | 243/921 [27:34<25:17,  2.24s/it]

mini Batch 244 Loss: 3.7430942058563232


 26%|██▋       | 244/921 [27:36<25:10,  2.23s/it]

mini Batch 245 Loss: 2.455962657928467


 27%|██▋       | 245/921 [27:38<25:22,  2.25s/it]

mini Batch 246 Loss: 4.240352153778076


 27%|██▋       | 246/921 [27:41<25:46,  2.29s/it]

mini Batch 247 Loss: 3.317368268966675


 27%|██▋       | 247/921 [27:43<25:44,  2.29s/it]

mini Batch 248 Loss: 3.0023512840270996


 27%|██▋       | 248/921 [27:45<25:28,  2.27s/it]

mini Batch 249 Loss: 3.3050026893615723


 27%|██▋       | 249/921 [27:48<27:37,  2.47s/it]

mini Batch 250 Loss: 3.705193519592285


 27%|██▋       | 250/921 [27:50<26:49,  2.40s/it]

mini Batch 251 Loss: 3.217198610305786


 27%|██▋       | 251/921 [27:53<27:23,  2.45s/it]

mini Batch 252 Loss: 3.8012425899505615


 27%|██▋       | 252/921 [27:55<26:31,  2.38s/it]

mini Batch 253 Loss: 3.4390459060668945


 27%|██▋       | 253/921 [27:58<26:02,  2.34s/it]

mini Batch 254 Loss: 2.9580111503601074


 28%|██▊       | 254/921 [28:00<25:34,  2.30s/it]

mini Batch 255 Loss: 3.156752109527588


 28%|██▊       | 255/921 [28:02<25:37,  2.31s/it]

mini Batch 256 Loss: 3.0320582389831543


 28%|██▊       | 256/921 [28:05<26:10,  2.36s/it]

mini Batch 257 Loss: 2.9917712211608887


 28%|██▊       | 257/921 [28:07<25:46,  2.33s/it]

mini Batch 258 Loss: 3.885608196258545


 28%|██▊       | 258/921 [28:09<25:13,  2.28s/it]

mini Batch 259 Loss: 2.603217601776123


 28%|██▊       | 259/921 [28:11<25:02,  2.27s/it]

mini Batch 260 Loss: 3.6967246532440186


 28%|██▊       | 260/921 [28:13<24:44,  2.25s/it]

mini Batch 261 Loss: 2.242882251739502


 28%|██▊       | 261/921 [28:16<24:54,  2.26s/it]

mini Batch 262 Loss: 2.9281890392303467


 28%|██▊       | 262/921 [28:18<24:27,  2.23s/it]

mini Batch 263 Loss: 3.4062159061431885


 29%|██▊       | 263/921 [28:20<24:34,  2.24s/it]

mini Batch 264 Loss: 3.558443307876587


 29%|██▊       | 264/921 [28:22<24:25,  2.23s/it]

mini Batch 265 Loss: 3.3763043880462646


 29%|██▉       | 265/921 [28:25<24:27,  2.24s/it]

mini Batch 266 Loss: 3.171320915222168


 29%|██▉       | 266/921 [28:27<24:08,  2.21s/it]

mini Batch 267 Loss: 3.1497552394866943


 29%|██▉       | 267/921 [28:29<24:14,  2.22s/it]

mini Batch 268 Loss: 3.1302809715270996


 29%|██▉       | 268/921 [28:31<24:45,  2.27s/it]

mini Batch 269 Loss: 3.3563427925109863


 29%|██▉       | 269/921 [28:34<24:43,  2.27s/it]

mini Batch 270 Loss: 3.628129243850708


 29%|██▉       | 270/921 [28:36<24:33,  2.26s/it]

mini Batch 271 Loss: 2.9582581520080566


 29%|██▉       | 271/921 [28:38<24:38,  2.27s/it]

mini Batch 272 Loss: 3.0365586280822754


 30%|██▉       | 272/921 [28:40<24:12,  2.24s/it]

mini Batch 273 Loss: 3.513641357421875


 30%|██▉       | 273/921 [28:43<24:14,  2.24s/it]

mini Batch 274 Loss: 3.535649299621582


 30%|██▉       | 274/921 [28:45<24:02,  2.23s/it]

mini Batch 275 Loss: 2.8524701595306396


 30%|██▉       | 275/921 [28:47<24:06,  2.24s/it]

mini Batch 276 Loss: 3.0298335552215576


 30%|██▉       | 276/921 [28:49<23:51,  2.22s/it]

mini Batch 277 Loss: 3.192291736602783


 30%|███       | 277/921 [28:52<23:56,  2.23s/it]

mini Batch 278 Loss: 3.5278525352478027


 30%|███       | 278/921 [28:54<23:40,  2.21s/it]

mini Batch 279 Loss: 2.476844310760498


 30%|███       | 279/921 [28:56<23:51,  2.23s/it]

mini Batch 280 Loss: 2.786240577697754


 30%|███       | 280/921 [28:58<23:41,  2.22s/it]

mini Batch 281 Loss: 3.6997525691986084


 31%|███       | 281/921 [29:00<23:54,  2.24s/it]

mini Batch 282 Loss: 3.0960853099823


 31%|███       | 282/921 [29:03<23:39,  2.22s/it]

mini Batch 283 Loss: 2.943690776824951


 31%|███       | 283/921 [29:05<23:40,  2.23s/it]

mini Batch 284 Loss: 3.922342538833618


 31%|███       | 284/921 [29:07<23:36,  2.22s/it]

mini Batch 285 Loss: 3.544161558151245


 31%|███       | 285/921 [29:09<23:32,  2.22s/it]

mini Batch 286 Loss: 2.316169023513794


 31%|███       | 286/921 [29:11<23:16,  2.20s/it]

mini Batch 287 Loss: 3.1347248554229736


 31%|███       | 287/921 [29:14<23:19,  2.21s/it]

mini Batch 288 Loss: 2.99723482131958


 31%|███▏      | 288/921 [29:16<23:53,  2.26s/it]

mini Batch 289 Loss: 3.212191581726074


 31%|███▏      | 289/921 [29:18<23:46,  2.26s/it]

mini Batch 290 Loss: 2.6421005725860596


 31%|███▏      | 290/921 [29:21<23:37,  2.25s/it]

mini Batch 291 Loss: 3.361417770385742


 32%|███▏      | 291/921 [29:23<23:37,  2.25s/it]

mini Batch 292 Loss: 4.142337799072266


 32%|███▏      | 292/921 [29:25<23:21,  2.23s/it]

mini Batch 293 Loss: 3.273332118988037


 32%|███▏      | 293/921 [29:27<23:20,  2.23s/it]

mini Batch 294 Loss: 3.678446054458618


 32%|███▏      | 294/921 [29:29<23:15,  2.23s/it]

mini Batch 295 Loss: 3.1631884574890137


 32%|███▏      | 295/921 [29:32<23:27,  2.25s/it]

mini Batch 296 Loss: 2.5628366470336914


 32%|███▏      | 296/921 [29:34<23:01,  2.21s/it]

mini Batch 297 Loss: 2.497835636138916


 32%|███▏      | 297/921 [29:36<23:09,  2.23s/it]

mini Batch 298 Loss: 3.0778980255126953


 32%|███▏      | 298/921 [29:38<22:52,  2.20s/it]

mini Batch 299 Loss: 2.584913730621338


 32%|███▏      | 299/921 [29:41<23:08,  2.23s/it]

mini Batch 300 Loss: 3.1145248413085938


 33%|███▎      | 300/921 [29:43<22:54,  2.21s/it]

mini Batch 301 Loss: 2.4715065956115723
Training Batch: 301 | Training Loss: 2.4715065956115723


 33%|███▎      | 301/921 [29:47<28:23,  2.75s/it]

dist a: tensor([17.2512, 14.8187, 15.7347, 15.1565, 18.3583, 15.1871, 17.0808, 15.4252,
        15.7013, 18.6569, 15.4873, 20.2660, 25.4944, 15.7391, 17.3116, 21.8955,
        25.5517, 13.8718, 16.2582, 18.8535, 22.5204, 14.3691, 14.3313, 18.6666,
        15.8105, 21.7361, 12.5812, 20.2613, 16.7073, 14.3430, 11.6596, 15.6545,
        15.5607, 13.0018, 20.9484, 19.4390, 11.8390, 11.7173, 14.1874, 16.7124,
        16.5078, 20.0335, 17.5223, 15.7098, 24.7483, 18.0417, 21.3500, 21.9261,
        16.0472,  9.6546, 28.8203, 14.5118, 24.9797, 13.8255, 24.5305, 12.9841,
        18.1042, 18.5842, 20.1253, 17.5018, 14.4048, 18.5859, 20.9483, 22.2195],
       device='cuda:0'), dist b: tensor([34.7203, 21.2302, 27.9610, 16.6818, 17.2295, 24.0931, 24.1914, 17.5049,
        20.6082, 30.6129, 12.1478, 21.3581, 24.4492, 19.4709, 21.4066, 20.5960,
        28.6892, 32.5311, 18.7760, 16.6906, 24.0101, 30.9468, 26.5451, 19.4563,
        22.7352, 19.8277, 16.4449, 28.6580, 17.7409, 17.0765, 22.2641, 13.1401

 33%|███▎      | 302/921 [29:49<26:36,  2.58s/it]

mini Batch 303 Loss: 2.013302803039551


 33%|███▎      | 303/921 [29:51<25:37,  2.49s/it]

mini Batch 304 Loss: 2.977487802505493


 33%|███▎      | 304/921 [29:53<24:31,  2.38s/it]

mini Batch 305 Loss: 2.771360158920288


 33%|███▎      | 305/921 [29:56<24:10,  2.35s/it]

mini Batch 306 Loss: 3.5858495235443115


 33%|███▎      | 306/921 [29:58<23:46,  2.32s/it]

mini Batch 307 Loss: 3.158568859100342


 33%|███▎      | 307/921 [30:00<23:39,  2.31s/it]

mini Batch 308 Loss: 2.5442914962768555


 33%|███▎      | 308/921 [30:02<23:09,  2.27s/it]

mini Batch 309 Loss: 2.5720925331115723


 34%|███▎      | 309/921 [30:05<23:03,  2.26s/it]

mini Batch 310 Loss: 3.034118413925171


 34%|███▎      | 310/921 [30:07<23:42,  2.33s/it]

mini Batch 311 Loss: 2.5039968490600586


 34%|███▍      | 311/921 [30:09<23:27,  2.31s/it]

mini Batch 312 Loss: 2.939868450164795


 34%|███▍      | 312/921 [30:11<23:02,  2.27s/it]

mini Batch 313 Loss: 2.9491777420043945


 34%|███▍      | 313/921 [30:14<22:57,  2.27s/it]

mini Batch 314 Loss: 2.660122871398926


 34%|███▍      | 314/921 [30:16<22:45,  2.25s/it]

mini Batch 315 Loss: 2.4166452884674072


 34%|███▍      | 315/921 [30:18<22:53,  2.27s/it]

mini Batch 316 Loss: 2.860379219055176


 34%|███▍      | 316/921 [30:20<22:39,  2.25s/it]

mini Batch 317 Loss: 4.060441970825195


 34%|███▍      | 317/921 [30:23<22:45,  2.26s/it]

mini Batch 318 Loss: 3.5636494159698486


 35%|███▍      | 318/921 [30:25<22:33,  2.24s/it]

mini Batch 319 Loss: 2.8862335681915283


 35%|███▍      | 319/921 [30:27<22:38,  2.26s/it]

mini Batch 320 Loss: 2.6047439575195312


 35%|███▍      | 320/921 [30:29<22:29,  2.25s/it]

mini Batch 321 Loss: 2.9244384765625


 35%|███▍      | 321/921 [30:32<22:33,  2.26s/it]

mini Batch 322 Loss: 2.615211009979248


 35%|███▍      | 322/921 [30:34<22:29,  2.25s/it]

mini Batch 323 Loss: 3.143432140350342


 35%|███▌      | 323/921 [30:36<22:30,  2.26s/it]

mini Batch 324 Loss: 2.8807919025421143


 35%|███▌      | 324/921 [30:38<22:12,  2.23s/it]

mini Batch 325 Loss: 2.979689359664917


 35%|███▌      | 325/921 [30:41<22:31,  2.27s/it]

mini Batch 326 Loss: 3.837158441543579


 35%|███▌      | 326/921 [30:43<22:16,  2.25s/it]

mini Batch 327 Loss: 2.93329119682312


 36%|███▌      | 327/921 [30:45<22:24,  2.26s/it]

mini Batch 328 Loss: 4.217135429382324


 36%|███▌      | 328/921 [30:47<22:08,  2.24s/it]

mini Batch 329 Loss: 2.950429916381836


 36%|███▌      | 329/921 [30:50<22:15,  2.26s/it]

mini Batch 330 Loss: 3.0772907733917236


 36%|███▌      | 330/921 [30:52<22:03,  2.24s/it]

mini Batch 331 Loss: 3.4184889793395996


 36%|███▌      | 331/921 [30:54<22:10,  2.25s/it]

mini Batch 332 Loss: 2.389617443084717


 36%|███▌      | 332/921 [30:56<21:53,  2.23s/it]

mini Batch 333 Loss: 3.4322612285614014


 36%|███▌      | 333/921 [30:59<22:04,  2.25s/it]

mini Batch 334 Loss: 2.9032363891601562


 36%|███▋      | 334/921 [31:01<22:30,  2.30s/it]

mini Batch 335 Loss: 2.938242197036743


 36%|███▋      | 335/921 [31:03<22:19,  2.29s/it]

mini Batch 336 Loss: 3.522921085357666


 36%|███▋      | 336/921 [31:06<21:58,  2.25s/it]

mini Batch 337 Loss: 2.298552989959717


 37%|███▋      | 337/921 [31:08<22:13,  2.28s/it]

mini Batch 338 Loss: 3.355799674987793


 37%|███▋      | 338/921 [31:10<21:53,  2.25s/it]

mini Batch 339 Loss: 2.8984851837158203


 37%|███▋      | 339/921 [31:12<21:52,  2.26s/it]

mini Batch 340 Loss: 3.026343584060669


 37%|███▋      | 340/921 [31:15<21:42,  2.24s/it]

mini Batch 341 Loss: 2.986603260040283


 37%|███▋      | 341/921 [31:17<21:46,  2.25s/it]

mini Batch 342 Loss: 2.946960210800171


 37%|███▋      | 342/921 [31:19<21:33,  2.23s/it]

mini Batch 343 Loss: 3.270505428314209


 37%|███▋      | 343/921 [31:21<21:38,  2.25s/it]

mini Batch 344 Loss: 3.6782145500183105


 37%|███▋      | 344/921 [31:23<21:24,  2.23s/it]

mini Batch 345 Loss: 3.247650623321533


 37%|███▋      | 345/921 [31:26<21:30,  2.24s/it]

mini Batch 346 Loss: 3.0677943229675293


 38%|███▊      | 346/921 [31:28<21:17,  2.22s/it]

mini Batch 347 Loss: 2.750505208969116


 38%|███▊      | 347/921 [31:30<21:26,  2.24s/it]

mini Batch 348 Loss: 3.069660186767578


 38%|███▊      | 348/921 [31:32<21:11,  2.22s/it]

mini Batch 349 Loss: 3.621187925338745


 38%|███▊      | 349/921 [31:35<21:17,  2.23s/it]

mini Batch 350 Loss: 2.62723970413208


 38%|███▊      | 350/921 [31:37<21:07,  2.22s/it]

mini Batch 351 Loss: 4.058065414428711


 38%|███▊      | 351/921 [31:39<21:14,  2.24s/it]

mini Batch 352 Loss: 2.5188705921173096


 38%|███▊      | 352/921 [31:41<21:02,  2.22s/it]

mini Batch 353 Loss: 2.7625508308410645


 38%|███▊      | 353/921 [31:44<21:20,  2.25s/it]

mini Batch 354 Loss: 3.379486560821533


 38%|███▊      | 354/921 [31:46<21:03,  2.23s/it]

mini Batch 355 Loss: 2.801891803741455


 39%|███▊      | 355/921 [31:48<21:04,  2.23s/it]

mini Batch 356 Loss: 3.5838253498077393


 39%|███▊      | 356/921 [31:50<20:48,  2.21s/it]

mini Batch 357 Loss: 2.719676971435547


 39%|███▉      | 357/921 [31:52<20:51,  2.22s/it]

mini Batch 358 Loss: 3.4394850730895996


 39%|███▉      | 358/921 [31:55<20:42,  2.21s/it]

mini Batch 359 Loss: 3.5827479362487793


 39%|███▉      | 359/921 [31:57<20:55,  2.23s/it]

mini Batch 360 Loss: 3.3427133560180664


 39%|███▉      | 360/921 [31:59<20:39,  2.21s/it]

mini Batch 361 Loss: 3.3490147590637207


 39%|███▉      | 361/921 [32:01<20:47,  2.23s/it]

mini Batch 362 Loss: 2.6895389556884766


 39%|███▉      | 362/921 [32:03<20:33,  2.21s/it]

mini Batch 363 Loss: 2.8771238327026367


 39%|███▉      | 363/921 [32:06<20:50,  2.24s/it]

mini Batch 364 Loss: 3.0046324729919434


 40%|███▉      | 364/921 [32:08<20:33,  2.22s/it]

mini Batch 365 Loss: 3.3139610290527344


 40%|███▉      | 365/921 [32:10<20:38,  2.23s/it]

mini Batch 366 Loss: 2.728820323944092


 40%|███▉      | 366/921 [32:12<20:27,  2.21s/it]

mini Batch 367 Loss: 3.4897239208221436


 40%|███▉      | 367/921 [32:15<20:36,  2.23s/it]

mini Batch 368 Loss: 3.0793566703796387


 40%|███▉      | 368/921 [32:17<20:26,  2.22s/it]

mini Batch 369 Loss: 3.0046019554138184


 40%|████      | 369/921 [32:19<20:40,  2.25s/it]

mini Batch 370 Loss: 3.649467945098877


 40%|████      | 370/921 [32:21<20:31,  2.23s/it]

mini Batch 371 Loss: 4.114497184753418


 40%|████      | 371/921 [32:24<20:33,  2.24s/it]

mini Batch 372 Loss: 2.7658419609069824


 40%|████      | 372/921 [32:26<20:20,  2.22s/it]

mini Batch 373 Loss: 2.9450154304504395


 40%|████      | 373/921 [32:28<20:33,  2.25s/it]

mini Batch 374 Loss: 4.1438751220703125


 41%|████      | 374/921 [32:30<20:23,  2.24s/it]

mini Batch 375 Loss: 3.3782339096069336


 41%|████      | 375/921 [32:33<20:20,  2.23s/it]

mini Batch 376 Loss: 3.1953392028808594


 41%|████      | 376/921 [32:35<20:07,  2.22s/it]

mini Batch 377 Loss: 2.8804006576538086


 41%|████      | 377/921 [32:37<20:17,  2.24s/it]

mini Batch 378 Loss: 2.7428224086761475


 41%|████      | 378/921 [32:39<20:02,  2.21s/it]

mini Batch 379 Loss: 3.1208839416503906


 41%|████      | 379/921 [32:41<20:01,  2.22s/it]

mini Batch 380 Loss: 3.105616569519043


 41%|████▏     | 380/921 [32:44<19:53,  2.21s/it]

mini Batch 381 Loss: 3.103212356567383


 41%|████▏     | 381/921 [32:46<20:01,  2.22s/it]

mini Batch 382 Loss: 2.755878448486328


 41%|████▏     | 382/921 [32:48<19:51,  2.21s/it]

mini Batch 383 Loss: 2.875131607055664


 42%|████▏     | 383/921 [32:50<19:55,  2.22s/it]

mini Batch 384 Loss: 3.1437950134277344


 42%|████▏     | 384/921 [32:52<19:44,  2.21s/it]

mini Batch 385 Loss: 2.9344887733459473


 42%|████▏     | 385/921 [32:55<19:54,  2.23s/it]

mini Batch 386 Loss: 2.1932859420776367


 42%|████▏     | 386/921 [32:57<19:52,  2.23s/it]

mini Batch 387 Loss: 3.017974376678467


 42%|████▏     | 387/921 [32:59<19:53,  2.24s/it]

mini Batch 388 Loss: 2.9618420600891113


 42%|████▏     | 388/921 [33:01<19:35,  2.21s/it]

mini Batch 389 Loss: 3.0532751083374023


 42%|████▏     | 389/921 [33:04<19:54,  2.25s/it]

mini Batch 390 Loss: 2.8125345706939697


 42%|████▏     | 390/921 [33:06<19:49,  2.24s/it]

mini Batch 391 Loss: 3.130962371826172


 42%|████▏     | 391/921 [33:08<19:47,  2.24s/it]

mini Batch 392 Loss: 2.9959816932678223


 43%|████▎     | 392/921 [33:10<19:35,  2.22s/it]

mini Batch 393 Loss: 3.517688274383545


 43%|████▎     | 393/921 [33:13<19:45,  2.25s/it]

mini Batch 394 Loss: 3.060929775238037


 43%|████▎     | 394/921 [33:15<19:37,  2.24s/it]

mini Batch 395 Loss: 2.698037624359131


 43%|████▎     | 395/921 [33:17<19:40,  2.24s/it]

mini Batch 396 Loss: 3.2329261302948


 43%|████▎     | 396/921 [33:19<19:26,  2.22s/it]

mini Batch 397 Loss: 3.0604677200317383


 43%|████▎     | 397/921 [33:22<19:40,  2.25s/it]

mini Batch 398 Loss: 3.4519553184509277


 43%|████▎     | 398/921 [33:24<19:28,  2.23s/it]

mini Batch 399 Loss: 3.292025566101074


 43%|████▎     | 399/921 [33:26<19:25,  2.23s/it]

mini Batch 400 Loss: 2.8628571033477783


 43%|████▎     | 400/921 [33:28<19:13,  2.21s/it]

mini Batch 401 Loss: 2.8916001319885254


 44%|████▎     | 401/921 [33:30<19:19,  2.23s/it]

mini Batch 402 Loss: 3.6843740940093994


 44%|████▎     | 402/921 [33:33<19:12,  2.22s/it]

mini Batch 403 Loss: 3.415006160736084


 44%|████▍     | 403/921 [33:35<19:29,  2.26s/it]

mini Batch 404 Loss: 3.536078453063965


 44%|████▍     | 404/921 [33:37<19:20,  2.25s/it]

mini Batch 405 Loss: 3.210266590118408


 44%|████▍     | 405/921 [33:39<19:22,  2.25s/it]

mini Batch 406 Loss: 2.680741310119629


 44%|████▍     | 406/921 [33:42<19:09,  2.23s/it]

mini Batch 407 Loss: 3.2025909423828125


 44%|████▍     | 407/921 [33:44<19:14,  2.25s/it]

mini Batch 408 Loss: 2.927096366882324


 44%|████▍     | 408/921 [33:46<19:03,  2.23s/it]

mini Batch 409 Loss: 3.2264533042907715


 44%|████▍     | 409/921 [33:48<19:09,  2.24s/it]

mini Batch 410 Loss: 2.7671422958374023


 45%|████▍     | 410/921 [33:51<18:54,  2.22s/it]

mini Batch 411 Loss: 3.014249801635742


 45%|████▍     | 411/921 [33:53<19:07,  2.25s/it]

mini Batch 412 Loss: 2.7349836826324463


 45%|████▍     | 412/921 [33:55<18:55,  2.23s/it]

mini Batch 413 Loss: 3.6300930976867676


 45%|████▍     | 413/921 [33:57<18:54,  2.23s/it]

mini Batch 414 Loss: 2.81680965423584


 45%|████▍     | 414/921 [33:59<18:42,  2.21s/it]

mini Batch 415 Loss: 3.2980141639709473


 45%|████▌     | 415/921 [34:02<18:46,  2.23s/it]

mini Batch 416 Loss: 2.2288944721221924


 45%|████▌     | 416/921 [34:04<18:32,  2.20s/it]

mini Batch 417 Loss: 2.5465896129608154


 45%|████▌     | 417/921 [34:06<19:02,  2.27s/it]

mini Batch 418 Loss: 3.5598297119140625


 45%|████▌     | 418/921 [34:08<18:43,  2.23s/it]

mini Batch 419 Loss: 2.4816946983337402


 45%|████▌     | 419/921 [34:11<18:49,  2.25s/it]

mini Batch 420 Loss: 4.588530540466309


 46%|████▌     | 420/921 [34:13<18:35,  2.23s/it]

mini Batch 421 Loss: 3.612978458404541


 46%|████▌     | 421/921 [34:15<18:37,  2.24s/it]

mini Batch 422 Loss: 2.406468391418457


 46%|████▌     | 422/921 [34:17<18:24,  2.21s/it]

mini Batch 423 Loss: 3.5878794193267822


 46%|████▌     | 423/921 [34:20<18:36,  2.24s/it]

mini Batch 424 Loss: 2.138962745666504


 46%|████▌     | 424/921 [34:22<18:20,  2.21s/it]

mini Batch 425 Loss: 2.485625982284546


 46%|████▌     | 425/921 [34:24<18:28,  2.24s/it]

mini Batch 426 Loss: 2.959331512451172


 46%|████▋     | 426/921 [34:26<18:19,  2.22s/it]

mini Batch 427 Loss: 2.5833616256713867


 46%|████▋     | 427/921 [34:29<18:26,  2.24s/it]

mini Batch 428 Loss: 2.9177327156066895


 46%|████▋     | 428/921 [34:31<18:12,  2.22s/it]

mini Batch 429 Loss: 2.517420768737793


 47%|████▋     | 429/921 [34:33<18:10,  2.22s/it]

mini Batch 430 Loss: 2.5621304512023926


 47%|████▋     | 430/921 [34:35<18:10,  2.22s/it]

mini Batch 431 Loss: 2.3515753746032715


 47%|████▋     | 431/921 [34:37<18:15,  2.24s/it]

mini Batch 432 Loss: 2.775132656097412


 47%|████▋     | 432/921 [34:40<18:12,  2.23s/it]

mini Batch 433 Loss: 3.81311297416687


 47%|████▋     | 433/921 [34:42<18:13,  2.24s/it]

mini Batch 434 Loss: 3.1119370460510254


 47%|████▋     | 434/921 [34:44<18:00,  2.22s/it]

mini Batch 435 Loss: 3.0976247787475586


 47%|████▋     | 435/921 [34:46<18:03,  2.23s/it]

mini Batch 436 Loss: 2.2229526042938232


 47%|████▋     | 436/921 [34:49<17:50,  2.21s/it]

mini Batch 437 Loss: 3.24778413772583


 47%|████▋     | 437/921 [34:51<17:54,  2.22s/it]

mini Batch 438 Loss: 3.2206640243530273


 48%|████▊     | 438/921 [34:53<17:40,  2.20s/it]

mini Batch 439 Loss: 2.3794429302215576


 48%|████▊     | 439/921 [34:55<17:43,  2.21s/it]

mini Batch 440 Loss: 3.4823853969573975


 48%|████▊     | 440/921 [34:57<17:30,  2.18s/it]

mini Batch 441 Loss: 3.159738540649414


 48%|████▊     | 441/921 [35:00<17:45,  2.22s/it]

mini Batch 442 Loss: 3.2644214630126953


 48%|████▊     | 442/921 [35:02<17:32,  2.20s/it]

mini Batch 443 Loss: 2.3916542530059814


 48%|████▊     | 443/921 [35:04<17:43,  2.22s/it]

mini Batch 444 Loss: 2.6291966438293457


 48%|████▊     | 444/921 [35:06<17:33,  2.21s/it]

mini Batch 445 Loss: 2.5317726135253906


 48%|████▊     | 445/921 [35:08<17:31,  2.21s/it]

mini Batch 446 Loss: 2.830789566040039


 48%|████▊     | 446/921 [35:11<17:26,  2.20s/it]

mini Batch 447 Loss: 3.10418963432312


 49%|████▊     | 447/921 [35:13<17:34,  2.22s/it]

mini Batch 448 Loss: 3.4819512367248535


 49%|████▊     | 448/921 [35:15<17:17,  2.19s/it]

mini Batch 449 Loss: 3.3869988918304443


 49%|████▉     | 449/921 [35:17<17:23,  2.21s/it]

mini Batch 450 Loss: 2.4699647426605225


 49%|████▉     | 450/921 [35:19<17:19,  2.21s/it]

mini Batch 451 Loss: 2.7045180797576904


 49%|████▉     | 451/921 [35:22<17:21,  2.22s/it]

mini Batch 452 Loss: 3.2921745777130127


 49%|████▉     | 452/921 [35:24<17:13,  2.20s/it]

mini Batch 453 Loss: 2.6387245655059814


 49%|████▉     | 453/921 [35:26<17:12,  2.21s/it]

mini Batch 454 Loss: 2.823216199874878


 49%|████▉     | 454/921 [35:28<17:11,  2.21s/it]

mini Batch 455 Loss: 3.3216676712036133


 49%|████▉     | 455/921 [35:31<17:19,  2.23s/it]

mini Batch 456 Loss: 2.912710428237915


 50%|████▉     | 456/921 [35:33<17:04,  2.20s/it]

mini Batch 457 Loss: 2.2997093200683594


 50%|████▉     | 457/921 [35:35<17:10,  2.22s/it]

mini Batch 458 Loss: 2.706310272216797


 50%|████▉     | 458/921 [35:37<17:01,  2.21s/it]

mini Batch 459 Loss: 2.532674789428711


 50%|████▉     | 459/921 [35:39<17:21,  2.25s/it]

mini Batch 460 Loss: 3.066567897796631


 50%|████▉     | 460/921 [35:42<17:17,  2.25s/it]

mini Batch 461 Loss: 2.8434343338012695


 50%|█████     | 461/921 [35:44<17:31,  2.29s/it]

mini Batch 462 Loss: 2.642997980117798


 50%|█████     | 462/921 [35:46<17:22,  2.27s/it]

mini Batch 463 Loss: 2.616079330444336


 50%|█████     | 463/921 [35:49<17:23,  2.28s/it]

mini Batch 464 Loss: 3.1356287002563477


 50%|█████     | 464/921 [35:51<17:10,  2.25s/it]

mini Batch 465 Loss: 2.8132174015045166


 50%|█████     | 465/921 [35:53<17:10,  2.26s/it]

mini Batch 466 Loss: 3.104367256164551


 51%|█████     | 466/921 [35:55<17:02,  2.25s/it]

mini Batch 467 Loss: 2.6000804901123047


 51%|█████     | 467/921 [35:58<17:07,  2.26s/it]

mini Batch 468 Loss: 3.4291744232177734


 51%|█████     | 468/921 [36:00<16:58,  2.25s/it]

mini Batch 469 Loss: 2.9692728519439697


 51%|█████     | 469/921 [36:02<16:58,  2.25s/it]

mini Batch 470 Loss: 3.461374044418335


 51%|█████     | 470/921 [36:04<16:46,  2.23s/it]

mini Batch 471 Loss: 3.1265110969543457


 51%|█████     | 471/921 [36:07<16:54,  2.25s/it]

mini Batch 472 Loss: 3.412533760070801


 51%|█████     | 472/921 [36:09<16:41,  2.23s/it]

mini Batch 473 Loss: 2.352898120880127


 51%|█████▏    | 473/921 [36:11<16:46,  2.25s/it]

mini Batch 474 Loss: 2.7674601078033447


 51%|█████▏    | 474/921 [36:13<16:39,  2.24s/it]

mini Batch 475 Loss: 2.950988292694092


 52%|█████▏    | 475/921 [36:16<16:44,  2.25s/it]

mini Batch 476 Loss: 3.813631534576416


 52%|█████▏    | 476/921 [36:18<16:33,  2.23s/it]

mini Batch 477 Loss: 2.0835509300231934


 52%|█████▏    | 477/921 [36:20<16:35,  2.24s/it]

mini Batch 478 Loss: 2.6948447227478027


 52%|█████▏    | 478/921 [36:22<16:25,  2.22s/it]

mini Batch 479 Loss: 2.6012251377105713


 52%|█████▏    | 479/921 [36:24<16:28,  2.24s/it]

mini Batch 480 Loss: 3.277055501937866


 52%|█████▏    | 480/921 [36:27<16:17,  2.22s/it]

mini Batch 481 Loss: 2.4402108192443848


 52%|█████▏    | 481/921 [36:29<16:30,  2.25s/it]

mini Batch 482 Loss: 2.7061047554016113


 52%|█████▏    | 482/921 [36:31<16:16,  2.22s/it]

mini Batch 483 Loss: 3.4216322898864746


 52%|█████▏    | 483/921 [36:33<16:28,  2.26s/it]

mini Batch 484 Loss: 3.541707992553711


 53%|█████▎    | 484/921 [36:36<16:25,  2.25s/it]

mini Batch 485 Loss: 3.398357391357422


 53%|█████▎    | 485/921 [36:38<16:35,  2.28s/it]

mini Batch 486 Loss: 2.514309883117676


 53%|█████▎    | 486/921 [36:40<16:16,  2.24s/it]

mini Batch 487 Loss: 2.3792619705200195


 53%|█████▎    | 487/921 [36:42<16:20,  2.26s/it]

mini Batch 488 Loss: 2.844548463821411


 53%|█████▎    | 488/921 [36:45<16:13,  2.25s/it]

mini Batch 489 Loss: 3.0441348552703857


 53%|█████▎    | 489/921 [36:47<16:14,  2.26s/it]

mini Batch 490 Loss: 2.2916717529296875


 53%|█████▎    | 490/921 [36:49<16:00,  2.23s/it]

mini Batch 491 Loss: 3.181821823120117


 53%|█████▎    | 491/921 [36:51<16:06,  2.25s/it]

mini Batch 492 Loss: 3.632152795791626


 53%|█████▎    | 492/921 [36:54<15:58,  2.23s/it]

mini Batch 493 Loss: 2.3026342391967773


 54%|█████▎    | 493/921 [36:56<16:03,  2.25s/it]

mini Batch 494 Loss: 2.538144826889038


 54%|█████▎    | 494/921 [36:58<15:52,  2.23s/it]

mini Batch 495 Loss: 3.25551176071167


 54%|█████▎    | 495/921 [37:00<15:55,  2.24s/it]

mini Batch 496 Loss: 3.0880684852600098


 54%|█████▍    | 496/921 [37:03<15:50,  2.24s/it]

mini Batch 497 Loss: 2.452806234359741


 54%|█████▍    | 497/921 [37:05<15:49,  2.24s/it]

mini Batch 498 Loss: 2.8497753143310547


 54%|█████▍    | 498/921 [37:07<15:39,  2.22s/it]

mini Batch 499 Loss: 2.794295310974121


 54%|█████▍    | 499/921 [37:09<15:48,  2.25s/it]

mini Batch 500 Loss: 2.0581836700439453


 54%|█████▍    | 500/921 [37:11<15:36,  2.22s/it]

mini Batch 501 Loss: 1.7988526821136475


 54%|█████▍    | 501/921 [37:14<15:48,  2.26s/it]

mini Batch 502 Loss: 2.531459093093872


 55%|█████▍    | 502/921 [37:16<15:40,  2.24s/it]

mini Batch 503 Loss: 2.747042655944824


 55%|█████▍    | 503/921 [37:18<15:46,  2.26s/it]

mini Batch 504 Loss: 3.2150490283966064


 55%|█████▍    | 504/921 [37:21<15:32,  2.24s/it]

mini Batch 505 Loss: 2.645484685897827


 55%|█████▍    | 505/921 [37:23<15:33,  2.24s/it]

mini Batch 506 Loss: 2.6104228496551514


 55%|█████▍    | 506/921 [37:25<15:22,  2.22s/it]

mini Batch 507 Loss: 2.480177640914917


 55%|█████▌    | 507/921 [37:27<15:28,  2.24s/it]

mini Batch 508 Loss: 2.6025609970092773


 55%|█████▌    | 508/921 [37:29<15:16,  2.22s/it]

mini Batch 509 Loss: 2.563704252243042


 55%|█████▌    | 509/921 [37:32<15:24,  2.24s/it]

mini Batch 510 Loss: 3.55558180809021


 55%|█████▌    | 510/921 [37:34<15:16,  2.23s/it]

mini Batch 511 Loss: 2.3379998207092285


 55%|█████▌    | 511/921 [37:36<15:15,  2.23s/it]

mini Batch 512 Loss: 3.2321741580963135


 56%|█████▌    | 512/921 [37:38<15:05,  2.21s/it]

mini Batch 513 Loss: 3.1589345932006836


 56%|█████▌    | 513/921 [37:41<15:09,  2.23s/it]

mini Batch 514 Loss: 3.358006000518799


 56%|█████▌    | 514/921 [37:43<15:07,  2.23s/it]

mini Batch 515 Loss: 2.773125410079956


 56%|█████▌    | 515/921 [37:45<15:09,  2.24s/it]

mini Batch 516 Loss: 2.6965842247009277


 56%|█████▌    | 516/921 [37:47<15:06,  2.24s/it]

mini Batch 517 Loss: 3.1161303520202637


 56%|█████▌    | 517/921 [37:50<15:11,  2.26s/it]

mini Batch 518 Loss: 3.4476473331451416


 56%|█████▌    | 518/921 [37:52<15:07,  2.25s/it]

mini Batch 519 Loss: 3.1152682304382324


 56%|█████▋    | 519/921 [37:54<15:11,  2.27s/it]

mini Batch 520 Loss: 2.87373423576355


 56%|█████▋    | 520/921 [37:56<15:02,  2.25s/it]

mini Batch 521 Loss: 3.0070652961730957


 57%|█████▋    | 521/921 [37:59<15:07,  2.27s/it]

mini Batch 522 Loss: 2.9769511222839355


 57%|█████▋    | 522/921 [38:01<14:53,  2.24s/it]

mini Batch 523 Loss: 3.664851188659668


 57%|█████▋    | 523/921 [38:03<14:57,  2.25s/it]

mini Batch 524 Loss: 2.7627153396606445


 57%|█████▋    | 524/921 [38:05<14:48,  2.24s/it]

mini Batch 525 Loss: 3.318394899368286


 57%|█████▋    | 525/921 [38:08<14:57,  2.27s/it]

mini Batch 526 Loss: 2.2126388549804688


 57%|█████▋    | 526/921 [38:10<14:44,  2.24s/it]

mini Batch 527 Loss: 2.1738622188568115


 57%|█████▋    | 527/921 [38:12<14:45,  2.25s/it]

mini Batch 528 Loss: 2.599853038787842


 57%|█████▋    | 528/921 [38:14<14:34,  2.22s/it]

mini Batch 529 Loss: 1.9559986591339111


 57%|█████▋    | 529/921 [38:17<14:37,  2.24s/it]

mini Batch 530 Loss: 2.9831700325012207


 58%|█████▊    | 530/921 [38:19<14:26,  2.22s/it]

mini Batch 531 Loss: 2.3633031845092773


 58%|█████▊    | 531/921 [38:21<14:29,  2.23s/it]

mini Batch 532 Loss: 2.475559949874878


 58%|█████▊    | 532/921 [38:23<14:18,  2.21s/it]

mini Batch 533 Loss: 2.9213860034942627


 58%|█████▊    | 533/921 [38:25<14:22,  2.22s/it]

mini Batch 534 Loss: 2.476931571960449


 58%|█████▊    | 534/921 [38:28<14:12,  2.20s/it]

mini Batch 535 Loss: 2.765690565109253


 58%|█████▊    | 535/921 [38:30<14:24,  2.24s/it]

mini Batch 536 Loss: 3.556746482849121


 58%|█████▊    | 536/921 [38:32<14:09,  2.21s/it]

mini Batch 537 Loss: 3.27571439743042


 58%|█████▊    | 537/921 [38:34<14:13,  2.22s/it]

mini Batch 538 Loss: 2.6680026054382324


 58%|█████▊    | 538/921 [38:36<14:06,  2.21s/it]

mini Batch 539 Loss: 2.6633729934692383


 59%|█████▊    | 539/921 [38:39<14:12,  2.23s/it]

mini Batch 540 Loss: 2.962392807006836


 59%|█████▊    | 540/921 [38:41<14:05,  2.22s/it]

mini Batch 541 Loss: 3.214998722076416


 59%|█████▊    | 541/921 [38:43<14:10,  2.24s/it]

mini Batch 542 Loss: 2.9258241653442383


 59%|█████▉    | 542/921 [38:45<14:05,  2.23s/it]

mini Batch 543 Loss: 2.9368295669555664


 59%|█████▉    | 543/921 [38:48<14:07,  2.24s/it]

mini Batch 544 Loss: 3.9851107597351074


 59%|█████▉    | 544/921 [38:50<14:06,  2.24s/it]

mini Batch 545 Loss: 2.9236268997192383


 59%|█████▉    | 545/921 [38:52<14:14,  2.27s/it]

mini Batch 546 Loss: 2.6938138008117676


 59%|█████▉    | 546/921 [38:54<14:03,  2.25s/it]

mini Batch 547 Loss: 3.001333713531494


 59%|█████▉    | 547/921 [38:57<14:04,  2.26s/it]

mini Batch 548 Loss: 2.5766615867614746


 60%|█████▉    | 548/921 [38:59<13:54,  2.24s/it]

mini Batch 549 Loss: 2.947265386581421


 60%|█████▉    | 549/921 [39:01<13:55,  2.25s/it]

mini Batch 550 Loss: 3.717423439025879


 60%|█████▉    | 550/921 [39:03<13:44,  2.22s/it]

mini Batch 551 Loss: 2.80985689163208


 60%|█████▉    | 551/921 [39:06<13:47,  2.24s/it]

mini Batch 552 Loss: 2.49017333984375


 60%|█████▉    | 552/921 [39:08<13:36,  2.21s/it]

mini Batch 553 Loss: 2.7928431034088135


 60%|██████    | 553/921 [39:10<13:35,  2.22s/it]

mini Batch 554 Loss: 3.6154093742370605


 60%|██████    | 554/921 [39:12<13:29,  2.21s/it]

mini Batch 555 Loss: 2.412044048309326


 60%|██████    | 555/921 [39:14<13:32,  2.22s/it]

mini Batch 556 Loss: 2.790013313293457


 60%|██████    | 556/921 [39:17<13:27,  2.21s/it]

mini Batch 557 Loss: 2.613081455230713


 60%|██████    | 557/921 [39:19<13:31,  2.23s/it]

mini Batch 558 Loss: 2.579249382019043


 61%|██████    | 558/921 [39:21<13:24,  2.22s/it]

mini Batch 559 Loss: 3.534654140472412


 61%|██████    | 559/921 [39:23<13:32,  2.24s/it]

mini Batch 560 Loss: 3.1529457569122314


 61%|██████    | 560/921 [39:26<13:33,  2.25s/it]

mini Batch 561 Loss: 3.0949556827545166


 61%|██████    | 561/921 [39:28<13:35,  2.27s/it]

mini Batch 562 Loss: 3.3954458236694336


 61%|██████    | 562/921 [39:30<13:24,  2.24s/it]

mini Batch 563 Loss: 2.313676357269287


 61%|██████    | 563/921 [39:32<13:27,  2.26s/it]

mini Batch 564 Loss: 2.6581358909606934


 61%|██████    | 564/921 [39:35<13:13,  2.22s/it]

mini Batch 565 Loss: 2.760218620300293


 61%|██████▏   | 565/921 [39:37<13:18,  2.24s/it]

mini Batch 566 Loss: 2.535055637359619


 61%|██████▏   | 566/921 [39:39<13:08,  2.22s/it]

mini Batch 567 Loss: 2.4158315658569336


 62%|██████▏   | 567/921 [39:41<13:07,  2.23s/it]

mini Batch 568 Loss: 3.3445520401000977


 62%|██████▏   | 568/921 [39:43<13:01,  2.21s/it]

mini Batch 569 Loss: 2.287977457046509


 62%|██████▏   | 569/921 [39:46<13:08,  2.24s/it]

mini Batch 570 Loss: 2.7989463806152344


 62%|██████▏   | 570/921 [39:48<12:58,  2.22s/it]

mini Batch 571 Loss: 3.233793258666992


 62%|██████▏   | 571/921 [39:50<13:02,  2.23s/it]

mini Batch 572 Loss: 2.7155749797821045


 62%|██████▏   | 572/921 [39:52<12:48,  2.20s/it]

mini Batch 573 Loss: 3.3191943168640137


 62%|██████▏   | 573/921 [39:55<12:55,  2.23s/it]

mini Batch 574 Loss: 2.860440969467163


 62%|██████▏   | 574/921 [39:57<12:46,  2.21s/it]

mini Batch 575 Loss: 2.693223237991333


 62%|██████▏   | 575/921 [39:59<12:50,  2.23s/it]

mini Batch 576 Loss: 2.9798693656921387


 63%|██████▎   | 576/921 [40:01<12:38,  2.20s/it]

mini Batch 577 Loss: 3.2357077598571777


 63%|██████▎   | 577/921 [40:03<12:43,  2.22s/it]

mini Batch 578 Loss: 2.8518123626708984


 63%|██████▎   | 578/921 [40:06<12:31,  2.19s/it]

mini Batch 579 Loss: 2.2349328994750977


 63%|██████▎   | 579/921 [40:08<12:34,  2.21s/it]

mini Batch 580 Loss: 3.0626652240753174


 63%|██████▎   | 580/921 [40:10<12:32,  2.21s/it]

mini Batch 581 Loss: 2.881194829940796


 63%|██████▎   | 581/921 [40:12<12:35,  2.22s/it]

mini Batch 582 Loss: 3.33770489692688


 63%|██████▎   | 582/921 [40:14<12:27,  2.21s/it]

mini Batch 583 Loss: 2.6243019104003906


 63%|██████▎   | 583/921 [40:17<12:32,  2.23s/it]

mini Batch 584 Loss: 3.2350974082946777


 63%|██████▎   | 584/921 [40:19<12:22,  2.20s/it]

mini Batch 585 Loss: 3.1361074447631836


 64%|██████▎   | 585/921 [40:21<12:24,  2.22s/it]

mini Batch 586 Loss: 3.0198826789855957


 64%|██████▎   | 586/921 [40:23<12:11,  2.18s/it]

mini Batch 587 Loss: 2.8913192749023438


 64%|██████▎   | 587/921 [40:26<12:15,  2.20s/it]

mini Batch 588 Loss: 3.049316883087158


 64%|██████▍   | 588/921 [40:28<12:08,  2.19s/it]

mini Batch 589 Loss: 3.2024240493774414


 64%|██████▍   | 589/921 [40:30<12:10,  2.20s/it]

mini Batch 590 Loss: 2.75077748298645


 64%|██████▍   | 590/921 [40:32<12:02,  2.18s/it]

mini Batch 591 Loss: 2.9059290885925293


 64%|██████▍   | 591/921 [40:34<12:07,  2.20s/it]

mini Batch 592 Loss: 2.937469244003296


 64%|██████▍   | 592/921 [40:36<11:59,  2.19s/it]

mini Batch 593 Loss: 2.4457364082336426


 64%|██████▍   | 593/921 [40:39<12:04,  2.21s/it]

mini Batch 594 Loss: 2.4240379333496094


 64%|██████▍   | 594/921 [40:41<12:02,  2.21s/it]

mini Batch 595 Loss: 3.4312872886657715


 65%|██████▍   | 595/921 [40:43<12:10,  2.24s/it]

mini Batch 596 Loss: 2.232903242111206


 65%|██████▍   | 596/921 [40:45<12:00,  2.22s/it]

mini Batch 597 Loss: 3.891843318939209


 65%|██████▍   | 597/921 [40:48<12:09,  2.25s/it]

mini Batch 598 Loss: 2.4679174423217773


 65%|██████▍   | 598/921 [40:50<12:02,  2.24s/it]

mini Batch 599 Loss: 2.567636489868164


 65%|██████▌   | 599/921 [40:52<12:06,  2.26s/it]

mini Batch 600 Loss: 2.4752445220947266


 65%|██████▌   | 600/921 [40:54<12:00,  2.25s/it]

mini Batch 601 Loss: 3.360140323638916
Training Batch: 601 | Training Loss: 3.360140323638916
dist a: tensor([19.5161, 21.3808, 25.9780, 15.1805, 20.3441, 15.8408, 16.9566, 24.6744,
        17.5234, 18.9670, 13.9449, 18.6611, 17.0900, 21.2645, 19.3398, 15.7754,
        19.3951, 15.8543, 15.2223, 13.5748, 12.5808, 12.7820, 11.7996, 15.1110,
        14.7237, 15.9804, 23.6077, 18.1216, 15.2118, 16.4989, 17.6168, 15.1286,
        15.6812, 23.9400, 22.4267, 15.4282, 24.9222, 20.4716, 15.5525, 19.5150,
        16.1520, 19.9990, 16.4289, 16.4693, 17.3713, 17.3139, 19.9360, 16.3454,
        16.0974, 16.9764, 22.9631, 13.1532, 15.4406, 20.3953, 12.5956, 14.5692,
        17.8177, 20.2939, 17.1076, 12.5907, 20.1593, 24.3604, 17.4935, 22.0199],
       device='cuda:0'), dist b: tensor([18.8790, 21.5328, 26.7816, 20.0979, 20.2804, 27.9299, 25.7022, 30.4102,
        17.3573, 22.6416, 15.1500, 20.5566, 21.2981, 19.7277, 21.6624, 18.7269,
        31.2981, 15.8575, 17.7189, 29.7437, 27.0873, 13.9570, 17

 65%|██████▌   | 601/921 [40:59<14:57,  2.80s/it]

mini Batch 602 Loss: 2.9007344245910645


 65%|██████▌   | 602/921 [41:01<14:00,  2.63s/it]

mini Batch 603 Loss: 2.8582935333251953


 65%|██████▌   | 603/921 [41:03<13:29,  2.55s/it]

mini Batch 604 Loss: 3.1815004348754883


 66%|██████▌   | 604/921 [41:05<13:01,  2.47s/it]

mini Batch 605 Loss: 2.5236830711364746


 66%|██████▌   | 605/921 [41:08<12:41,  2.41s/it]

mini Batch 606 Loss: 3.6291136741638184


 66%|██████▌   | 606/921 [41:10<12:26,  2.37s/it]

mini Batch 607 Loss: 2.6513710021972656


 66%|██████▌   | 607/921 [41:12<12:13,  2.33s/it]

mini Batch 608 Loss: 3.1539125442504883


 66%|██████▌   | 608/921 [41:14<11:53,  2.28s/it]

mini Batch 609 Loss: 3.0523295402526855


 66%|██████▌   | 609/921 [41:17<11:56,  2.30s/it]

mini Batch 610 Loss: 2.9625940322875977


 66%|██████▌   | 610/921 [41:19<11:40,  2.25s/it]

mini Batch 611 Loss: 2.690016984939575


 66%|██████▋   | 611/921 [41:21<11:41,  2.26s/it]

mini Batch 612 Loss: 2.7133278846740723


 66%|██████▋   | 612/921 [41:23<11:30,  2.24s/it]

mini Batch 613 Loss: 2.5299432277679443


 67%|██████▋   | 613/921 [41:26<11:34,  2.25s/it]

mini Batch 614 Loss: 2.55067777633667


 67%|██████▋   | 614/921 [41:28<11:24,  2.23s/it]

mini Batch 615 Loss: 2.6525847911834717


 67%|██████▋   | 615/921 [41:30<11:35,  2.27s/it]

mini Batch 616 Loss: 3.1136069297790527


 67%|██████▋   | 616/921 [41:32<11:25,  2.25s/it]

mini Batch 617 Loss: 2.567044258117676


 67%|██████▋   | 617/921 [41:35<11:26,  2.26s/it]

mini Batch 618 Loss: 1.9885358810424805


 67%|██████▋   | 618/921 [41:37<11:17,  2.23s/it]

mini Batch 619 Loss: 2.8079543113708496


 67%|██████▋   | 619/921 [41:39<11:18,  2.25s/it]

mini Batch 620 Loss: 2.16542911529541


 67%|██████▋   | 620/921 [41:41<11:09,  2.23s/it]

mini Batch 621 Loss: 3.1196112632751465


 67%|██████▋   | 621/921 [41:44<11:11,  2.24s/it]

mini Batch 622 Loss: 2.7854771614074707


 68%|██████▊   | 622/921 [41:46<11:02,  2.22s/it]

mini Batch 623 Loss: 2.32326602935791


 68%|██████▊   | 623/921 [41:48<11:06,  2.24s/it]

mini Batch 624 Loss: 3.3466529846191406


 68%|██████▊   | 624/921 [41:50<11:06,  2.24s/it]

mini Batch 625 Loss: 2.5944156646728516


 68%|██████▊   | 625/921 [41:52<11:05,  2.25s/it]

mini Batch 626 Loss: 2.71854829788208


 68%|██████▊   | 626/921 [41:55<10:57,  2.23s/it]

mini Batch 627 Loss: 2.602644205093384


 68%|██████▊   | 627/921 [41:57<10:58,  2.24s/it]

mini Batch 628 Loss: 2.66317081451416


 68%|██████▊   | 628/921 [41:59<10:49,  2.22s/it]

mini Batch 629 Loss: 2.5901315212249756


 68%|██████▊   | 629/921 [42:01<10:54,  2.24s/it]

mini Batch 630 Loss: 3.2630014419555664


 68%|██████▊   | 630/921 [42:04<10:49,  2.23s/it]

mini Batch 631 Loss: 2.225332736968994


 69%|██████▊   | 631/921 [42:06<10:54,  2.26s/it]

mini Batch 632 Loss: 2.5561013221740723


 69%|██████▊   | 632/921 [42:08<10:43,  2.23s/it]

mini Batch 633 Loss: 3.5386996269226074


 69%|██████▊   | 633/921 [42:10<10:51,  2.26s/it]

mini Batch 634 Loss: 2.586362838745117


 69%|██████▉   | 634/921 [42:13<10:41,  2.24s/it]

mini Batch 635 Loss: 3.929776668548584


 69%|██████▉   | 635/921 [42:15<10:40,  2.24s/it]

mini Batch 636 Loss: 2.593989849090576


 69%|██████▉   | 636/921 [42:17<10:33,  2.22s/it]

mini Batch 637 Loss: 2.112483501434326


 69%|██████▉   | 637/921 [42:19<10:36,  2.24s/it]

mini Batch 638 Loss: 2.9610142707824707


 69%|██████▉   | 638/921 [42:21<10:29,  2.22s/it]

mini Batch 639 Loss: 3.039731502532959


 69%|██████▉   | 639/921 [42:24<10:31,  2.24s/it]

mini Batch 640 Loss: 2.1875762939453125


 69%|██████▉   | 640/921 [42:26<10:23,  2.22s/it]

mini Batch 641 Loss: 3.0841426849365234


 70%|██████▉   | 641/921 [42:28<10:32,  2.26s/it]

mini Batch 642 Loss: 3.0298194885253906


 70%|██████▉   | 642/921 [42:30<10:19,  2.22s/it]

mini Batch 643 Loss: 2.724501609802246


 70%|██████▉   | 643/921 [42:33<10:26,  2.25s/it]

mini Batch 644 Loss: 2.4479448795318604


 70%|██████▉   | 644/921 [42:35<10:24,  2.25s/it]

mini Batch 645 Loss: 2.6443538665771484


 70%|███████   | 645/921 [42:37<10:22,  2.26s/it]

mini Batch 646 Loss: 3.5024383068084717


 70%|███████   | 646/921 [42:39<10:14,  2.24s/it]

mini Batch 647 Loss: 2.948514699935913


 70%|███████   | 647/921 [42:42<10:16,  2.25s/it]

mini Batch 648 Loss: 3.126659870147705


 70%|███████   | 648/921 [42:44<10:12,  2.24s/it]

mini Batch 649 Loss: 2.6974689960479736


 70%|███████   | 649/921 [42:46<10:11,  2.25s/it]

mini Batch 650 Loss: 2.899257183074951


 71%|███████   | 650/921 [42:48<10:03,  2.23s/it]

mini Batch 651 Loss: 2.753635883331299


 71%|███████   | 651/921 [42:51<10:12,  2.27s/it]

mini Batch 652 Loss: 2.7460737228393555


 71%|███████   | 652/921 [42:53<10:03,  2.24s/it]

mini Batch 653 Loss: 2.7767906188964844


 71%|███████   | 653/921 [42:55<10:07,  2.27s/it]

mini Batch 654 Loss: 3.2130939960479736


 71%|███████   | 654/921 [42:57<09:58,  2.24s/it]

mini Batch 655 Loss: 2.5223660469055176


 71%|███████   | 655/921 [43:00<10:04,  2.27s/it]

mini Batch 656 Loss: 3.3092715740203857


 71%|███████   | 656/921 [43:02<09:54,  2.24s/it]

mini Batch 657 Loss: 2.7573845386505127


 71%|███████▏  | 657/921 [43:04<09:53,  2.25s/it]

mini Batch 658 Loss: 2.4856553077697754


 71%|███████▏  | 658/921 [43:07<09:56,  2.27s/it]

mini Batch 659 Loss: 2.670217752456665


 72%|███████▏  | 659/921 [43:09<09:55,  2.27s/it]

mini Batch 660 Loss: 2.411203384399414


 72%|███████▏  | 660/921 [43:11<09:47,  2.25s/it]

mini Batch 661 Loss: 3.0292210578918457


 72%|███████▏  | 661/921 [43:13<09:47,  2.26s/it]

mini Batch 662 Loss: 2.6753716468811035


 72%|███████▏  | 662/921 [43:16<09:42,  2.25s/it]

mini Batch 663 Loss: 3.258632183074951


 72%|███████▏  | 663/921 [43:18<09:42,  2.26s/it]

mini Batch 664 Loss: 3.415350914001465


 72%|███████▏  | 664/921 [43:20<09:34,  2.23s/it]

mini Batch 665 Loss: 2.829542636871338


 72%|███████▏  | 665/921 [43:22<09:35,  2.25s/it]

mini Batch 666 Loss: 3.869438409805298


 72%|███████▏  | 666/921 [43:24<09:30,  2.24s/it]

mini Batch 667 Loss: 2.4920358657836914


 72%|███████▏  | 667/921 [43:27<09:32,  2.25s/it]

mini Batch 668 Loss: 2.9681649208068848


 73%|███████▎  | 668/921 [43:29<09:24,  2.23s/it]

mini Batch 669 Loss: 3.2288732528686523


 73%|███████▎  | 669/921 [43:31<09:24,  2.24s/it]

mini Batch 670 Loss: 2.554542064666748


 73%|███████▎  | 670/921 [43:33<09:15,  2.21s/it]

mini Batch 671 Loss: 1.7800647020339966


 73%|███████▎  | 671/921 [43:36<09:16,  2.23s/it]

mini Batch 672 Loss: 3.4296746253967285


 73%|███████▎  | 672/921 [43:38<09:10,  2.21s/it]

mini Batch 673 Loss: 2.888622522354126


 73%|███████▎  | 673/921 [43:40<09:13,  2.23s/it]

mini Batch 674 Loss: 2.5069446563720703


 73%|███████▎  | 674/921 [43:42<09:09,  2.22s/it]

mini Batch 675 Loss: 2.2173938751220703


 73%|███████▎  | 675/921 [43:45<09:14,  2.25s/it]

mini Batch 676 Loss: 2.5585694313049316


 73%|███████▎  | 676/921 [43:47<09:07,  2.24s/it]

mini Batch 677 Loss: 2.0938618183135986


 74%|███████▎  | 677/921 [43:49<09:08,  2.25s/it]

mini Batch 678 Loss: 3.8626017570495605


 74%|███████▎  | 678/921 [43:51<09:00,  2.23s/it]

mini Batch 679 Loss: 2.7320525646209717


 74%|███████▎  | 679/921 [43:54<09:07,  2.26s/it]

mini Batch 680 Loss: 2.533916473388672


 74%|███████▍  | 680/921 [43:56<09:04,  2.26s/it]

mini Batch 681 Loss: 2.8547487258911133


 74%|███████▍  | 681/921 [43:58<09:08,  2.28s/it]

mini Batch 682 Loss: 2.3242416381835938


 74%|███████▍  | 682/921 [44:00<09:01,  2.26s/it]

mini Batch 683 Loss: 3.2108912467956543


 74%|███████▍  | 683/921 [44:03<08:59,  2.27s/it]

mini Batch 684 Loss: 2.7873780727386475


 74%|███████▍  | 684/921 [44:05<08:51,  2.24s/it]

mini Batch 685 Loss: 3.0876002311706543


 74%|███████▍  | 685/921 [44:07<08:51,  2.25s/it]

mini Batch 686 Loss: 3.055992841720581


 74%|███████▍  | 686/921 [44:09<08:48,  2.25s/it]

mini Batch 687 Loss: 2.5890111923217773


 75%|███████▍  | 687/921 [44:12<08:50,  2.27s/it]

mini Batch 688 Loss: 2.6662309169769287


 75%|███████▍  | 688/921 [44:14<08:43,  2.25s/it]

mini Batch 689 Loss: 3.1805570125579834


 75%|███████▍  | 689/921 [44:16<08:44,  2.26s/it]

mini Batch 690 Loss: 2.593703269958496


 75%|███████▍  | 690/921 [44:18<08:37,  2.24s/it]

mini Batch 691 Loss: 2.4529271125793457


 75%|███████▌  | 691/921 [44:21<08:36,  2.25s/it]

mini Batch 692 Loss: 3.001389265060425


 75%|███████▌  | 692/921 [44:23<08:34,  2.24s/it]

mini Batch 693 Loss: 2.8479695320129395


 75%|███████▌  | 693/921 [44:25<08:35,  2.26s/it]

mini Batch 694 Loss: 3.2004032135009766


 75%|███████▌  | 694/921 [44:27<08:27,  2.23s/it]

mini Batch 695 Loss: 2.6940555572509766


 75%|███████▌  | 695/921 [44:30<08:29,  2.25s/it]

mini Batch 696 Loss: 2.4113237857818604


 76%|███████▌  | 696/921 [44:32<08:21,  2.23s/it]

mini Batch 697 Loss: 2.484877109527588


 76%|███████▌  | 697/921 [44:34<08:21,  2.24s/it]

mini Batch 698 Loss: 2.969876527786255


 76%|███████▌  | 698/921 [44:36<08:12,  2.21s/it]

mini Batch 699 Loss: 1.988408088684082


 76%|███████▌  | 699/921 [44:38<08:14,  2.23s/it]

mini Batch 700 Loss: 1.74098539352417


 76%|███████▌  | 700/921 [44:41<08:12,  2.23s/it]

mini Batch 701 Loss: 3.157313346862793


 76%|███████▌  | 701/921 [44:43<08:17,  2.26s/it]

mini Batch 702 Loss: 2.6436901092529297


 76%|███████▌  | 702/921 [44:45<08:13,  2.25s/it]

mini Batch 703 Loss: 2.5504708290100098


 76%|███████▋  | 703/921 [44:48<08:15,  2.27s/it]

mini Batch 704 Loss: 3.569624900817871


 76%|███████▋  | 704/921 [44:50<08:10,  2.26s/it]

mini Batch 705 Loss: 2.6808395385742188


 77%|███████▋  | 705/921 [44:52<08:09,  2.27s/it]

mini Batch 706 Loss: 2.578277587890625


 77%|███████▋  | 706/921 [44:54<08:00,  2.23s/it]

mini Batch 707 Loss: 2.3469836711883545


 77%|███████▋  | 707/921 [44:57<08:01,  2.25s/it]

mini Batch 708 Loss: 3.334895133972168


 77%|███████▋  | 708/921 [44:59<07:55,  2.23s/it]

mini Batch 709 Loss: 2.9339728355407715


 77%|███████▋  | 709/921 [45:01<07:53,  2.24s/it]

mini Batch 710 Loss: 2.741166830062866


 77%|███████▋  | 710/921 [45:03<07:49,  2.22s/it]

mini Batch 711 Loss: 3.1073241233825684


 77%|███████▋  | 711/921 [45:06<07:51,  2.24s/it]

mini Batch 712 Loss: 3.1115031242370605


 77%|███████▋  | 712/921 [45:08<07:48,  2.24s/it]

mini Batch 713 Loss: 2.792783498764038


 77%|███████▋  | 713/921 [45:10<07:48,  2.25s/it]

mini Batch 714 Loss: 2.714242935180664


 78%|███████▊  | 714/921 [45:12<07:42,  2.23s/it]

mini Batch 715 Loss: 3.0881950855255127


 78%|███████▊  | 715/921 [45:14<07:43,  2.25s/it]

mini Batch 716 Loss: 2.529963493347168


 78%|███████▊  | 716/921 [45:17<07:33,  2.21s/it]

mini Batch 717 Loss: 2.1035192012786865


 78%|███████▊  | 717/921 [45:19<07:34,  2.23s/it]

mini Batch 718 Loss: 2.7065482139587402


 78%|███████▊  | 718/921 [45:21<07:27,  2.20s/it]

mini Batch 719 Loss: 3.3666584491729736


 78%|███████▊  | 719/921 [45:23<07:28,  2.22s/it]

mini Batch 720 Loss: 3.018312454223633


 78%|███████▊  | 720/921 [45:25<07:23,  2.20s/it]

mini Batch 721 Loss: 3.086106300354004


 78%|███████▊  | 721/921 [45:28<07:25,  2.23s/it]

mini Batch 722 Loss: 2.4602792263031006


 78%|███████▊  | 722/921 [45:30<07:19,  2.21s/it]

mini Batch 723 Loss: 2.771023750305176


 79%|███████▊  | 723/921 [45:32<07:19,  2.22s/it]

mini Batch 724 Loss: 1.8486576080322266


 79%|███████▊  | 724/921 [45:34<07:14,  2.21s/it]

mini Batch 725 Loss: 3.5939555168151855


 79%|███████▊  | 725/921 [45:37<07:17,  2.23s/it]

mini Batch 726 Loss: 3.1223092079162598


 79%|███████▉  | 726/921 [45:39<07:11,  2.21s/it]

mini Batch 727 Loss: 2.5029752254486084


 79%|███████▉  | 727/921 [45:41<07:11,  2.22s/it]

mini Batch 728 Loss: 2.952244520187378


 79%|███████▉  | 728/921 [45:43<07:06,  2.21s/it]

mini Batch 729 Loss: 2.135749101638794


 79%|███████▉  | 729/921 [45:46<07:09,  2.24s/it]

mini Batch 730 Loss: 2.6582870483398438


 79%|███████▉  | 730/921 [45:48<07:07,  2.24s/it]

mini Batch 731 Loss: 2.003695011138916


 79%|███████▉  | 731/921 [45:50<07:09,  2.26s/it]

mini Batch 732 Loss: 2.28594970703125


 79%|███████▉  | 732/921 [45:52<06:59,  2.22s/it]

mini Batch 733 Loss: 2.454667329788208


 80%|███████▉  | 733/921 [45:54<06:58,  2.23s/it]

mini Batch 734 Loss: 2.5566930770874023


 80%|███████▉  | 734/921 [45:57<06:54,  2.21s/it]

mini Batch 735 Loss: 2.5883431434631348


 80%|███████▉  | 735/921 [45:59<06:54,  2.23s/it]

mini Batch 736 Loss: 3.448201894760132


 80%|███████▉  | 736/921 [46:01<06:48,  2.21s/it]

mini Batch 737 Loss: 3.0721638202667236


 80%|████████  | 737/921 [46:03<06:53,  2.25s/it]

mini Batch 738 Loss: 3.5185885429382324


 80%|████████  | 738/921 [46:06<06:47,  2.22s/it]

mini Batch 739 Loss: 3.2318553924560547


 80%|████████  | 739/921 [46:08<06:49,  2.25s/it]

mini Batch 740 Loss: 2.7964675426483154


 80%|████████  | 740/921 [46:10<06:41,  2.22s/it]

mini Batch 741 Loss: 2.365898609161377


 80%|████████  | 741/921 [46:12<06:42,  2.23s/it]

mini Batch 742 Loss: 2.3473634719848633


 81%|████████  | 742/921 [46:14<06:37,  2.22s/it]

mini Batch 743 Loss: 2.9664201736450195


 81%|████████  | 743/921 [46:17<06:38,  2.24s/it]

mini Batch 744 Loss: 3.0242233276367188


 81%|████████  | 744/921 [46:19<06:32,  2.22s/it]

mini Batch 745 Loss: 2.0410385131835938


 81%|████████  | 745/921 [46:21<06:32,  2.23s/it]

mini Batch 746 Loss: 2.804673910140991


 81%|████████  | 746/921 [46:23<06:28,  2.22s/it]

mini Batch 747 Loss: 2.1109728813171387


 81%|████████  | 747/921 [46:26<06:28,  2.23s/it]

mini Batch 748 Loss: 2.57163143157959


 81%|████████  | 748/921 [46:28<06:20,  2.20s/it]

mini Batch 749 Loss: 2.3302485942840576


 81%|████████▏ | 749/921 [46:30<06:23,  2.23s/it]

mini Batch 750 Loss: 3.327632427215576


 81%|████████▏ | 750/921 [46:32<06:18,  2.21s/it]

mini Batch 751 Loss: 2.795046329498291


 82%|████████▏ | 751/921 [46:34<06:18,  2.22s/it]

mini Batch 752 Loss: 1.9357190132141113


 82%|████████▏ | 752/921 [46:37<06:14,  2.21s/it]

mini Batch 753 Loss: 3.5771279335021973


 82%|████████▏ | 753/921 [46:39<06:14,  2.23s/it]

mini Batch 754 Loss: 3.8179211616516113


 82%|████████▏ | 754/921 [46:41<06:10,  2.22s/it]

mini Batch 755 Loss: 2.460447072982788


 82%|████████▏ | 755/921 [46:43<06:10,  2.23s/it]

mini Batch 756 Loss: 2.6874639987945557


 82%|████████▏ | 756/921 [46:46<06:06,  2.22s/it]

mini Batch 757 Loss: 2.9547030925750732


 82%|████████▏ | 757/921 [46:48<06:11,  2.26s/it]

mini Batch 758 Loss: 2.642524480819702


 82%|████████▏ | 758/921 [46:50<06:04,  2.23s/it]

mini Batch 759 Loss: 2.5587711334228516


 82%|████████▏ | 759/921 [46:52<06:06,  2.26s/it]

mini Batch 760 Loss: 2.4931960105895996


 83%|████████▎ | 760/921 [46:55<06:02,  2.25s/it]

mini Batch 761 Loss: 2.70243239402771


 83%|████████▎ | 761/921 [46:57<05:59,  2.24s/it]

mini Batch 762 Loss: 2.8945531845092773


 83%|████████▎ | 762/921 [46:59<05:54,  2.23s/it]

mini Batch 763 Loss: 2.9071288108825684


 83%|████████▎ | 763/921 [47:01<05:54,  2.24s/it]

mini Batch 764 Loss: 2.5604727268218994


 83%|████████▎ | 764/921 [47:04<05:52,  2.24s/it]

mini Batch 765 Loss: 2.468149185180664


 83%|████████▎ | 765/921 [47:06<05:51,  2.25s/it]

mini Batch 766 Loss: 2.483027458190918


 83%|████████▎ | 766/921 [47:08<05:44,  2.22s/it]

mini Batch 767 Loss: 2.7952728271484375


 83%|████████▎ | 767/921 [47:10<05:44,  2.24s/it]

mini Batch 768 Loss: 2.3001766204833984


 83%|████████▎ | 768/921 [47:12<05:38,  2.21s/it]

mini Batch 769 Loss: 2.341980218887329


 83%|████████▎ | 769/921 [47:15<05:38,  2.23s/it]

mini Batch 770 Loss: 2.9871134757995605


 84%|████████▎ | 770/921 [47:17<05:34,  2.22s/it]

mini Batch 771 Loss: 3.4769132137298584


 84%|████████▎ | 771/921 [47:19<05:35,  2.24s/it]

mini Batch 772 Loss: 2.953859329223633


 84%|████████▍ | 772/921 [47:21<05:30,  2.22s/it]

mini Batch 773 Loss: 2.6765189170837402


 84%|████████▍ | 773/921 [47:24<05:30,  2.23s/it]

mini Batch 774 Loss: 2.960308074951172


 84%|████████▍ | 774/921 [47:26<05:26,  2.22s/it]

mini Batch 775 Loss: 2.2194814682006836


 84%|████████▍ | 775/921 [47:28<05:27,  2.24s/it]

mini Batch 776 Loss: 3.0073063373565674


 84%|████████▍ | 776/921 [47:30<05:22,  2.22s/it]

mini Batch 777 Loss: 3.0537359714508057


 84%|████████▍ | 777/921 [47:33<05:22,  2.24s/it]

mini Batch 778 Loss: 3.178084135055542


 84%|████████▍ | 778/921 [47:35<05:16,  2.22s/it]

mini Batch 779 Loss: 1.9027502536773682


 85%|████████▍ | 779/921 [47:37<05:17,  2.24s/it]

mini Batch 780 Loss: 2.4736595153808594


 85%|████████▍ | 780/921 [47:39<05:11,  2.21s/it]

mini Batch 781 Loss: 3.7334506511688232


 85%|████████▍ | 781/921 [47:41<05:12,  2.23s/it]

mini Batch 782 Loss: 2.6901071071624756


 85%|████████▍ | 782/921 [47:44<05:08,  2.22s/it]

mini Batch 783 Loss: 2.379567861557007


 85%|████████▌ | 783/921 [47:46<05:08,  2.23s/it]

mini Batch 784 Loss: 3.2002789974212646


 85%|████████▌ | 784/921 [47:48<05:05,  2.23s/it]

mini Batch 785 Loss: 2.445905923843384


 85%|████████▌ | 785/921 [47:50<05:05,  2.25s/it]

mini Batch 786 Loss: 1.985375165939331


 85%|████████▌ | 786/921 [47:53<05:03,  2.25s/it]

mini Batch 787 Loss: 2.9816150665283203


 85%|████████▌ | 787/921 [47:55<05:02,  2.26s/it]

mini Batch 788 Loss: 2.3039627075195312


 86%|████████▌ | 788/921 [47:57<04:56,  2.23s/it]

mini Batch 789 Loss: 2.6689624786376953


 86%|████████▌ | 789/921 [47:59<04:58,  2.26s/it]

mini Batch 790 Loss: 2.433199882507324


 86%|████████▌ | 790/921 [48:02<04:51,  2.23s/it]

mini Batch 791 Loss: 2.4497146606445312


 86%|████████▌ | 791/921 [48:04<04:53,  2.26s/it]

mini Batch 792 Loss: 2.480855941772461


 86%|████████▌ | 792/921 [48:06<04:47,  2.23s/it]

mini Batch 793 Loss: 2.464517593383789


 86%|████████▌ | 793/921 [48:08<04:46,  2.24s/it]

mini Batch 794 Loss: 3.245140790939331


 86%|████████▌ | 794/921 [48:11<04:41,  2.22s/it]

mini Batch 795 Loss: 2.6471781730651855


 86%|████████▋ | 795/921 [48:13<04:40,  2.23s/it]

mini Batch 796 Loss: 2.3517909049987793


 86%|████████▋ | 796/921 [48:15<04:35,  2.20s/it]

mini Batch 797 Loss: 2.249913215637207


 87%|████████▋ | 797/921 [48:17<04:37,  2.24s/it]

mini Batch 798 Loss: 3.7982163429260254


 87%|████████▋ | 798/921 [48:19<04:35,  2.24s/it]

mini Batch 799 Loss: 2.656114101409912


 87%|████████▋ | 799/921 [48:22<04:36,  2.27s/it]

mini Batch 800 Loss: 2.159979820251465


 87%|████████▋ | 800/921 [48:24<04:35,  2.28s/it]

mini Batch 801 Loss: 2.643864631652832


 87%|████████▋ | 801/921 [48:26<04:35,  2.29s/it]

mini Batch 802 Loss: 2.1997456550598145


 87%|████████▋ | 802/921 [48:29<04:29,  2.27s/it]

mini Batch 803 Loss: 2.7738037109375


 87%|████████▋ | 803/921 [48:31<04:27,  2.27s/it]

mini Batch 804 Loss: 2.8293862342834473


 87%|████████▋ | 804/921 [48:33<04:22,  2.24s/it]

mini Batch 805 Loss: 3.640592575073242


 87%|████████▋ | 805/921 [48:35<04:20,  2.25s/it]

mini Batch 806 Loss: 1.6655501127243042


 88%|████████▊ | 806/921 [48:38<04:17,  2.24s/it]

mini Batch 807 Loss: 2.65956449508667


 88%|████████▊ | 807/921 [48:40<04:19,  2.28s/it]

mini Batch 808 Loss: 2.2977633476257324


 88%|████████▊ | 808/921 [48:42<04:14,  2.26s/it]

mini Batch 809 Loss: 1.8234261274337769


 88%|████████▊ | 809/921 [48:44<04:14,  2.27s/it]

mini Batch 810 Loss: 2.377530574798584


 88%|████████▊ | 810/921 [48:47<04:09,  2.25s/it]

mini Batch 811 Loss: 1.8096728324890137


 88%|████████▊ | 811/921 [48:49<04:07,  2.25s/it]

mini Batch 812 Loss: 2.985966205596924


 88%|████████▊ | 812/921 [48:51<04:02,  2.23s/it]

mini Batch 813 Loss: 2.7585818767547607


 88%|████████▊ | 813/921 [48:53<04:01,  2.24s/it]

mini Batch 814 Loss: 2.890852451324463


 88%|████████▊ | 814/921 [48:56<04:00,  2.24s/it]

mini Batch 815 Loss: 2.969498634338379


 88%|████████▊ | 815/921 [48:58<03:59,  2.26s/it]

mini Batch 816 Loss: 2.987623929977417


 89%|████████▊ | 816/921 [49:00<03:54,  2.23s/it]

mini Batch 817 Loss: 1.7862844467163086


 89%|████████▊ | 817/921 [49:02<03:53,  2.24s/it]

mini Batch 818 Loss: 2.570322036743164


 89%|████████▉ | 818/921 [49:04<03:48,  2.22s/it]

mini Batch 819 Loss: 2.865431308746338


 89%|████████▉ | 819/921 [49:07<03:48,  2.24s/it]

mini Batch 820 Loss: 2.703648567199707


 89%|████████▉ | 820/921 [49:09<03:44,  2.22s/it]

mini Batch 821 Loss: 3.175567865371704


 89%|████████▉ | 821/921 [49:11<03:43,  2.23s/it]

mini Batch 822 Loss: 3.551316022872925


 89%|████████▉ | 822/921 [49:13<03:39,  2.21s/it]

mini Batch 823 Loss: 3.4882426261901855


 89%|████████▉ | 823/921 [49:16<03:40,  2.25s/it]

mini Batch 824 Loss: 2.8092055320739746


 89%|████████▉ | 824/921 [49:18<03:35,  2.23s/it]

mini Batch 825 Loss: 3.0462942123413086


 90%|████████▉ | 825/921 [49:20<03:37,  2.27s/it]

mini Batch 826 Loss: 2.989685297012329


 90%|████████▉ | 826/921 [49:22<03:32,  2.23s/it]

mini Batch 827 Loss: 2.953052520751953


 90%|████████▉ | 827/921 [49:25<03:30,  2.24s/it]

mini Batch 828 Loss: 3.6468091011047363


 90%|████████▉ | 828/921 [49:27<03:27,  2.23s/it]

mini Batch 829 Loss: 3.2748894691467285


 90%|█████████ | 829/921 [49:29<03:25,  2.23s/it]

mini Batch 830 Loss: 2.4862732887268066


 90%|█████████ | 830/921 [49:31<03:21,  2.22s/it]

mini Batch 831 Loss: 2.192263126373291


 90%|█████████ | 831/921 [49:34<03:21,  2.24s/it]

mini Batch 832 Loss: 1.875699520111084


 90%|█████████ | 832/921 [49:36<03:17,  2.22s/it]

mini Batch 833 Loss: 2.8451473712921143


 90%|█████████ | 833/921 [49:38<03:17,  2.25s/it]

mini Batch 834 Loss: 2.7077417373657227


 91%|█████████ | 834/921 [49:40<03:13,  2.23s/it]

mini Batch 835 Loss: 3.021605968475342


 91%|█████████ | 835/921 [49:43<03:13,  2.25s/it]

mini Batch 836 Loss: 2.417714834213257


 91%|█████████ | 836/921 [49:45<03:10,  2.24s/it]

mini Batch 837 Loss: 2.582655668258667


 91%|█████████ | 837/921 [49:47<03:08,  2.25s/it]

mini Batch 838 Loss: 3.061542510986328


 91%|█████████ | 838/921 [49:49<03:05,  2.23s/it]

mini Batch 839 Loss: 2.6876862049102783


 91%|█████████ | 839/921 [49:51<03:03,  2.24s/it]

mini Batch 840 Loss: 2.2970223426818848


 91%|█████████ | 840/921 [49:54<03:02,  2.25s/it]

mini Batch 841 Loss: 2.607792615890503


 91%|█████████▏| 841/921 [49:56<03:00,  2.26s/it]

mini Batch 842 Loss: 2.4890005588531494


 91%|█████████▏| 842/921 [49:58<02:57,  2.25s/it]

mini Batch 843 Loss: 2.1392087936401367


 92%|█████████▏| 843/921 [50:01<02:55,  2.25s/it]

mini Batch 844 Loss: 1.9620630741119385


 92%|█████████▏| 844/921 [50:03<02:51,  2.23s/it]

mini Batch 845 Loss: 3.3023180961608887


 92%|█████████▏| 845/921 [50:05<02:50,  2.25s/it]

mini Batch 846 Loss: 2.969480514526367


 92%|█████████▏| 846/921 [50:07<02:46,  2.22s/it]

mini Batch 847 Loss: 2.025167942047119


 92%|█████████▏| 847/921 [50:09<02:45,  2.23s/it]

mini Batch 848 Loss: 2.10794734954834


 92%|█████████▏| 848/921 [50:12<02:41,  2.21s/it]

mini Batch 849 Loss: 2.1371712684631348


 92%|█████████▏| 849/921 [50:14<02:41,  2.24s/it]

mini Batch 850 Loss: 3.2720532417297363


 92%|█████████▏| 850/921 [50:16<02:38,  2.24s/it]

mini Batch 851 Loss: 2.3179173469543457


 92%|█████████▏| 851/921 [50:18<02:37,  2.25s/it]

mini Batch 852 Loss: 2.292161464691162


 93%|█████████▎| 852/921 [50:21<02:34,  2.23s/it]

mini Batch 853 Loss: 3.0579707622528076


 93%|█████████▎| 853/921 [50:23<02:32,  2.25s/it]

mini Batch 854 Loss: 2.556428909301758


 93%|█████████▎| 854/921 [50:25<02:28,  2.22s/it]

mini Batch 855 Loss: 2.9001049995422363


 93%|█████████▎| 855/921 [50:27<02:27,  2.23s/it]

mini Batch 856 Loss: 2.551760673522949


 93%|█████████▎| 856/921 [50:30<02:25,  2.23s/it]

mini Batch 857 Loss: 2.0943033695220947


 93%|█████████▎| 857/921 [50:32<02:24,  2.25s/it]

mini Batch 858 Loss: 1.9376280307769775


 93%|█████████▎| 858/921 [50:34<02:20,  2.22s/it]

mini Batch 859 Loss: 2.3658218383789062


 93%|█████████▎| 859/921 [50:36<02:18,  2.24s/it]

mini Batch 860 Loss: 3.004369020462036


 93%|█████████▎| 860/921 [50:38<02:14,  2.21s/it]

mini Batch 861 Loss: 3.0388054847717285


 93%|█████████▎| 861/921 [50:41<02:14,  2.24s/it]

mini Batch 862 Loss: 3.298119068145752


 94%|█████████▎| 862/921 [50:43<02:11,  2.23s/it]

mini Batch 863 Loss: 2.597848892211914


 94%|█████████▎| 863/921 [50:45<02:09,  2.23s/it]

mini Batch 864 Loss: 3.0760743618011475


 94%|█████████▍| 864/921 [50:47<02:05,  2.20s/it]

mini Batch 865 Loss: 2.5950112342834473


 94%|█████████▍| 865/921 [50:50<02:05,  2.23s/it]

mini Batch 866 Loss: 2.323570728302002


 94%|█████████▍| 866/921 [50:52<02:01,  2.21s/it]

mini Batch 867 Loss: 2.417691707611084


 94%|█████████▍| 867/921 [50:54<02:00,  2.24s/it]

mini Batch 868 Loss: 2.5898141860961914


 94%|█████████▍| 868/921 [50:56<01:57,  2.23s/it]

mini Batch 869 Loss: 2.6526970863342285


 94%|█████████▍| 869/921 [50:58<01:55,  2.23s/it]

mini Batch 870 Loss: 2.2330267429351807


 94%|█████████▍| 870/921 [51:01<01:52,  2.21s/it]

mini Batch 871 Loss: 2.204885959625244


 95%|█████████▍| 871/921 [51:03<01:51,  2.23s/it]

mini Batch 872 Loss: 2.8577117919921875


 95%|█████████▍| 872/921 [51:05<01:48,  2.21s/it]

mini Batch 873 Loss: 2.6476759910583496


 95%|█████████▍| 873/921 [51:07<01:47,  2.24s/it]

mini Batch 874 Loss: 3.726846218109131


 95%|█████████▍| 874/921 [51:10<01:44,  2.23s/it]

mini Batch 875 Loss: 2.812556743621826


 95%|█████████▌| 875/921 [51:12<01:44,  2.27s/it]

mini Batch 876 Loss: 2.3652236461639404


 95%|█████████▌| 876/921 [51:14<01:41,  2.25s/it]

mini Batch 877 Loss: 2.82932186126709


 95%|█████████▌| 877/921 [51:16<01:38,  2.25s/it]

mini Batch 878 Loss: 2.7150120735168457


 95%|█████████▌| 878/921 [51:19<01:36,  2.24s/it]

mini Batch 879 Loss: 2.471999168395996


 95%|█████████▌| 879/921 [51:21<01:34,  2.24s/it]

mini Batch 880 Loss: 2.8681697845458984


 96%|█████████▌| 880/921 [51:23<01:30,  2.21s/it]

mini Batch 881 Loss: 3.0524754524230957


 96%|█████████▌| 881/921 [51:25<01:28,  2.22s/it]

mini Batch 882 Loss: 2.4948925971984863


 96%|█████████▌| 882/921 [51:27<01:26,  2.22s/it]

mini Batch 883 Loss: 1.4351613521575928


 96%|█████████▌| 883/921 [51:30<01:24,  2.23s/it]

mini Batch 884 Loss: 2.515151023864746


 96%|█████████▌| 884/921 [51:32<01:21,  2.21s/it]

mini Batch 885 Loss: 3.1416015625


 96%|█████████▌| 885/921 [51:34<01:21,  2.26s/it]

mini Batch 886 Loss: 2.3779854774475098


 96%|█████████▌| 886/921 [51:36<01:17,  2.23s/it]

mini Batch 887 Loss: 2.9463136196136475


 96%|█████████▋| 887/921 [51:39<01:16,  2.25s/it]

mini Batch 888 Loss: 3.1511144638061523


 96%|█████████▋| 888/921 [51:41<01:13,  2.23s/it]

mini Batch 889 Loss: 3.3673830032348633


 97%|█████████▋| 889/921 [51:43<01:11,  2.24s/it]

mini Batch 890 Loss: 1.9968807697296143


 97%|█████████▋| 890/921 [51:45<01:08,  2.22s/it]

mini Batch 891 Loss: 3.2947161197662354


 97%|█████████▋| 891/921 [51:48<01:07,  2.25s/it]

mini Batch 892 Loss: 2.0110068321228027


 97%|█████████▋| 892/921 [51:50<01:04,  2.22s/it]

mini Batch 893 Loss: 2.7616302967071533


 97%|█████████▋| 893/921 [51:52<01:02,  2.24s/it]

mini Batch 894 Loss: 2.456141948699951


 97%|█████████▋| 894/921 [51:54<00:59,  2.21s/it]

mini Batch 895 Loss: 2.0557005405426025


 97%|█████████▋| 895/921 [51:57<00:57,  2.23s/it]

mini Batch 896 Loss: 2.7068557739257812


 97%|█████████▋| 896/921 [51:59<00:55,  2.22s/it]

mini Batch 897 Loss: 2.727846384048462


 97%|█████████▋| 897/921 [52:01<00:53,  2.23s/it]

mini Batch 898 Loss: 2.436695098876953


 98%|█████████▊| 898/921 [52:03<00:51,  2.22s/it]

mini Batch 899 Loss: 2.5982840061187744


 98%|█████████▊| 899/921 [52:05<00:49,  2.24s/it]

mini Batch 900 Loss: 2.709184169769287


 98%|█████████▊| 900/921 [52:08<00:47,  2.25s/it]

mini Batch 901 Loss: 3.6341071128845215
Training Batch: 901 | Training Loss: 3.6341071128845215
dist a: tensor([13.1274, 18.0864, 15.3329, 15.9818, 22.6490, 13.7198, 15.7150, 18.0996,
        22.3999, 12.9730, 20.4360, 14.7434, 13.5513, 18.1147, 21.2765, 18.3945,
        16.2371, 15.9169, 17.6526, 16.6086, 17.7031, 21.8148, 15.6620, 20.9321,
        16.1563, 16.3295, 17.7285, 22.1554, 12.3973, 11.3647, 15.7718, 14.3426,
        13.3361, 14.4685, 17.4474, 16.2866, 12.5888, 22.9037, 12.5881, 21.1847,
        17.3223, 15.7216, 18.7899, 15.9407, 19.7012, 16.1102, 12.7259, 13.2740,
        14.6768, 16.0549, 24.2995, 20.7604, 13.2036, 19.4213, 15.1193, 21.9844,
        15.0951, 26.0800, 15.2660, 15.4347, 16.6233, 18.7002, 15.2218, 13.4515],
       device='cuda:0'), dist b: tensor([25.3777, 25.0342, 18.0924, 21.6865, 27.2508, 25.4648, 17.7126, 16.4385,
        19.6101, 21.6201, 21.2018, 17.0826, 29.6701, 19.5994, 24.8680, 20.1356,
        26.0087, 23.5325, 18.7236, 17.2878, 28.9333, 19.1508, 

 98%|█████████▊| 901/921 [52:12<00:55,  2.77s/it]

mini Batch 902 Loss: 2.7042407989501953


 98%|█████████▊| 902/921 [52:14<00:49,  2.61s/it]

mini Batch 903 Loss: 2.6720499992370605


 98%|█████████▊| 903/921 [52:16<00:45,  2.51s/it]

mini Batch 904 Loss: 3.5126190185546875


 98%|█████████▊| 904/921 [52:18<00:41,  2.42s/it]

mini Batch 905 Loss: 2.549995183944702


 98%|█████████▊| 905/921 [52:21<00:38,  2.38s/it]

mini Batch 906 Loss: 2.8374381065368652


 98%|█████████▊| 906/921 [52:23<00:34,  2.33s/it]

mini Batch 907 Loss: 2.5785298347473145


 98%|█████████▊| 907/921 [52:25<00:32,  2.32s/it]

mini Batch 908 Loss: 2.6947784423828125


 99%|█████████▊| 908/921 [52:27<00:29,  2.29s/it]

mini Batch 909 Loss: 2.4859020709991455


 99%|█████████▊| 909/921 [52:30<00:27,  2.29s/it]

mini Batch 910 Loss: 2.91414213180542


 99%|█████████▉| 910/921 [52:32<00:24,  2.25s/it]

mini Batch 911 Loss: 2.879638671875


 99%|█████████▉| 911/921 [52:34<00:22,  2.26s/it]

mini Batch 912 Loss: 2.8806076049804688


 99%|█████████▉| 912/921 [52:36<00:20,  2.26s/it]

mini Batch 913 Loss: 2.6102237701416016


 99%|█████████▉| 913/921 [52:39<00:18,  2.28s/it]

mini Batch 914 Loss: 1.7869038581848145


 99%|█████████▉| 914/921 [52:41<00:15,  2.25s/it]

mini Batch 915 Loss: 2.576892852783203


 99%|█████████▉| 915/921 [52:43<00:13,  2.26s/it]

mini Batch 916 Loss: 3.067082166671753


 99%|█████████▉| 916/921 [52:45<00:11,  2.26s/it]

mini Batch 917 Loss: 2.711897850036621


100%|█████████▉| 917/921 [52:48<00:09,  2.27s/it]

mini Batch 918 Loss: 2.3746697902679443


100%|█████████▉| 918/921 [52:50<00:06,  2.24s/it]

mini Batch 919 Loss: 2.39792799949646


100%|█████████▉| 919/921 [52:52<00:04,  2.26s/it]

mini Batch 920 Loss: 2.3789916038513184


100%|█████████▉| 920/921 [52:54<00:02,  2.23s/it]

mini Batch 921 Loss: 2.64223575592041


100%|██████████| 921/921 [52:56<00:00,  3.45s/it]


[1] average loss per epoch: 3.071
Saved model checkpoint to /content/drive/My Drive/test9/model_epoch1.pt
dist a: tensor([17.7875, 15.9823, 22.2015, 17.5548, 19.5111, 14.8157, 16.8280, 16.8718,
        14.6487, 20.2286, 16.9764, 23.4288, 19.2783, 18.7808, 22.3853, 21.5917,
        20.1528, 22.6810, 19.8231, 23.4361, 24.9590, 14.8118, 13.4784, 16.0456,
        24.0532,  8.9757, 16.3663, 16.6980, 22.7792, 18.7508, 17.3906, 19.7836,
        19.0622, 23.0941, 20.6042, 17.7793, 23.0205, 22.6846, 25.3909, 18.1984,
        14.6070, 16.8978, 16.2819, 15.0183, 17.5455, 16.1639, 18.9994, 19.4146,
        16.2352, 22.5521, 19.9938, 27.2895, 15.4500, 12.4366, 17.9645, 16.9125,
        15.3059, 16.7928, 16.5302, 20.3792, 17.0639, 17.7702, 15.1522, 18.3208],
       device='cuda:0'), dist b: tensor([20.0670, 20.4952, 26.1933, 27.6511, 22.2724, 25.4035, 21.6429, 15.7257,
        23.6638, 20.0636, 26.4778, 22.2050, 14.7474, 23.9397, 21.6554, 18.3578,
        20.1038, 21.0819, 22.2590, 22.6733, 34.4044,

  0%|          | 1/931 [01:03<16:24:31, 63.52s/it]

batch id predict after epoch:  1


  0%|          | 2/931 [01:51<14:05:22, 54.60s/it]

batch id predict after epoch:  2


  0%|          | 3/931 [02:36<12:55:59, 50.17s/it]

batch id predict after epoch:  3


  0%|          | 4/931 [03:29<13:08:11, 51.02s/it]

batch id predict after epoch:  4


  1%|          | 5/931 [04:23<13:28:57, 52.42s/it]

batch id predict after epoch:  5


  1%|          | 6/931 [05:04<12:24:40, 48.30s/it]

batch id predict after epoch:  6


  1%|          | 7/931 [05:44<11:41:01, 45.52s/it]

batch id predict after epoch:  7


  1%|          | 8/931 [06:21<10:58:52, 42.83s/it]

batch id predict after epoch:  8


  1%|          | 9/931 [07:05<11:04:46, 43.26s/it]

batch id predict after epoch:  9


  1%|          | 10/931 [07:51<11:17:38, 44.15s/it]

batch id predict after epoch:  10


  1%|          | 11/931 [08:22<10:15:13, 40.12s/it]

batch id predict after epoch:  11


  1%|▏         | 12/931 [08:54<9:34:33, 37.51s/it] 

batch id predict after epoch:  12


  1%|▏         | 13/931 [09:24<8:59:25, 35.26s/it]

batch id predict after epoch:  13


  2%|▏         | 14/931 [09:50<8:17:30, 32.55s/it]

batch id predict after epoch:  14


  2%|▏         | 15/931 [10:31<8:56:42, 35.16s/it]

batch id predict after epoch:  15


  2%|▏         | 16/931 [11:11<9:19:06, 36.66s/it]

batch id predict after epoch:  16


  2%|▏         | 17/931 [11:38<8:34:43, 33.79s/it]

batch id predict after epoch:  17


  2%|▏         | 18/931 [11:59<7:35:53, 29.96s/it]

batch id predict after epoch:  18


  2%|▏         | 19/931 [12:25<7:16:31, 28.72s/it]

batch id predict after epoch:  19


  2%|▏         | 20/931 [12:47<6:45:03, 26.68s/it]

batch id predict after epoch:  20


  2%|▏         | 21/931 [13:09<6:24:23, 25.34s/it]

batch id predict after epoch:  21


  2%|▏         | 22/931 [13:32<6:10:18, 24.44s/it]

batch id predict after epoch:  22


  2%|▏         | 23/931 [14:06<6:54:18, 27.38s/it]

batch id predict after epoch:  23


  3%|▎         | 24/931 [14:27<6:25:39, 25.51s/it]

batch id predict after epoch:  24


  3%|▎         | 25/931 [14:46<5:56:46, 23.63s/it]

batch id predict after epoch:  25


  3%|▎         | 26/931 [15:05<5:31:49, 22.00s/it]

batch id predict after epoch:  26


  3%|▎         | 27/931 [15:22<5:12:36, 20.75s/it]

batch id predict after epoch:  27


  3%|▎         | 28/931 [15:41<5:03:58, 20.20s/it]

batch id predict after epoch:  28


  3%|▎         | 29/931 [16:12<5:50:44, 23.33s/it]

batch id predict after epoch:  29


  3%|▎         | 30/931 [16:27<5:10:53, 20.70s/it]

batch id predict after epoch:  30


  3%|▎         | 31/931 [16:45<4:59:28, 19.97s/it]

batch id predict after epoch:  31


  3%|▎         | 32/931 [17:00<4:40:05, 18.69s/it]

batch id predict after epoch:  32


  4%|▎         | 33/931 [17:18<4:34:13, 18.32s/it]

batch id predict after epoch:  33


  4%|▎         | 34/931 [17:33<4:17:17, 17.21s/it]

batch id predict after epoch:  34


  4%|▍         | 35/931 [17:45<3:54:01, 15.67s/it]

batch id predict after epoch:  35


  4%|▍         | 36/931 [18:01<3:54:55, 15.75s/it]

batch id predict after epoch:  36


  4%|▍         | 37/931 [18:11<3:33:02, 14.30s/it]

batch id predict after epoch:  37


  4%|▍         | 38/931 [18:23<3:19:24, 13.40s/it]

batch id predict after epoch:  38


  4%|▍         | 39/931 [18:35<3:14:31, 13.08s/it]

batch id predict after epoch:  39


  4%|▍         | 40/931 [18:47<3:09:53, 12.79s/it]

batch id predict after epoch:  40


  4%|▍         | 41/931 [18:57<2:56:52, 11.92s/it]

batch id predict after epoch:  41


  5%|▍         | 42/931 [19:06<2:41:18, 10.89s/it]

batch id predict after epoch:  42


  5%|▍         | 43/931 [19:17<2:44:24, 11.11s/it]

batch id predict after epoch:  43


  5%|▍         | 44/931 [19:30<2:50:55, 11.56s/it]

batch id predict after epoch:  44


  5%|▍         | 45/931 [19:54<3:45:45, 15.29s/it]

batch id predict after epoch:  45


  5%|▍         | 46/931 [20:02<3:13:06, 13.09s/it]

batch id predict after epoch:  46


  5%|▌         | 47/931 [20:13<3:02:27, 12.38s/it]

batch id predict after epoch:  47


  5%|▌         | 48/931 [20:22<2:47:48, 11.40s/it]

batch id predict after epoch:  48


  5%|▌         | 49/931 [20:28<2:27:20, 10.02s/it]

batch id predict after epoch:  49


  5%|▌         | 50/931 [20:35<2:12:19,  9.01s/it]

batch id predict after epoch:  50


  5%|▌         | 51/931 [20:45<2:16:55,  9.34s/it]

batch id predict after epoch:  51


  6%|▌         | 52/931 [20:55<2:20:23,  9.58s/it]

batch id predict after epoch:  52


  6%|▌         | 53/931 [21:01<2:03:35,  8.45s/it]

batch id predict after epoch:  53


  6%|▌         | 54/931 [21:07<1:52:07,  7.67s/it]

batch id predict after epoch:  54


  6%|▌         | 55/931 [21:15<1:52:10,  7.68s/it]

batch id predict after epoch:  55


  6%|▌         | 56/931 [21:19<1:39:03,  6.79s/it]

batch id predict after epoch:  56


  6%|▌         | 57/931 [21:27<1:42:37,  7.05s/it]

batch id predict after epoch:  57


  6%|▌         | 58/931 [21:33<1:38:30,  6.77s/it]

batch id predict after epoch:  58


  6%|▋         | 59/931 [21:39<1:35:00,  6.54s/it]

batch id predict after epoch:  59


  6%|▋         | 60/931 [21:47<1:39:01,  6.82s/it]

batch id predict after epoch:  60


  7%|▋         | 61/931 [21:51<1:26:47,  5.99s/it]

batch id predict after epoch:  61


  7%|▋         | 62/931 [21:57<1:26:10,  5.95s/it]

batch id predict after epoch:  62


  7%|▋         | 63/931 [22:03<1:27:12,  6.03s/it]

batch id predict after epoch:  63


  7%|▋         | 64/931 [22:09<1:28:14,  6.11s/it]

batch id predict after epoch:  64


  7%|▋         | 65/931 [22:16<1:33:49,  6.50s/it]

batch id predict after epoch:  65


  7%|▋         | 66/931 [22:21<1:24:34,  5.87s/it]

batch id predict after epoch:  66


  7%|▋         | 67/931 [22:26<1:20:07,  5.56s/it]

batch id predict after epoch:  67


  7%|▋         | 68/931 [22:33<1:26:04,  5.98s/it]

batch id predict after epoch:  68


  7%|▋         | 69/931 [22:40<1:32:19,  6.43s/it]

batch id predict after epoch:  69


  8%|▊         | 70/931 [22:58<2:20:20,  9.78s/it]

batch id predict after epoch:  70


  8%|▊         | 71/931 [23:03<2:02:36,  8.55s/it]

batch id predict after epoch:  71


  8%|▊         | 72/931 [23:09<1:51:35,  7.79s/it]

batch id predict after epoch:  72


  8%|▊         | 73/931 [23:13<1:33:22,  6.53s/it]

batch id predict after epoch:  73


  8%|▊         | 74/931 [23:19<1:28:54,  6.22s/it]

batch id predict after epoch:  74


  8%|▊         | 75/931 [23:23<1:19:21,  5.56s/it]

batch id predict after epoch:  75


  8%|▊         | 76/931 [23:27<1:13:32,  5.16s/it]

batch id predict after epoch:  76


  8%|▊         | 77/931 [23:31<1:10:15,  4.94s/it]

batch id predict after epoch:  77


  8%|▊         | 78/931 [23:35<1:05:03,  4.58s/it]

batch id predict after epoch:  78


  8%|▊         | 79/931 [23:39<1:02:31,  4.40s/it]

batch id predict after epoch:  79


  9%|▊         | 80/931 [23:43<1:02:41,  4.42s/it]

batch id predict after epoch:  80


  9%|▊         | 81/931 [23:47<58:57,  4.16s/it]  

batch id predict after epoch:  81


  9%|▉         | 82/931 [23:50<55:20,  3.91s/it]

batch id predict after epoch:  82


  9%|▉         | 83/931 [23:53<51:35,  3.65s/it]

batch id predict after epoch:  83


  9%|▉         | 84/931 [23:58<56:32,  4.01s/it]

batch id predict after epoch:  84


  9%|▉         | 85/931 [24:02<53:54,  3.82s/it]

batch id predict after epoch:  85


  9%|▉         | 86/931 [24:05<53:51,  3.82s/it]

batch id predict after epoch:  86


  9%|▉         | 87/931 [24:09<52:21,  3.72s/it]

batch id predict after epoch:  87


  9%|▉         | 88/931 [24:12<49:35,  3.53s/it]

batch id predict after epoch:  88


 10%|▉         | 89/931 [24:16<52:55,  3.77s/it]

batch id predict after epoch:  89


 10%|▉         | 90/931 [24:22<1:00:18,  4.30s/it]

batch id predict after epoch:  90


 10%|▉         | 91/931 [24:25<55:55,  3.99s/it]  

batch id predict after epoch:  91


 10%|▉         | 92/931 [24:28<49:07,  3.51s/it]

batch id predict after epoch:  92


 10%|▉         | 93/931 [24:31<47:01,  3.37s/it]

batch id predict after epoch:  93


 10%|█         | 94/931 [24:33<42:35,  3.05s/it]

batch id predict after epoch:  94


 10%|█         | 95/931 [24:36<43:18,  3.11s/it]

batch id predict after epoch:  95


 10%|█         | 96/931 [24:39<41:06,  2.95s/it]

batch id predict after epoch:  96


 10%|█         | 97/931 [24:42<40:32,  2.92s/it]

batch id predict after epoch:  97


 11%|█         | 98/931 [24:44<36:44,  2.65s/it]

batch id predict after epoch:  98


 11%|█         | 99/931 [24:46<37:48,  2.73s/it]

batch id predict after epoch:  99


 11%|█         | 100/931 [24:50<40:00,  2.89s/it]

batch id predict after epoch:  100


 11%|█         | 101/931 [24:52<38:39,  2.79s/it]

batch id predict after epoch:  101


 11%|█         | 102/931 [24:56<43:50,  3.17s/it]

batch id predict after epoch:  102


 11%|█         | 103/931 [24:58<39:05,  2.83s/it]

batch id predict after epoch:  103


 11%|█         | 104/931 [25:01<39:40,  2.88s/it]

batch id predict after epoch:  104


 11%|█▏        | 105/931 [25:05<41:58,  3.05s/it]

batch id predict after epoch:  105


 11%|█▏        | 106/931 [25:07<37:09,  2.70s/it]

batch id predict after epoch:  106


 11%|█▏        | 107/931 [25:10<38:12,  2.78s/it]

batch id predict after epoch:  107


 12%|█▏        | 108/931 [25:13<38:25,  2.80s/it]

batch id predict after epoch:  108


 12%|█▏        | 109/931 [25:15<36:39,  2.68s/it]

batch id predict after epoch:  109


 12%|█▏        | 110/931 [25:17<33:19,  2.44s/it]

batch id predict after epoch:  110


 12%|█▏        | 111/931 [25:19<30:38,  2.24s/it]

batch id predict after epoch:  111


 12%|█▏        | 112/931 [25:22<36:07,  2.65s/it]

batch id predict after epoch:  112


 12%|█▏        | 113/931 [25:25<37:28,  2.75s/it]

batch id predict after epoch:  113


 12%|█▏        | 114/931 [25:28<37:03,  2.72s/it]

batch id predict after epoch:  114


 12%|█▏        | 115/931 [25:30<34:46,  2.56s/it]

batch id predict after epoch:  115


 12%|█▏        | 116/931 [25:32<33:00,  2.43s/it]

batch id predict after epoch:  116


 13%|█▎        | 117/931 [25:35<34:38,  2.55s/it]

batch id predict after epoch:  117


 13%|█▎        | 118/931 [25:37<30:28,  2.25s/it]

batch id predict after epoch:  118


 13%|█▎        | 119/931 [25:39<31:03,  2.29s/it]

batch id predict after epoch:  119


 13%|█▎        | 120/931 [25:42<32:46,  2.42s/it]

batch id predict after epoch:  120


 13%|█▎        | 121/931 [25:45<34:51,  2.58s/it]

batch id predict after epoch:  121


 13%|█▎        | 122/931 [25:47<33:25,  2.48s/it]

batch id predict after epoch:  122


 13%|█▎        | 123/931 [25:48<29:46,  2.21s/it]

batch id predict after epoch:  123


 13%|█▎        | 124/931 [25:51<29:22,  2.18s/it]

batch id predict after epoch:  124


 13%|█▎        | 125/931 [25:52<28:03,  2.09s/it]

batch id predict after epoch:  125


 14%|█▎        | 126/931 [25:54<25:44,  1.92s/it]

batch id predict after epoch:  126


 14%|█▎        | 127/931 [25:56<25:28,  1.90s/it]

batch id predict after epoch:  127


 14%|█▎        | 128/931 [25:58<25:45,  1.93s/it]

batch id predict after epoch:  128


 14%|█▍        | 129/931 [26:00<27:45,  2.08s/it]

batch id predict after epoch:  129


 14%|█▍        | 130/931 [26:02<26:25,  1.98s/it]

batch id predict after epoch:  130


 14%|█▍        | 131/931 [26:04<26:13,  1.97s/it]

batch id predict after epoch:  131


 14%|█▍        | 132/931 [26:06<27:00,  2.03s/it]

batch id predict after epoch:  132


 14%|█▍        | 133/931 [26:08<25:51,  1.94s/it]

batch id predict after epoch:  133


 14%|█▍        | 134/931 [26:10<26:20,  1.98s/it]

batch id predict after epoch:  134


 15%|█▍        | 135/931 [26:11<23:25,  1.77s/it]

batch id predict after epoch:  135


 15%|█▍        | 136/931 [26:13<23:27,  1.77s/it]

batch id predict after epoch:  136


 15%|█▍        | 137/931 [26:15<23:25,  1.77s/it]

batch id predict after epoch:  137


 15%|█▍        | 138/931 [26:16<23:33,  1.78s/it]

batch id predict after epoch:  138


 15%|█▍        | 139/931 [26:18<23:43,  1.80s/it]

batch id predict after epoch:  139


 15%|█▌        | 140/931 [26:20<24:55,  1.89s/it]

batch id predict after epoch:  140


 15%|█▌        | 141/931 [26:23<25:40,  1.95s/it]

batch id predict after epoch:  141


 15%|█▌        | 142/931 [26:24<24:07,  1.83s/it]

batch id predict after epoch:  142


 15%|█▌        | 143/931 [26:26<23:07,  1.76s/it]

batch id predict after epoch:  143


 15%|█▌        | 144/931 [26:27<22:17,  1.70s/it]

batch id predict after epoch:  144


 16%|█▌        | 145/931 [26:29<24:00,  1.83s/it]

batch id predict after epoch:  145


 16%|█▌        | 146/931 [26:31<21:45,  1.66s/it]

batch id predict after epoch:  146


 16%|█▌        | 147/931 [26:32<21:18,  1.63s/it]

batch id predict after epoch:  147


 16%|█▌        | 148/931 [26:35<24:17,  1.86s/it]

batch id predict after epoch:  148


 16%|█▌        | 149/931 [26:36<23:13,  1.78s/it]

batch id predict after epoch:  149


 16%|█▌        | 150/931 [26:38<21:34,  1.66s/it]

batch id predict after epoch:  150


 16%|█▌        | 151/931 [26:40<23:10,  1.78s/it]

batch id predict after epoch:  151


 16%|█▋        | 152/931 [26:42<24:09,  1.86s/it]

batch id predict after epoch:  152


 16%|█▋        | 153/931 [26:43<21:55,  1.69s/it]

batch id predict after epoch:  153


 17%|█▋        | 154/931 [26:44<20:28,  1.58s/it]

batch id predict after epoch:  154


 17%|█▋        | 155/931 [26:46<20:52,  1.61s/it]

batch id predict after epoch:  155


 17%|█▋        | 156/931 [26:48<21:23,  1.66s/it]

batch id predict after epoch:  156


 17%|█▋        | 157/931 [26:49<21:24,  1.66s/it]

batch id predict after epoch:  157


 17%|█▋        | 158/931 [26:51<22:17,  1.73s/it]

batch id predict after epoch:  158


 17%|█▋        | 159/931 [26:53<21:31,  1.67s/it]

batch id predict after epoch:  159


 17%|█▋        | 160/931 [26:54<20:13,  1.57s/it]

batch id predict after epoch:  160


 17%|█▋        | 161/931 [26:56<19:49,  1.55s/it]

batch id predict after epoch:  161


 17%|█▋        | 162/931 [26:57<20:49,  1.63s/it]

batch id predict after epoch:  162


 18%|█▊        | 163/931 [26:59<19:29,  1.52s/it]

batch id predict after epoch:  163


 18%|█▊        | 164/931 [27:01<20:44,  1.62s/it]

batch id predict after epoch:  164


 18%|█▊        | 165/931 [27:03<22:01,  1.73s/it]

batch id predict after epoch:  165


 18%|█▊        | 166/931 [27:04<20:55,  1.64s/it]

batch id predict after epoch:  166


 18%|█▊        | 167/931 [27:06<20:24,  1.60s/it]

batch id predict after epoch:  167


 18%|█▊        | 168/931 [27:07<19:47,  1.56s/it]

batch id predict after epoch:  168


 18%|█▊        | 169/931 [27:09<20:36,  1.62s/it]

batch id predict after epoch:  169


 18%|█▊        | 170/931 [27:10<19:24,  1.53s/it]

batch id predict after epoch:  170


 18%|█▊        | 171/931 [27:11<18:29,  1.46s/it]

batch id predict after epoch:  171


 18%|█▊        | 172/931 [27:13<17:45,  1.40s/it]

batch id predict after epoch:  172


 19%|█▊        | 173/931 [27:14<18:30,  1.46s/it]

batch id predict after epoch:  173


 19%|█▊        | 174/931 [27:16<19:44,  1.57s/it]

batch id predict after epoch:  174


 19%|█▉        | 175/931 [27:17<18:46,  1.49s/it]

batch id predict after epoch:  175


 19%|█▉        | 176/931 [27:19<17:56,  1.43s/it]

batch id predict after epoch:  176


 19%|█▉        | 177/931 [27:20<18:33,  1.48s/it]

batch id predict after epoch:  177


 19%|█▉        | 178/931 [27:22<17:44,  1.41s/it]

batch id predict after epoch:  178


 19%|█▉        | 179/931 [27:23<18:34,  1.48s/it]

batch id predict after epoch:  179


 19%|█▉        | 180/931 [27:25<19:21,  1.55s/it]

batch id predict after epoch:  180


 19%|█▉        | 181/931 [27:26<18:57,  1.52s/it]

batch id predict after epoch:  181


 20%|█▉        | 182/931 [27:28<19:03,  1.53s/it]

batch id predict after epoch:  182


 20%|█▉        | 183/931 [27:29<18:44,  1.50s/it]

batch id predict after epoch:  183


 20%|█▉        | 184/931 [27:31<17:55,  1.44s/it]

batch id predict after epoch:  184


 20%|█▉        | 185/931 [27:32<17:23,  1.40s/it]

batch id predict after epoch:  185


 20%|█▉        | 186/931 [27:33<16:57,  1.37s/it]

batch id predict after epoch:  186


 20%|██        | 187/931 [27:34<16:41,  1.35s/it]

batch id predict after epoch:  187


 20%|██        | 188/931 [27:36<16:41,  1.35s/it]

batch id predict after epoch:  188


 20%|██        | 189/931 [27:37<16:31,  1.34s/it]

batch id predict after epoch:  189


 20%|██        | 190/931 [27:38<16:15,  1.32s/it]

batch id predict after epoch:  190


 21%|██        | 191/931 [27:40<16:04,  1.30s/it]

batch id predict after epoch:  191


 21%|██        | 192/931 [27:41<16:27,  1.34s/it]

batch id predict after epoch:  192


 21%|██        | 193/931 [27:43<17:48,  1.45s/it]

batch id predict after epoch:  193


 21%|██        | 194/931 [27:44<17:23,  1.42s/it]

batch id predict after epoch:  194


 21%|██        | 195/931 [27:45<16:57,  1.38s/it]

batch id predict after epoch:  195


 21%|██        | 196/931 [27:47<16:33,  1.35s/it]

batch id predict after epoch:  196


 21%|██        | 197/931 [27:48<17:25,  1.42s/it]

batch id predict after epoch:  197


 21%|██▏       | 198/931 [27:50<17:05,  1.40s/it]

batch id predict after epoch:  198


 21%|██▏       | 199/931 [27:51<17:30,  1.44s/it]

batch id predict after epoch:  199


 21%|██▏       | 200/931 [27:53<17:36,  1.44s/it]

batch id predict after epoch:  200


 22%|██▏       | 201/931 [27:54<17:00,  1.40s/it]

batch id predict after epoch:  201


 22%|██▏       | 202/931 [27:56<17:40,  1.45s/it]

batch id predict after epoch:  202


 22%|██▏       | 203/931 [27:57<17:00,  1.40s/it]

batch id predict after epoch:  203


 22%|██▏       | 204/931 [27:58<16:34,  1.37s/it]

batch id predict after epoch:  204


 22%|██▏       | 205/931 [27:59<16:17,  1.35s/it]

batch id predict after epoch:  205


 22%|██▏       | 206/931 [28:01<16:11,  1.34s/it]

batch id predict after epoch:  206


 22%|██▏       | 207/931 [28:02<16:46,  1.39s/it]

batch id predict after epoch:  207


 22%|██▏       | 208/931 [28:03<16:19,  1.35s/it]

batch id predict after epoch:  208


 22%|██▏       | 209/931 [28:05<16:16,  1.35s/it]

batch id predict after epoch:  209


 23%|██▎       | 210/931 [28:06<16:14,  1.35s/it]

batch id predict after epoch:  210


 23%|██▎       | 211/931 [28:08<17:08,  1.43s/it]

batch id predict after epoch:  211


 23%|██▎       | 212/931 [28:09<16:48,  1.40s/it]

batch id predict after epoch:  212


 23%|██▎       | 213/931 [28:11<17:29,  1.46s/it]

batch id predict after epoch:  213


 23%|██▎       | 214/931 [28:12<17:06,  1.43s/it]

batch id predict after epoch:  214


 23%|██▎       | 215/931 [28:13<16:43,  1.40s/it]

batch id predict after epoch:  215


 23%|██▎       | 216/931 [28:15<17:17,  1.45s/it]

batch id predict after epoch:  216


 23%|██▎       | 217/931 [28:17<18:27,  1.55s/it]

batch id predict after epoch:  217


 23%|██▎       | 218/931 [28:18<17:23,  1.46s/it]

batch id predict after epoch:  218


 24%|██▎       | 219/931 [28:20<17:21,  1.46s/it]

batch id predict after epoch:  219


 24%|██▎       | 220/931 [28:21<16:34,  1.40s/it]

batch id predict after epoch:  220


 24%|██▎       | 221/931 [28:22<16:09,  1.37s/it]

batch id predict after epoch:  221


 24%|██▍       | 222/931 [28:23<15:48,  1.34s/it]

batch id predict after epoch:  222


 24%|██▍       | 223/931 [28:25<16:23,  1.39s/it]

batch id predict after epoch:  223


 24%|██▍       | 224/931 [28:26<16:03,  1.36s/it]

batch id predict after epoch:  224


 24%|██▍       | 225/931 [28:27<15:53,  1.35s/it]

batch id predict after epoch:  225


 24%|██▍       | 226/931 [28:29<15:39,  1.33s/it]

batch id predict after epoch:  226


 24%|██▍       | 227/931 [28:30<15:28,  1.32s/it]

batch id predict after epoch:  227


 24%|██▍       | 228/931 [28:31<15:19,  1.31s/it]

batch id predict after epoch:  228


 25%|██▍       | 229/931 [28:33<15:15,  1.30s/it]

batch id predict after epoch:  229


 25%|██▍       | 230/931 [28:34<15:12,  1.30s/it]

batch id predict after epoch:  230


 25%|██▍       | 231/931 [28:35<15:23,  1.32s/it]

batch id predict after epoch:  231


 25%|██▍       | 232/931 [28:37<15:18,  1.31s/it]

batch id predict after epoch:  232


 25%|██▌       | 233/931 [28:38<15:22,  1.32s/it]

batch id predict after epoch:  233


 25%|██▌       | 234/931 [28:39<15:13,  1.31s/it]

batch id predict after epoch:  234


 25%|██▌       | 235/931 [28:40<15:06,  1.30s/it]

batch id predict after epoch:  235


 25%|██▌       | 236/931 [28:42<14:54,  1.29s/it]

batch id predict after epoch:  236


 25%|██▌       | 237/931 [28:43<14:54,  1.29s/it]

batch id predict after epoch:  237


 26%|██▌       | 238/931 [28:44<14:54,  1.29s/it]

batch id predict after epoch:  238


 26%|██▌       | 239/931 [28:46<14:50,  1.29s/it]

batch id predict after epoch:  239


 26%|██▌       | 240/931 [28:47<14:45,  1.28s/it]

batch id predict after epoch:  240


 26%|██▌       | 241/931 [28:48<14:43,  1.28s/it]

batch id predict after epoch:  241


 26%|██▌       | 242/931 [28:49<14:36,  1.27s/it]

batch id predict after epoch:  242


 26%|██▌       | 243/931 [28:51<15:41,  1.37s/it]

batch id predict after epoch:  243


 26%|██▌       | 244/931 [28:53<16:21,  1.43s/it]

batch id predict after epoch:  244


 26%|██▋       | 245/931 [28:54<16:02,  1.40s/it]

batch id predict after epoch:  245


 26%|██▋       | 246/931 [28:55<15:30,  1.36s/it]

batch id predict after epoch:  246


 27%|██▋       | 247/931 [28:56<15:10,  1.33s/it]

batch id predict after epoch:  247


 27%|██▋       | 248/931 [28:58<15:00,  1.32s/it]

batch id predict after epoch:  248


 27%|██▋       | 249/931 [28:59<15:56,  1.40s/it]

batch id predict after epoch:  249


 27%|██▋       | 250/931 [29:01<15:34,  1.37s/it]

batch id predict after epoch:  250


 27%|██▋       | 251/931 [29:02<15:17,  1.35s/it]

batch id predict after epoch:  251


 27%|██▋       | 252/931 [29:03<15:09,  1.34s/it]

batch id predict after epoch:  252


 27%|██▋       | 253/931 [29:04<14:47,  1.31s/it]

batch id predict after epoch:  253


 27%|██▋       | 254/931 [29:06<14:33,  1.29s/it]

batch id predict after epoch:  254


 27%|██▋       | 255/931 [29:07<15:39,  1.39s/it]

batch id predict after epoch:  255


 27%|██▋       | 256/931 [29:09<15:10,  1.35s/it]

batch id predict after epoch:  256


 28%|██▊       | 257/931 [29:10<14:57,  1.33s/it]

batch id predict after epoch:  257


 28%|██▊       | 258/931 [29:11<14:54,  1.33s/it]

batch id predict after epoch:  258


 28%|██▊       | 259/931 [29:13<15:38,  1.40s/it]

batch id predict after epoch:  259


 28%|██▊       | 260/931 [29:14<15:17,  1.37s/it]

batch id predict after epoch:  260


 28%|██▊       | 261/931 [29:15<15:12,  1.36s/it]

batch id predict after epoch:  261


 28%|██▊       | 262/931 [29:17<16:02,  1.44s/it]

batch id predict after epoch:  262


 28%|██▊       | 263/931 [29:19<16:37,  1.49s/it]

batch id predict after epoch:  263


 28%|██▊       | 264/931 [29:20<16:01,  1.44s/it]

batch id predict after epoch:  264


 28%|██▊       | 265/931 [29:21<15:30,  1.40s/it]

batch id predict after epoch:  265


 29%|██▊       | 266/931 [29:23<15:12,  1.37s/it]

batch id predict after epoch:  266


 29%|██▊       | 267/931 [29:24<15:04,  1.36s/it]

batch id predict after epoch:  267


 29%|██▉       | 268/931 [29:25<14:41,  1.33s/it]

batch id predict after epoch:  268


 29%|██▉       | 269/931 [29:26<14:27,  1.31s/it]

batch id predict after epoch:  269


 29%|██▉       | 270/931 [29:28<14:21,  1.30s/it]

batch id predict after epoch:  270


 29%|██▉       | 271/931 [29:29<14:18,  1.30s/it]

batch id predict after epoch:  271


 29%|██▉       | 272/931 [29:30<14:16,  1.30s/it]

batch id predict after epoch:  272


 29%|██▉       | 273/931 [29:32<14:10,  1.29s/it]

batch id predict after epoch:  273


 29%|██▉       | 274/931 [29:33<14:03,  1.28s/it]

batch id predict after epoch:  274


 30%|██▉       | 275/931 [29:34<14:00,  1.28s/it]

batch id predict after epoch:  275


 30%|██▉       | 276/931 [29:35<13:59,  1.28s/it]

batch id predict after epoch:  276


 30%|██▉       | 277/931 [29:37<13:57,  1.28s/it]

batch id predict after epoch:  277


 30%|██▉       | 278/931 [29:38<13:54,  1.28s/it]

batch id predict after epoch:  278


 30%|██▉       | 279/931 [29:39<13:51,  1.28s/it]

batch id predict after epoch:  279


 30%|███       | 280/931 [29:40<13:50,  1.28s/it]

batch id predict after epoch:  280


 30%|███       | 281/931 [29:42<13:50,  1.28s/it]

batch id predict after epoch:  281


 30%|███       | 282/931 [29:43<13:47,  1.28s/it]

batch id predict after epoch:  282


 30%|███       | 283/931 [29:44<13:48,  1.28s/it]

batch id predict after epoch:  283


 31%|███       | 284/931 [29:46<13:48,  1.28s/it]

batch id predict after epoch:  284


 31%|███       | 285/931 [29:47<13:47,  1.28s/it]

batch id predict after epoch:  285


 31%|███       | 286/931 [29:48<13:43,  1.28s/it]

batch id predict after epoch:  286


 31%|███       | 287/931 [29:50<13:57,  1.30s/it]

batch id predict after epoch:  287


 31%|███       | 288/931 [29:51<13:52,  1.29s/it]

batch id predict after epoch:  288


 31%|███       | 289/931 [29:52<13:54,  1.30s/it]

batch id predict after epoch:  289


 31%|███       | 290/931 [29:53<13:49,  1.29s/it]

batch id predict after epoch:  290


 31%|███▏      | 291/931 [29:55<14:02,  1.32s/it]

batch id predict after epoch:  291


 31%|███▏      | 292/931 [29:56<13:50,  1.30s/it]

batch id predict after epoch:  292


 31%|███▏      | 293/931 [29:57<13:49,  1.30s/it]

batch id predict after epoch:  293


 32%|███▏      | 294/931 [29:59<13:44,  1.29s/it]

batch id predict after epoch:  294


 32%|███▏      | 295/931 [30:00<13:56,  1.31s/it]

batch id predict after epoch:  295


 32%|███▏      | 296/931 [30:01<13:57,  1.32s/it]

batch id predict after epoch:  296


 32%|███▏      | 297/931 [30:03<13:45,  1.30s/it]

batch id predict after epoch:  297


 32%|███▏      | 298/931 [30:04<13:47,  1.31s/it]

batch id predict after epoch:  298


 32%|███▏      | 299/931 [30:05<13:42,  1.30s/it]

batch id predict after epoch:  299


 32%|███▏      | 300/931 [30:07<14:44,  1.40s/it]

batch id predict after epoch:  300


 32%|███▏      | 301/931 [30:08<14:21,  1.37s/it]

batch id predict after epoch:  301


 32%|███▏      | 302/931 [30:09<14:00,  1.34s/it]

batch id predict after epoch:  302


 33%|███▎      | 303/931 [30:11<13:58,  1.34s/it]

batch id predict after epoch:  303


 33%|███▎      | 304/931 [30:12<13:55,  1.33s/it]

batch id predict after epoch:  304


 33%|███▎      | 305/931 [30:13<13:56,  1.34s/it]

batch id predict after epoch:  305


 33%|███▎      | 306/931 [30:15<13:44,  1.32s/it]

batch id predict after epoch:  306


 33%|███▎      | 307/931 [30:16<13:44,  1.32s/it]

batch id predict after epoch:  307


 33%|███▎      | 308/931 [30:17<13:34,  1.31s/it]

batch id predict after epoch:  308


 33%|███▎      | 309/931 [30:19<13:29,  1.30s/it]

batch id predict after epoch:  309


 33%|███▎      | 310/931 [30:20<13:39,  1.32s/it]

batch id predict after epoch:  310


 33%|███▎      | 311/931 [30:21<13:45,  1.33s/it]

batch id predict after epoch:  311


 34%|███▎      | 312/931 [30:23<13:40,  1.32s/it]

batch id predict after epoch:  312


 34%|███▎      | 313/931 [30:24<13:46,  1.34s/it]

batch id predict after epoch:  313


 34%|███▎      | 314/931 [30:25<13:37,  1.33s/it]

batch id predict after epoch:  314


 34%|███▍      | 315/931 [30:27<13:30,  1.32s/it]

batch id predict after epoch:  315


 34%|███▍      | 316/931 [30:28<13:33,  1.32s/it]

batch id predict after epoch:  316


 34%|███▍      | 317/931 [30:29<13:19,  1.30s/it]

batch id predict after epoch:  317


 34%|███▍      | 318/931 [30:30<13:13,  1.29s/it]

batch id predict after epoch:  318


 34%|███▍      | 319/931 [30:32<13:04,  1.28s/it]

batch id predict after epoch:  319


 34%|███▍      | 320/931 [30:33<13:00,  1.28s/it]

batch id predict after epoch:  320


 34%|███▍      | 321/931 [30:34<12:59,  1.28s/it]

batch id predict after epoch:  321


 35%|███▍      | 322/931 [30:35<13:04,  1.29s/it]

batch id predict after epoch:  322


 35%|███▍      | 323/931 [30:37<13:00,  1.28s/it]

batch id predict after epoch:  323


 35%|███▍      | 324/931 [30:38<12:54,  1.28s/it]

batch id predict after epoch:  324


 35%|███▍      | 325/931 [30:39<13:05,  1.30s/it]

batch id predict after epoch:  325


 35%|███▌      | 326/931 [30:41<13:00,  1.29s/it]

batch id predict after epoch:  326


 35%|███▌      | 327/931 [30:42<13:05,  1.30s/it]

batch id predict after epoch:  327


 35%|███▌      | 328/931 [30:43<12:58,  1.29s/it]

batch id predict after epoch:  328


 35%|███▌      | 329/931 [30:44<12:49,  1.28s/it]

batch id predict after epoch:  329


 35%|███▌      | 330/931 [30:46<12:43,  1.27s/it]

batch id predict after epoch:  330


 36%|███▌      | 331/931 [30:47<12:42,  1.27s/it]

batch id predict after epoch:  331


 36%|███▌      | 332/931 [30:48<12:39,  1.27s/it]

batch id predict after epoch:  332


 36%|███▌      | 333/931 [30:50<12:45,  1.28s/it]

batch id predict after epoch:  333


 36%|███▌      | 334/931 [30:51<12:51,  1.29s/it]

batch id predict after epoch:  334


 36%|███▌      | 335/931 [30:52<12:55,  1.30s/it]

batch id predict after epoch:  335


 36%|███▌      | 336/931 [30:54<12:55,  1.30s/it]

batch id predict after epoch:  336


 36%|███▌      | 337/931 [30:55<12:47,  1.29s/it]

batch id predict after epoch:  337


 36%|███▋      | 338/931 [30:56<12:40,  1.28s/it]

batch id predict after epoch:  338


 36%|███▋      | 339/931 [30:58<13:24,  1.36s/it]

batch id predict after epoch:  339


 37%|███▋      | 340/931 [30:59<13:08,  1.33s/it]

batch id predict after epoch:  340


 37%|███▋      | 341/931 [31:00<12:54,  1.31s/it]

batch id predict after epoch:  341


 37%|███▋      | 342/931 [31:01<12:42,  1.30s/it]

batch id predict after epoch:  342


 37%|███▋      | 343/931 [31:03<12:38,  1.29s/it]

batch id predict after epoch:  343


 37%|███▋      | 344/931 [31:04<12:29,  1.28s/it]

batch id predict after epoch:  344


 37%|███▋      | 345/931 [31:05<12:23,  1.27s/it]

batch id predict after epoch:  345


 37%|███▋      | 346/931 [31:06<12:18,  1.26s/it]

batch id predict after epoch:  346


 37%|███▋      | 347/931 [31:08<12:21,  1.27s/it]

batch id predict after epoch:  347


 37%|███▋      | 348/931 [31:09<12:19,  1.27s/it]

batch id predict after epoch:  348


 37%|███▋      | 349/931 [31:10<12:17,  1.27s/it]

batch id predict after epoch:  349


 38%|███▊      | 350/931 [31:11<12:19,  1.27s/it]

batch id predict after epoch:  350


 38%|███▊      | 351/931 [31:13<12:15,  1.27s/it]

batch id predict after epoch:  351


 38%|███▊      | 352/931 [31:14<12:18,  1.28s/it]

batch id predict after epoch:  352


 38%|███▊      | 353/931 [31:15<12:12,  1.27s/it]

batch id predict after epoch:  353


 38%|███▊      | 354/931 [31:17<12:07,  1.26s/it]

batch id predict after epoch:  354


 38%|███▊      | 355/931 [31:18<12:10,  1.27s/it]

batch id predict after epoch:  355


 38%|███▊      | 356/931 [31:19<12:06,  1.26s/it]

batch id predict after epoch:  356


 38%|███▊      | 357/931 [31:20<12:05,  1.26s/it]

batch id predict after epoch:  357


 38%|███▊      | 358/931 [31:22<12:06,  1.27s/it]

batch id predict after epoch:  358


 39%|███▊      | 359/931 [31:23<12:06,  1.27s/it]

batch id predict after epoch:  359


 39%|███▊      | 360/931 [31:24<12:14,  1.29s/it]

batch id predict after epoch:  360


 39%|███▉      | 361/931 [31:26<12:11,  1.28s/it]

batch id predict after epoch:  361


 39%|███▉      | 362/931 [31:27<12:07,  1.28s/it]

batch id predict after epoch:  362


 39%|███▉      | 363/931 [31:28<12:04,  1.27s/it]

batch id predict after epoch:  363


 39%|███▉      | 364/931 [31:29<12:01,  1.27s/it]

batch id predict after epoch:  364


 39%|███▉      | 365/931 [31:31<11:57,  1.27s/it]

batch id predict after epoch:  365


 39%|███▉      | 366/931 [31:32<11:54,  1.26s/it]

batch id predict after epoch:  366


 39%|███▉      | 367/931 [31:33<12:00,  1.28s/it]

batch id predict after epoch:  367


 40%|███▉      | 368/931 [31:34<11:56,  1.27s/it]

batch id predict after epoch:  368


 40%|███▉      | 369/931 [31:36<11:54,  1.27s/it]

batch id predict after epoch:  369


 40%|███▉      | 370/931 [31:37<11:47,  1.26s/it]

batch id predict after epoch:  370


 40%|███▉      | 371/931 [31:38<11:49,  1.27s/it]

batch id predict after epoch:  371


 40%|███▉      | 372/931 [31:39<11:43,  1.26s/it]

batch id predict after epoch:  372


 40%|████      | 373/931 [31:41<11:42,  1.26s/it]

batch id predict after epoch:  373


 40%|████      | 374/931 [31:42<11:43,  1.26s/it]

batch id predict after epoch:  374


 40%|████      | 375/931 [31:43<11:41,  1.26s/it]

batch id predict after epoch:  375


 40%|████      | 376/931 [31:44<11:37,  1.26s/it]

batch id predict after epoch:  376


 40%|████      | 377/931 [31:46<11:36,  1.26s/it]

batch id predict after epoch:  377


 41%|████      | 378/931 [31:47<11:40,  1.27s/it]

batch id predict after epoch:  378


 41%|████      | 379/931 [31:48<11:36,  1.26s/it]

batch id predict after epoch:  379


 41%|████      | 380/931 [31:50<11:43,  1.28s/it]

batch id predict after epoch:  380


 41%|████      | 381/931 [31:51<11:45,  1.28s/it]

batch id predict after epoch:  381


 41%|████      | 382/931 [31:52<11:44,  1.28s/it]

batch id predict after epoch:  382


 41%|████      | 383/931 [31:53<11:45,  1.29s/it]

batch id predict after epoch:  383


 41%|████      | 384/931 [31:55<11:40,  1.28s/it]

batch id predict after epoch:  384


 41%|████▏     | 385/931 [31:56<11:52,  1.31s/it]

batch id predict after epoch:  385


 41%|████▏     | 386/931 [31:58<12:24,  1.37s/it]

batch id predict after epoch:  386


 42%|████▏     | 387/931 [31:59<12:13,  1.35s/it]

batch id predict after epoch:  387


 42%|████▏     | 388/931 [32:00<12:06,  1.34s/it]

batch id predict after epoch:  388


 42%|████▏     | 389/931 [32:01<11:54,  1.32s/it]

batch id predict after epoch:  389


 42%|████▏     | 390/931 [32:03<11:51,  1.31s/it]

batch id predict after epoch:  390


 42%|████▏     | 391/931 [32:04<11:41,  1.30s/it]

batch id predict after epoch:  391


 42%|████▏     | 392/931 [32:05<11:37,  1.29s/it]

batch id predict after epoch:  392


 42%|████▏     | 393/931 [32:07<11:28,  1.28s/it]

batch id predict after epoch:  393


 42%|████▏     | 394/931 [32:08<11:29,  1.28s/it]

batch id predict after epoch:  394


 42%|████▏     | 395/931 [32:09<11:34,  1.29s/it]

batch id predict after epoch:  395


 43%|████▎     | 396/931 [32:10<11:31,  1.29s/it]

batch id predict after epoch:  396


 43%|████▎     | 397/931 [32:12<11:30,  1.29s/it]

batch id predict after epoch:  397


 43%|████▎     | 398/931 [32:13<11:27,  1.29s/it]

batch id predict after epoch:  398


 43%|████▎     | 399/931 [32:14<11:30,  1.30s/it]

batch id predict after epoch:  399


 43%|████▎     | 400/931 [32:16<11:27,  1.29s/it]

batch id predict after epoch:  400


 43%|████▎     | 401/931 [32:17<11:20,  1.28s/it]

batch id predict after epoch:  401


 43%|████▎     | 402/931 [32:18<11:17,  1.28s/it]

batch id predict after epoch:  402


 43%|████▎     | 403/931 [32:19<11:17,  1.28s/it]

batch id predict after epoch:  403


 43%|████▎     | 404/931 [32:21<11:11,  1.27s/it]

batch id predict after epoch:  404


 44%|████▎     | 405/931 [32:22<11:08,  1.27s/it]

batch id predict after epoch:  405


 44%|████▎     | 406/931 [32:23<11:04,  1.27s/it]

batch id predict after epoch:  406


 44%|████▎     | 407/931 [32:25<11:05,  1.27s/it]

batch id predict after epoch:  407


 44%|████▍     | 408/931 [32:26<11:02,  1.27s/it]

batch id predict after epoch:  408


 44%|████▍     | 409/931 [32:27<11:02,  1.27s/it]

batch id predict after epoch:  409


 44%|████▍     | 410/931 [32:28<11:01,  1.27s/it]

batch id predict after epoch:  410


 44%|████▍     | 411/931 [32:30<11:01,  1.27s/it]

batch id predict after epoch:  411


 44%|████▍     | 412/931 [32:31<10:55,  1.26s/it]

batch id predict after epoch:  412


 44%|████▍     | 413/931 [32:32<10:57,  1.27s/it]

batch id predict after epoch:  413


 44%|████▍     | 414/931 [32:33<10:56,  1.27s/it]

batch id predict after epoch:  414


 45%|████▍     | 415/931 [32:35<10:54,  1.27s/it]

batch id predict after epoch:  415


 45%|████▍     | 416/931 [32:36<10:56,  1.27s/it]

batch id predict after epoch:  416


 45%|████▍     | 417/931 [32:37<10:51,  1.27s/it]

batch id predict after epoch:  417


 45%|████▍     | 418/931 [32:38<10:50,  1.27s/it]

batch id predict after epoch:  418


 45%|████▌     | 419/931 [32:40<10:59,  1.29s/it]

batch id predict after epoch:  419


 45%|████▌     | 420/931 [32:41<10:55,  1.28s/it]

batch id predict after epoch:  420


 45%|████▌     | 421/931 [32:42<10:52,  1.28s/it]

batch id predict after epoch:  421


 45%|████▌     | 422/931 [32:44<10:49,  1.28s/it]

batch id predict after epoch:  422


 45%|████▌     | 423/931 [32:45<10:50,  1.28s/it]

batch id predict after epoch:  423


 46%|████▌     | 424/931 [32:46<10:44,  1.27s/it]

batch id predict after epoch:  424


 46%|████▌     | 425/931 [32:48<11:03,  1.31s/it]

batch id predict after epoch:  425


 46%|████▌     | 426/931 [32:49<10:54,  1.30s/it]

batch id predict after epoch:  426


 46%|████▌     | 427/931 [32:50<10:49,  1.29s/it]

batch id predict after epoch:  427


 46%|████▌     | 428/931 [32:51<10:53,  1.30s/it]

batch id predict after epoch:  428


 46%|████▌     | 429/931 [32:53<10:46,  1.29s/it]

batch id predict after epoch:  429


 46%|████▌     | 430/931 [32:54<10:42,  1.28s/it]

batch id predict after epoch:  430


 46%|████▋     | 431/931 [32:55<10:42,  1.28s/it]

batch id predict after epoch:  431


 46%|████▋     | 432/931 [32:57<10:40,  1.28s/it]

batch id predict after epoch:  432


 47%|████▋     | 433/931 [32:58<10:40,  1.29s/it]

batch id predict after epoch:  433


 47%|████▋     | 434/931 [32:59<10:44,  1.30s/it]

batch id predict after epoch:  434


 47%|████▋     | 435/931 [33:00<10:37,  1.29s/it]

batch id predict after epoch:  435


 47%|████▋     | 436/931 [33:02<10:31,  1.28s/it]

batch id predict after epoch:  436


 47%|████▋     | 437/931 [33:03<10:29,  1.27s/it]

batch id predict after epoch:  437


 47%|████▋     | 438/931 [33:04<10:33,  1.28s/it]

batch id predict after epoch:  438


 47%|████▋     | 439/931 [33:05<10:29,  1.28s/it]

batch id predict after epoch:  439


 47%|████▋     | 440/931 [33:07<10:27,  1.28s/it]

batch id predict after epoch:  440


 47%|████▋     | 441/931 [33:08<10:29,  1.28s/it]

batch id predict after epoch:  441


 47%|████▋     | 442/931 [33:09<10:28,  1.28s/it]

batch id predict after epoch:  442


 48%|████▊     | 443/931 [33:11<10:28,  1.29s/it]

batch id predict after epoch:  443


 48%|████▊     | 444/931 [33:12<10:27,  1.29s/it]

batch id predict after epoch:  444


 48%|████▊     | 445/931 [33:13<10:25,  1.29s/it]

batch id predict after epoch:  445


 48%|████▊     | 446/931 [33:14<10:22,  1.28s/it]

batch id predict after epoch:  446


 48%|████▊     | 447/931 [33:16<10:25,  1.29s/it]

batch id predict after epoch:  447


 48%|████▊     | 448/931 [33:17<10:19,  1.28s/it]

batch id predict after epoch:  448


 48%|████▊     | 449/931 [33:18<10:19,  1.29s/it]

batch id predict after epoch:  449


 48%|████▊     | 450/931 [33:20<10:17,  1.28s/it]

batch id predict after epoch:  450


 48%|████▊     | 451/931 [33:21<10:13,  1.28s/it]

batch id predict after epoch:  451


 49%|████▊     | 452/931 [33:22<10:07,  1.27s/it]

batch id predict after epoch:  452


 49%|████▊     | 453/931 [33:24<10:21,  1.30s/it]

batch id predict after epoch:  453


 49%|████▉     | 454/931 [33:25<10:15,  1.29s/it]

batch id predict after epoch:  454


 49%|████▉     | 455/931 [33:26<10:13,  1.29s/it]

batch id predict after epoch:  455


 49%|████▉     | 456/931 [33:27<10:08,  1.28s/it]

batch id predict after epoch:  456


 49%|████▉     | 457/931 [33:29<10:11,  1.29s/it]

batch id predict after epoch:  457


 49%|████▉     | 458/931 [33:30<10:08,  1.29s/it]

batch id predict after epoch:  458


 49%|████▉     | 459/931 [33:31<10:06,  1.29s/it]

batch id predict after epoch:  459


 49%|████▉     | 460/931 [33:32<10:00,  1.28s/it]

batch id predict after epoch:  460


 50%|████▉     | 461/931 [33:34<09:59,  1.28s/it]

batch id predict after epoch:  461


 50%|████▉     | 462/931 [33:35<09:56,  1.27s/it]

batch id predict after epoch:  462


 50%|████▉     | 463/931 [33:36<09:58,  1.28s/it]

batch id predict after epoch:  463


 50%|████▉     | 464/931 [33:38<09:58,  1.28s/it]

batch id predict after epoch:  464


 50%|████▉     | 465/931 [33:39<09:55,  1.28s/it]

batch id predict after epoch:  465


 50%|█████     | 466/931 [33:40<09:53,  1.28s/it]

batch id predict after epoch:  466


 50%|█████     | 467/931 [33:41<09:50,  1.27s/it]

batch id predict after epoch:  467


 50%|█████     | 468/931 [33:43<09:49,  1.27s/it]

batch id predict after epoch:  468


 50%|█████     | 469/931 [33:44<09:49,  1.28s/it]

batch id predict after epoch:  469


 50%|█████     | 470/931 [33:45<09:45,  1.27s/it]

batch id predict after epoch:  470


 51%|█████     | 471/931 [33:46<09:41,  1.26s/it]

batch id predict after epoch:  471


 51%|█████     | 472/931 [33:48<09:38,  1.26s/it]

batch id predict after epoch:  472


 51%|█████     | 473/931 [33:49<09:40,  1.27s/it]

batch id predict after epoch:  473


 51%|█████     | 474/931 [33:50<09:45,  1.28s/it]

batch id predict after epoch:  474


 51%|█████     | 475/931 [33:52<09:52,  1.30s/it]

batch id predict after epoch:  475


 51%|█████     | 476/931 [33:53<09:51,  1.30s/it]

batch id predict after epoch:  476


 51%|█████     | 477/931 [33:54<09:48,  1.30s/it]

batch id predict after epoch:  477


 51%|█████▏    | 478/931 [33:56<09:44,  1.29s/it]

batch id predict after epoch:  478


 51%|█████▏    | 479/931 [33:57<09:44,  1.29s/it]

batch id predict after epoch:  479


 52%|█████▏    | 480/931 [33:58<09:42,  1.29s/it]

batch id predict after epoch:  480


 52%|█████▏    | 481/931 [33:59<09:36,  1.28s/it]

batch id predict after epoch:  481


 52%|█████▏    | 482/931 [34:01<09:33,  1.28s/it]

batch id predict after epoch:  482


 52%|█████▏    | 483/931 [34:02<09:35,  1.28s/it]

batch id predict after epoch:  483


 52%|█████▏    | 484/931 [34:03<09:47,  1.31s/it]

batch id predict after epoch:  484


 52%|█████▏    | 485/931 [34:05<09:42,  1.31s/it]

batch id predict after epoch:  485


 52%|█████▏    | 486/931 [34:06<09:35,  1.29s/it]

batch id predict after epoch:  486


 52%|█████▏    | 487/931 [34:07<09:28,  1.28s/it]

batch id predict after epoch:  487


 52%|█████▏    | 488/931 [34:08<09:31,  1.29s/it]

batch id predict after epoch:  488


 53%|█████▎    | 489/931 [34:10<09:31,  1.29s/it]

batch id predict after epoch:  489


 53%|█████▎    | 490/931 [34:11<09:23,  1.28s/it]

batch id predict after epoch:  490


 53%|█████▎    | 491/931 [34:12<09:19,  1.27s/it]

batch id predict after epoch:  491


 53%|█████▎    | 492/931 [34:13<09:19,  1.28s/it]

batch id predict after epoch:  492


 53%|█████▎    | 493/931 [34:15<09:17,  1.27s/it]

batch id predict after epoch:  493


 53%|█████▎    | 494/931 [34:16<09:15,  1.27s/it]

batch id predict after epoch:  494


 53%|█████▎    | 495/931 [34:17<09:13,  1.27s/it]

batch id predict after epoch:  495


 53%|█████▎    | 496/931 [34:19<09:10,  1.27s/it]

batch id predict after epoch:  496


 53%|█████▎    | 497/931 [34:20<09:07,  1.26s/it]

batch id predict after epoch:  497


 53%|█████▎    | 498/931 [34:21<09:04,  1.26s/it]

batch id predict after epoch:  498


 54%|█████▎    | 499/931 [34:22<09:06,  1.26s/it]

batch id predict after epoch:  499


 54%|█████▎    | 500/931 [34:24<09:06,  1.27s/it]

batch id predict after epoch:  500


 54%|█████▍    | 501/931 [34:25<09:10,  1.28s/it]

batch id predict after epoch:  501


 54%|█████▍    | 502/931 [34:26<09:11,  1.28s/it]

batch id predict after epoch:  502


 54%|█████▍    | 503/931 [34:28<09:11,  1.29s/it]

batch id predict after epoch:  503


 54%|█████▍    | 504/931 [34:29<09:10,  1.29s/it]

batch id predict after epoch:  504


 54%|█████▍    | 505/931 [34:30<09:14,  1.30s/it]

batch id predict after epoch:  505


 54%|█████▍    | 506/931 [34:31<09:10,  1.29s/it]

batch id predict after epoch:  506


 54%|█████▍    | 507/931 [34:33<09:04,  1.28s/it]

batch id predict after epoch:  507


 55%|█████▍    | 508/931 [34:34<09:06,  1.29s/it]

batch id predict after epoch:  508


 55%|█████▍    | 509/931 [34:35<09:06,  1.29s/it]

batch id predict after epoch:  509


 55%|█████▍    | 510/931 [34:37<09:00,  1.28s/it]

batch id predict after epoch:  510


 55%|█████▍    | 511/931 [34:38<09:08,  1.31s/it]

batch id predict after epoch:  511


 55%|█████▍    | 512/931 [34:39<09:02,  1.29s/it]

batch id predict after epoch:  512


 55%|█████▌    | 513/931 [34:40<08:56,  1.28s/it]

batch id predict after epoch:  513


 55%|█████▌    | 514/931 [34:42<08:59,  1.29s/it]

batch id predict after epoch:  514


 55%|█████▌    | 515/931 [34:43<08:54,  1.29s/it]

batch id predict after epoch:  515


 55%|█████▌    | 516/931 [34:44<08:49,  1.28s/it]

batch id predict after epoch:  516


 56%|█████▌    | 517/931 [34:46<08:46,  1.27s/it]

batch id predict after epoch:  517


 56%|█████▌    | 518/931 [34:47<08:47,  1.28s/it]

batch id predict after epoch:  518


 56%|█████▌    | 519/931 [34:48<08:49,  1.28s/it]

batch id predict after epoch:  519


 56%|█████▌    | 520/931 [34:49<08:52,  1.30s/it]

batch id predict after epoch:  520


 56%|█████▌    | 521/931 [34:51<08:52,  1.30s/it]

batch id predict after epoch:  521


 56%|█████▌    | 522/931 [34:52<08:45,  1.28s/it]

batch id predict after epoch:  522


 56%|█████▌    | 523/931 [34:53<08:49,  1.30s/it]

batch id predict after epoch:  523


 56%|█████▋    | 524/931 [34:55<08:46,  1.29s/it]

batch id predict after epoch:  524


 56%|█████▋    | 525/931 [34:56<08:41,  1.28s/it]

batch id predict after epoch:  525


 56%|█████▋    | 526/931 [34:57<08:36,  1.28s/it]

batch id predict after epoch:  526


 57%|█████▋    | 527/931 [34:58<08:34,  1.27s/it]

batch id predict after epoch:  527


 57%|█████▋    | 528/931 [35:00<08:34,  1.28s/it]

batch id predict after epoch:  528


 57%|█████▋    | 529/931 [35:01<08:33,  1.28s/it]

batch id predict after epoch:  529


 57%|█████▋    | 530/931 [35:02<08:27,  1.27s/it]

batch id predict after epoch:  530


 57%|█████▋    | 531/931 [35:03<08:27,  1.27s/it]

batch id predict after epoch:  531


 57%|█████▋    | 532/931 [35:05<08:24,  1.26s/it]

batch id predict after epoch:  532


 57%|█████▋    | 533/931 [35:06<08:24,  1.27s/it]

batch id predict after epoch:  533


 57%|█████▋    | 534/931 [35:07<08:22,  1.26s/it]

batch id predict after epoch:  534


 57%|█████▋    | 535/931 [35:09<08:23,  1.27s/it]

batch id predict after epoch:  535


 58%|█████▊    | 536/931 [35:10<08:19,  1.26s/it]

batch id predict after epoch:  536


 58%|█████▊    | 537/931 [35:11<08:18,  1.27s/it]

batch id predict after epoch:  537


 58%|█████▊    | 538/931 [35:12<08:17,  1.26s/it]

batch id predict after epoch:  538


 58%|█████▊    | 539/931 [35:14<08:21,  1.28s/it]

batch id predict after epoch:  539


 58%|█████▊    | 540/931 [35:15<08:16,  1.27s/it]

batch id predict after epoch:  540


 58%|█████▊    | 541/931 [35:16<08:20,  1.28s/it]

batch id predict after epoch:  541


 58%|█████▊    | 542/931 [35:18<08:21,  1.29s/it]

batch id predict after epoch:  542


 58%|█████▊    | 543/931 [35:19<08:21,  1.29s/it]

batch id predict after epoch:  543


 58%|█████▊    | 544/931 [35:20<08:21,  1.30s/it]

batch id predict after epoch:  544


 59%|█████▊    | 545/931 [35:21<08:17,  1.29s/it]

batch id predict after epoch:  545


 59%|█████▊    | 546/931 [35:23<08:09,  1.27s/it]

batch id predict after epoch:  546


 59%|█████▉    | 547/931 [35:24<08:12,  1.28s/it]

batch id predict after epoch:  547


 59%|█████▉    | 548/931 [35:25<08:08,  1.28s/it]

batch id predict after epoch:  548


 59%|█████▉    | 549/931 [35:26<08:10,  1.28s/it]

batch id predict after epoch:  549


 59%|█████▉    | 550/931 [35:28<08:13,  1.29s/it]

batch id predict after epoch:  550


 59%|█████▉    | 551/931 [35:29<08:08,  1.29s/it]

batch id predict after epoch:  551


 59%|█████▉    | 552/931 [35:30<08:04,  1.28s/it]

batch id predict after epoch:  552


 59%|█████▉    | 553/931 [35:32<08:08,  1.29s/it]

batch id predict after epoch:  553


 60%|█████▉    | 554/931 [35:33<08:04,  1.29s/it]

batch id predict after epoch:  554


 60%|█████▉    | 555/931 [35:34<08:06,  1.29s/it]

batch id predict after epoch:  555


 60%|█████▉    | 556/931 [35:35<08:00,  1.28s/it]

batch id predict after epoch:  556


 60%|█████▉    | 557/931 [35:37<08:03,  1.29s/it]

batch id predict after epoch:  557


 60%|█████▉    | 558/931 [35:38<08:13,  1.32s/it]

batch id predict after epoch:  558


 60%|██████    | 559/931 [35:39<08:06,  1.31s/it]

batch id predict after epoch:  559


 60%|██████    | 560/931 [35:41<08:02,  1.30s/it]

batch id predict after epoch:  560


 60%|██████    | 561/931 [35:42<07:57,  1.29s/it]

batch id predict after epoch:  561


 60%|██████    | 562/931 [35:43<07:59,  1.30s/it]

batch id predict after epoch:  562


 60%|██████    | 563/931 [35:45<07:55,  1.29s/it]

batch id predict after epoch:  563


 61%|██████    | 564/931 [35:46<07:49,  1.28s/it]

batch id predict after epoch:  564


 61%|██████    | 565/931 [35:47<07:57,  1.31s/it]

batch id predict after epoch:  565


 61%|██████    | 566/931 [35:49<07:54,  1.30s/it]

batch id predict after epoch:  566


 61%|██████    | 567/931 [35:50<07:49,  1.29s/it]

batch id predict after epoch:  567


 61%|██████    | 568/931 [35:51<07:45,  1.28s/it]

batch id predict after epoch:  568


 61%|██████    | 569/931 [35:52<07:42,  1.28s/it]

batch id predict after epoch:  569


 61%|██████    | 570/931 [35:54<07:40,  1.27s/it]

batch id predict after epoch:  570


 61%|██████▏   | 571/931 [35:55<07:41,  1.28s/it]

batch id predict after epoch:  571


 61%|██████▏   | 572/931 [35:56<07:36,  1.27s/it]

batch id predict after epoch:  572


 62%|██████▏   | 573/931 [35:57<07:35,  1.27s/it]

batch id predict after epoch:  573


 62%|██████▏   | 574/931 [35:59<07:32,  1.27s/it]

batch id predict after epoch:  574


 62%|██████▏   | 575/931 [36:00<07:31,  1.27s/it]

batch id predict after epoch:  575


 62%|██████▏   | 576/931 [36:01<07:28,  1.26s/it]

batch id predict after epoch:  576


 62%|██████▏   | 577/931 [36:02<07:27,  1.26s/it]

batch id predict after epoch:  577


 62%|██████▏   | 578/931 [36:04<07:31,  1.28s/it]

batch id predict after epoch:  578


 62%|██████▏   | 579/931 [36:05<07:28,  1.27s/it]

batch id predict after epoch:  579


 62%|██████▏   | 580/931 [36:06<07:24,  1.27s/it]

batch id predict after epoch:  580


 62%|██████▏   | 581/931 [36:08<07:22,  1.26s/it]

batch id predict after epoch:  581


 63%|██████▎   | 582/931 [36:09<07:23,  1.27s/it]

batch id predict after epoch:  582


 63%|██████▎   | 583/931 [36:10<07:23,  1.27s/it]

batch id predict after epoch:  583


 63%|██████▎   | 584/931 [36:11<07:22,  1.27s/it]

batch id predict after epoch:  584


 63%|██████▎   | 585/931 [36:13<07:22,  1.28s/it]

batch id predict after epoch:  585


 63%|██████▎   | 586/931 [36:14<07:23,  1.28s/it]

batch id predict after epoch:  586


 63%|██████▎   | 587/931 [36:15<07:31,  1.31s/it]

batch id predict after epoch:  587


 63%|██████▎   | 588/931 [36:17<07:27,  1.30s/it]

batch id predict after epoch:  588


 63%|██████▎   | 589/931 [36:18<07:22,  1.29s/it]

batch id predict after epoch:  589


 63%|██████▎   | 590/931 [36:19<07:24,  1.30s/it]

batch id predict after epoch:  590


 63%|██████▎   | 591/931 [36:21<07:20,  1.30s/it]

batch id predict after epoch:  591


 64%|██████▎   | 592/931 [36:22<07:24,  1.31s/it]

batch id predict after epoch:  592


 64%|██████▎   | 593/931 [36:23<07:16,  1.29s/it]

batch id predict after epoch:  593


 64%|██████▍   | 594/931 [36:24<07:12,  1.28s/it]

batch id predict after epoch:  594


 64%|██████▍   | 595/931 [36:26<07:10,  1.28s/it]

batch id predict after epoch:  595


 64%|██████▍   | 596/931 [36:27<07:08,  1.28s/it]

batch id predict after epoch:  596


 64%|██████▍   | 597/931 [36:28<07:06,  1.28s/it]

batch id predict after epoch:  597


 64%|██████▍   | 598/931 [36:29<07:03,  1.27s/it]

batch id predict after epoch:  598


 64%|██████▍   | 599/931 [36:31<07:01,  1.27s/it]

batch id predict after epoch:  599


 64%|██████▍   | 600/931 [36:32<07:01,  1.27s/it]

batch id predict after epoch:  600


 65%|██████▍   | 601/931 [36:33<07:07,  1.30s/it]

batch id predict after epoch:  601


 65%|██████▍   | 602/931 [36:35<07:06,  1.30s/it]

batch id predict after epoch:  602


 65%|██████▍   | 603/931 [36:36<07:04,  1.29s/it]

batch id predict after epoch:  603


 65%|██████▍   | 604/931 [36:37<07:06,  1.30s/it]

batch id predict after epoch:  604


 65%|██████▍   | 605/931 [36:39<07:01,  1.29s/it]

batch id predict after epoch:  605


 65%|██████▌   | 606/931 [36:40<07:03,  1.30s/it]

batch id predict after epoch:  606


 65%|██████▌   | 607/931 [36:41<07:02,  1.30s/it]

batch id predict after epoch:  607


 65%|██████▌   | 608/931 [36:42<06:57,  1.29s/it]

batch id predict after epoch:  608


 65%|██████▌   | 609/931 [36:44<06:59,  1.30s/it]

batch id predict after epoch:  609


 66%|██████▌   | 610/931 [36:45<07:01,  1.31s/it]

batch id predict after epoch:  610


 66%|██████▌   | 611/931 [36:46<06:55,  1.30s/it]

batch id predict after epoch:  611


 66%|██████▌   | 612/931 [36:48<06:50,  1.29s/it]

batch id predict after epoch:  612


 66%|██████▌   | 613/931 [36:49<06:48,  1.29s/it]

batch id predict after epoch:  613


 66%|██████▌   | 614/931 [36:50<06:50,  1.30s/it]

batch id predict after epoch:  614


 66%|██████▌   | 615/931 [36:51<06:47,  1.29s/it]

batch id predict after epoch:  615


 66%|██████▌   | 616/931 [36:53<06:44,  1.28s/it]

batch id predict after epoch:  616


 66%|██████▋   | 617/931 [36:54<06:44,  1.29s/it]

batch id predict after epoch:  617


 66%|██████▋   | 618/931 [36:55<06:42,  1.29s/it]

batch id predict after epoch:  618


 66%|██████▋   | 619/931 [36:57<06:38,  1.28s/it]

batch id predict after epoch:  619


 67%|██████▋   | 620/931 [36:58<06:33,  1.26s/it]

batch id predict after epoch:  620


 67%|██████▋   | 621/931 [36:59<06:32,  1.27s/it]

batch id predict after epoch:  621


 67%|██████▋   | 622/931 [37:00<06:33,  1.27s/it]

batch id predict after epoch:  622


 67%|██████▋   | 623/931 [37:02<06:39,  1.30s/it]

batch id predict after epoch:  623


 67%|██████▋   | 624/931 [37:03<06:34,  1.28s/it]

batch id predict after epoch:  624


 67%|██████▋   | 625/931 [37:04<06:33,  1.29s/it]

batch id predict after epoch:  625


 67%|██████▋   | 626/931 [37:06<06:37,  1.30s/it]

batch id predict after epoch:  626


 67%|██████▋   | 627/931 [37:07<06:33,  1.29s/it]

batch id predict after epoch:  627


 67%|██████▋   | 628/931 [37:08<06:28,  1.28s/it]

batch id predict after epoch:  628


 68%|██████▊   | 629/931 [37:10<06:32,  1.30s/it]

batch id predict after epoch:  629


 68%|██████▊   | 630/931 [37:11<06:33,  1.31s/it]

batch id predict after epoch:  630


 68%|██████▊   | 631/931 [37:12<06:28,  1.30s/it]

batch id predict after epoch:  631


 68%|██████▊   | 632/931 [37:13<06:29,  1.30s/it]

batch id predict after epoch:  632


 68%|██████▊   | 633/931 [37:15<06:24,  1.29s/it]

batch id predict after epoch:  633


 68%|██████▊   | 634/931 [37:16<06:24,  1.30s/it]

batch id predict after epoch:  634


 68%|██████▊   | 635/931 [37:17<06:26,  1.30s/it]

batch id predict after epoch:  635


 68%|██████▊   | 636/931 [37:19<06:21,  1.29s/it]

batch id predict after epoch:  636


 68%|██████▊   | 637/931 [37:20<06:21,  1.30s/it]

batch id predict after epoch:  637


 69%|██████▊   | 638/931 [37:21<06:24,  1.31s/it]

batch id predict after epoch:  638


 69%|██████▊   | 639/931 [37:22<06:18,  1.30s/it]

batch id predict after epoch:  639


 69%|██████▊   | 640/931 [37:24<06:15,  1.29s/it]

batch id predict after epoch:  640


 69%|██████▉   | 641/931 [37:25<06:11,  1.28s/it]

batch id predict after epoch:  641


 69%|██████▉   | 642/931 [37:26<06:11,  1.29s/it]

batch id predict after epoch:  642


 69%|██████▉   | 643/931 [37:28<06:07,  1.28s/it]

batch id predict after epoch:  643


 69%|██████▉   | 644/931 [37:29<06:09,  1.29s/it]

batch id predict after epoch:  644


 69%|██████▉   | 645/931 [37:30<06:05,  1.28s/it]

batch id predict after epoch:  645


 69%|██████▉   | 646/931 [37:31<06:05,  1.28s/it]

batch id predict after epoch:  646


 69%|██████▉   | 647/931 [37:33<06:06,  1.29s/it]

batch id predict after epoch:  647


 70%|██████▉   | 648/931 [37:34<06:03,  1.28s/it]

batch id predict after epoch:  648


 70%|██████▉   | 649/931 [37:35<06:02,  1.29s/it]

batch id predict after epoch:  649


 70%|██████▉   | 650/931 [37:37<05:59,  1.28s/it]

batch id predict after epoch:  650


 70%|██████▉   | 651/931 [37:38<05:59,  1.28s/it]

batch id predict after epoch:  651


 70%|███████   | 652/931 [37:39<05:55,  1.27s/it]

batch id predict after epoch:  652


 70%|███████   | 653/931 [37:40<05:55,  1.28s/it]

batch id predict after epoch:  653


 70%|███████   | 654/931 [37:42<05:53,  1.28s/it]

batch id predict after epoch:  654


 70%|███████   | 655/931 [37:43<05:51,  1.27s/it]

batch id predict after epoch:  655


 70%|███████   | 656/931 [37:44<05:50,  1.27s/it]

batch id predict after epoch:  656


 71%|███████   | 657/931 [37:45<05:48,  1.27s/it]

batch id predict after epoch:  657


 71%|███████   | 658/931 [37:47<05:45,  1.27s/it]

batch id predict after epoch:  658


 71%|███████   | 659/931 [37:48<05:43,  1.26s/it]

batch id predict after epoch:  659


 71%|███████   | 660/931 [37:49<05:42,  1.27s/it]

batch id predict after epoch:  660


 71%|███████   | 661/931 [37:51<05:41,  1.27s/it]

batch id predict after epoch:  661


 71%|███████   | 662/931 [37:52<05:40,  1.27s/it]

batch id predict after epoch:  662


 71%|███████   | 663/931 [37:53<05:44,  1.29s/it]

batch id predict after epoch:  663


 71%|███████▏  | 664/931 [37:54<05:43,  1.29s/it]

batch id predict after epoch:  664


 71%|███████▏  | 665/931 [37:56<05:46,  1.30s/it]

batch id predict after epoch:  665


 72%|███████▏  | 666/931 [37:57<05:42,  1.29s/it]

batch id predict after epoch:  666


 72%|███████▏  | 667/931 [37:58<05:40,  1.29s/it]

batch id predict after epoch:  667


 72%|███████▏  | 668/931 [38:00<05:39,  1.29s/it]

batch id predict after epoch:  668


 72%|███████▏  | 669/931 [38:01<05:35,  1.28s/it]

batch id predict after epoch:  669


 72%|███████▏  | 670/931 [38:02<05:35,  1.28s/it]

batch id predict after epoch:  670


 72%|███████▏  | 671/931 [38:03<05:36,  1.29s/it]

batch id predict after epoch:  671


 72%|███████▏  | 672/931 [38:05<05:32,  1.28s/it]

batch id predict after epoch:  672


 72%|███████▏  | 673/931 [38:06<05:35,  1.30s/it]

batch id predict after epoch:  673


 72%|███████▏  | 674/931 [38:07<05:39,  1.32s/it]

batch id predict after epoch:  674


 73%|███████▎  | 675/931 [38:09<05:34,  1.31s/it]

batch id predict after epoch:  675


 73%|███████▎  | 676/931 [38:10<05:32,  1.31s/it]

batch id predict after epoch:  676


 73%|███████▎  | 677/931 [38:11<05:31,  1.31s/it]

batch id predict after epoch:  677


 73%|███████▎  | 678/931 [38:13<05:28,  1.30s/it]

batch id predict after epoch:  678


 73%|███████▎  | 679/931 [38:14<05:31,  1.32s/it]

batch id predict after epoch:  679


 73%|███████▎  | 680/931 [38:15<05:32,  1.32s/it]

batch id predict after epoch:  680


 73%|███████▎  | 681/931 [38:17<05:29,  1.32s/it]

batch id predict after epoch:  681


 73%|███████▎  | 682/931 [38:18<05:27,  1.32s/it]

batch id predict after epoch:  682


 73%|███████▎  | 683/931 [38:19<05:23,  1.30s/it]

batch id predict after epoch:  683


 73%|███████▎  | 684/931 [38:20<05:17,  1.29s/it]

batch id predict after epoch:  684


 74%|███████▎  | 685/931 [38:22<05:14,  1.28s/it]

batch id predict after epoch:  685


 74%|███████▎  | 686/931 [38:23<05:15,  1.29s/it]

batch id predict after epoch:  686


 74%|███████▍  | 687/931 [38:24<05:16,  1.30s/it]

batch id predict after epoch:  687


 74%|███████▍  | 688/931 [38:26<05:12,  1.28s/it]

batch id predict after epoch:  688


 74%|███████▍  | 689/931 [38:27<05:10,  1.28s/it]

batch id predict after epoch:  689


 74%|███████▍  | 690/931 [38:28<05:08,  1.28s/it]

batch id predict after epoch:  690


 74%|███████▍  | 691/931 [38:29<05:06,  1.28s/it]

batch id predict after epoch:  691


 74%|███████▍  | 692/931 [38:31<05:03,  1.27s/it]

batch id predict after epoch:  692


 74%|███████▍  | 693/931 [38:32<05:01,  1.27s/it]

batch id predict after epoch:  693


 75%|███████▍  | 694/931 [38:33<04:59,  1.26s/it]

batch id predict after epoch:  694


 75%|███████▍  | 695/931 [38:34<04:59,  1.27s/it]

batch id predict after epoch:  695


 75%|███████▍  | 696/931 [38:36<05:01,  1.28s/it]

batch id predict after epoch:  696


 75%|███████▍  | 697/931 [38:37<04:58,  1.27s/it]

batch id predict after epoch:  697


 75%|███████▍  | 698/931 [38:38<04:57,  1.28s/it]

batch id predict after epoch:  698


 75%|███████▌  | 699/931 [38:40<05:00,  1.29s/it]

batch id predict after epoch:  699


 75%|███████▌  | 700/931 [38:41<04:56,  1.28s/it]

batch id predict after epoch:  700


 75%|███████▌  | 701/931 [38:42<04:54,  1.28s/it]

batch id predict after epoch:  701


 75%|███████▌  | 702/931 [38:43<04:51,  1.28s/it]

batch id predict after epoch:  702


 76%|███████▌  | 703/931 [38:45<04:49,  1.27s/it]

batch id predict after epoch:  703


 76%|███████▌  | 704/931 [38:46<04:48,  1.27s/it]

batch id predict after epoch:  704


 76%|███████▌  | 705/931 [38:47<04:46,  1.27s/it]

batch id predict after epoch:  705


 76%|███████▌  | 706/931 [38:49<04:48,  1.28s/it]

batch id predict after epoch:  706


 76%|███████▌  | 707/931 [38:50<04:47,  1.28s/it]

batch id predict after epoch:  707


 76%|███████▌  | 708/931 [38:51<04:54,  1.32s/it]

batch id predict after epoch:  708


 76%|███████▌  | 709/931 [38:53<04:49,  1.30s/it]

batch id predict after epoch:  709


 76%|███████▋  | 710/931 [38:54<04:47,  1.30s/it]

batch id predict after epoch:  710


 76%|███████▋  | 711/931 [38:55<04:49,  1.31s/it]

batch id predict after epoch:  711


 76%|███████▋  | 712/931 [38:56<04:44,  1.30s/it]

batch id predict after epoch:  712


 77%|███████▋  | 713/931 [38:58<04:42,  1.30s/it]

batch id predict after epoch:  713


 77%|███████▋  | 714/931 [38:59<04:41,  1.30s/it]

batch id predict after epoch:  714


 77%|███████▋  | 715/931 [39:00<04:42,  1.31s/it]

batch id predict after epoch:  715


 77%|███████▋  | 716/931 [39:02<04:41,  1.31s/it]

batch id predict after epoch:  716


 77%|███████▋  | 717/931 [39:03<04:39,  1.30s/it]

batch id predict after epoch:  717


 77%|███████▋  | 718/931 [39:04<04:35,  1.29s/it]

batch id predict after epoch:  718


 77%|███████▋  | 719/931 [39:05<04:33,  1.29s/it]

batch id predict after epoch:  719


 77%|███████▋  | 720/931 [39:07<04:31,  1.29s/it]

batch id predict after epoch:  720


 77%|███████▋  | 721/931 [39:08<04:30,  1.29s/it]

batch id predict after epoch:  721


 78%|███████▊  | 722/931 [39:09<04:29,  1.29s/it]

batch id predict after epoch:  722


 78%|███████▊  | 723/931 [39:11<04:28,  1.29s/it]

batch id predict after epoch:  723


 78%|███████▊  | 724/931 [39:12<04:25,  1.28s/it]

batch id predict after epoch:  724


 78%|███████▊  | 725/931 [39:13<04:24,  1.29s/it]

batch id predict after epoch:  725


 78%|███████▊  | 726/931 [39:14<04:21,  1.28s/it]

batch id predict after epoch:  726


 78%|███████▊  | 727/931 [39:16<04:24,  1.30s/it]

batch id predict after epoch:  727


 78%|███████▊  | 728/931 [39:17<04:25,  1.31s/it]

batch id predict after epoch:  728


 78%|███████▊  | 729/931 [39:18<04:24,  1.31s/it]

batch id predict after epoch:  729


 78%|███████▊  | 730/931 [39:20<04:24,  1.32s/it]

batch id predict after epoch:  730


 79%|███████▊  | 731/931 [39:21<04:27,  1.34s/it]

batch id predict after epoch:  731


 79%|███████▊  | 732/931 [39:22<04:21,  1.32s/it]

batch id predict after epoch:  732


 79%|███████▊  | 733/931 [39:24<04:18,  1.30s/it]

batch id predict after epoch:  733


 79%|███████▉  | 734/931 [39:25<04:17,  1.31s/it]

batch id predict after epoch:  734


 79%|███████▉  | 735/931 [39:26<04:14,  1.30s/it]

batch id predict after epoch:  735


 79%|███████▉  | 736/931 [39:28<04:10,  1.29s/it]

batch id predict after epoch:  736


 79%|███████▉  | 737/931 [39:29<04:07,  1.28s/it]

batch id predict after epoch:  737


 79%|███████▉  | 738/931 [39:30<04:08,  1.29s/it]

batch id predict after epoch:  738


 79%|███████▉  | 739/931 [39:31<04:06,  1.28s/it]

batch id predict after epoch:  739


 79%|███████▉  | 740/931 [39:33<04:05,  1.29s/it]

batch id predict after epoch:  740


 80%|███████▉  | 741/931 [39:34<04:09,  1.32s/it]

batch id predict after epoch:  741


 80%|███████▉  | 742/931 [39:35<04:08,  1.31s/it]

batch id predict after epoch:  742


 80%|███████▉  | 743/931 [39:37<04:04,  1.30s/it]

batch id predict after epoch:  743


 80%|███████▉  | 744/931 [39:38<04:02,  1.29s/it]

batch id predict after epoch:  744


 80%|████████  | 745/931 [39:39<03:59,  1.29s/it]

batch id predict after epoch:  745


 80%|████████  | 746/931 [39:40<03:58,  1.29s/it]

batch id predict after epoch:  746


 80%|████████  | 747/931 [39:42<03:58,  1.29s/it]

batch id predict after epoch:  747


 80%|████████  | 748/931 [39:43<03:57,  1.30s/it]

batch id predict after epoch:  748


 80%|████████  | 749/931 [39:44<03:59,  1.32s/it]

batch id predict after epoch:  749


 81%|████████  | 750/931 [39:46<04:02,  1.34s/it]

batch id predict after epoch:  750


 81%|████████  | 751/931 [39:47<04:00,  1.33s/it]

batch id predict after epoch:  751


 81%|████████  | 752/931 [39:48<03:56,  1.32s/it]

batch id predict after epoch:  752


 81%|████████  | 753/931 [39:50<03:56,  1.33s/it]

batch id predict after epoch:  753


 81%|████████  | 754/931 [39:51<03:55,  1.33s/it]

batch id predict after epoch:  754


 81%|████████  | 755/931 [39:53<03:54,  1.33s/it]

batch id predict after epoch:  755


 81%|████████  | 756/931 [39:54<03:51,  1.32s/it]

batch id predict after epoch:  756


 81%|████████▏ | 757/931 [39:55<03:51,  1.33s/it]

batch id predict after epoch:  757


 81%|████████▏ | 758/931 [39:56<03:47,  1.31s/it]

batch id predict after epoch:  758


 82%|████████▏ | 759/931 [39:58<03:46,  1.32s/it]

batch id predict after epoch:  759


 82%|████████▏ | 760/931 [39:59<03:42,  1.30s/it]

batch id predict after epoch:  760


 82%|████████▏ | 761/931 [40:00<03:42,  1.31s/it]

batch id predict after epoch:  761


 82%|████████▏ | 762/931 [40:02<03:40,  1.30s/it]

batch id predict after epoch:  762


 82%|████████▏ | 763/931 [40:03<03:37,  1.30s/it]

batch id predict after epoch:  763


 82%|████████▏ | 764/931 [40:04<03:34,  1.28s/it]

batch id predict after epoch:  764


 82%|████████▏ | 765/931 [40:05<03:32,  1.28s/it]

batch id predict after epoch:  765


 82%|████████▏ | 766/931 [40:07<03:32,  1.29s/it]

batch id predict after epoch:  766


 82%|████████▏ | 767/931 [40:08<03:30,  1.28s/it]

batch id predict after epoch:  767


 82%|████████▏ | 768/931 [40:09<03:34,  1.32s/it]

batch id predict after epoch:  768


 83%|████████▎ | 769/931 [40:11<03:31,  1.31s/it]

batch id predict after epoch:  769


 83%|████████▎ | 770/931 [40:12<03:29,  1.30s/it]

batch id predict after epoch:  770


 83%|████████▎ | 771/931 [40:13<03:29,  1.31s/it]

batch id predict after epoch:  771


 83%|████████▎ | 772/931 [40:15<03:28,  1.31s/it]

batch id predict after epoch:  772


 83%|████████▎ | 773/931 [40:16<03:25,  1.30s/it]

batch id predict after epoch:  773


 83%|████████▎ | 774/931 [40:17<03:25,  1.31s/it]

batch id predict after epoch:  774


 83%|████████▎ | 775/931 [40:18<03:22,  1.30s/it]

batch id predict after epoch:  775


 83%|████████▎ | 776/931 [40:20<03:21,  1.30s/it]

batch id predict after epoch:  776


 83%|████████▎ | 777/931 [40:21<03:19,  1.29s/it]

batch id predict after epoch:  777


 84%|████████▎ | 778/931 [40:22<03:16,  1.29s/it]

batch id predict after epoch:  778


 84%|████████▎ | 779/931 [40:24<03:16,  1.29s/it]

batch id predict after epoch:  779


 84%|████████▍ | 780/931 [40:25<03:14,  1.29s/it]

batch id predict after epoch:  780


 84%|████████▍ | 781/931 [40:26<03:12,  1.28s/it]

batch id predict after epoch:  781


 84%|████████▍ | 782/931 [40:27<03:10,  1.28s/it]

batch id predict after epoch:  782


 84%|████████▍ | 783/931 [40:29<03:09,  1.28s/it]

batch id predict after epoch:  783


 84%|████████▍ | 784/931 [40:30<03:07,  1.28s/it]

batch id predict after epoch:  784


 84%|████████▍ | 785/931 [40:31<03:05,  1.27s/it]

batch id predict after epoch:  785


 84%|████████▍ | 786/931 [40:33<03:03,  1.27s/it]

batch id predict after epoch:  786


 85%|████████▍ | 787/931 [40:34<03:03,  1.28s/it]

batch id predict after epoch:  787


 85%|████████▍ | 788/931 [40:35<03:03,  1.28s/it]

batch id predict after epoch:  788


 85%|████████▍ | 789/931 [40:36<03:02,  1.29s/it]

batch id predict after epoch:  789


 85%|████████▍ | 790/931 [40:38<03:00,  1.28s/it]

batch id predict after epoch:  790


 85%|████████▍ | 791/931 [40:39<03:01,  1.30s/it]

batch id predict after epoch:  791


 85%|████████▌ | 792/931 [40:40<03:00,  1.30s/it]

batch id predict after epoch:  792


 85%|████████▌ | 793/931 [40:42<03:00,  1.31s/it]

batch id predict after epoch:  793


 85%|████████▌ | 794/931 [40:43<02:57,  1.30s/it]

batch id predict after epoch:  794


 85%|████████▌ | 795/931 [40:44<02:57,  1.30s/it]

batch id predict after epoch:  795


 85%|████████▌ | 796/931 [40:46<02:58,  1.32s/it]

batch id predict after epoch:  796


 86%|████████▌ | 797/931 [40:47<02:58,  1.33s/it]

batch id predict after epoch:  797


 86%|████████▌ | 798/931 [40:48<02:55,  1.32s/it]

batch id predict after epoch:  798


 86%|████████▌ | 799/931 [40:50<02:54,  1.32s/it]

batch id predict after epoch:  799


 86%|████████▌ | 800/931 [40:51<02:52,  1.32s/it]

batch id predict after epoch:  800


 86%|████████▌ | 801/931 [40:52<02:51,  1.32s/it]

batch id predict after epoch:  801


 86%|████████▌ | 802/931 [40:54<02:49,  1.31s/it]

batch id predict after epoch:  802


 86%|████████▋ | 803/931 [40:55<02:47,  1.31s/it]

batch id predict after epoch:  803


 86%|████████▋ | 804/931 [40:56<02:48,  1.33s/it]

batch id predict after epoch:  804


 86%|████████▋ | 805/931 [40:57<02:44,  1.31s/it]

batch id predict after epoch:  805


 87%|████████▋ | 806/931 [40:59<02:43,  1.31s/it]

batch id predict after epoch:  806


 87%|████████▋ | 807/931 [41:00<02:42,  1.31s/it]

batch id predict after epoch:  807


 87%|████████▋ | 808/931 [41:01<02:40,  1.30s/it]

batch id predict after epoch:  808


 87%|████████▋ | 809/931 [41:03<02:37,  1.29s/it]

batch id predict after epoch:  809


 87%|████████▋ | 810/931 [41:04<02:35,  1.29s/it]

batch id predict after epoch:  810


 87%|████████▋ | 811/931 [41:05<02:33,  1.28s/it]

batch id predict after epoch:  811


 87%|████████▋ | 812/931 [41:06<02:32,  1.28s/it]

batch id predict after epoch:  812


 87%|████████▋ | 813/931 [41:08<02:34,  1.31s/it]

batch id predict after epoch:  813


 87%|████████▋ | 814/931 [41:09<02:31,  1.30s/it]

batch id predict after epoch:  814


 88%|████████▊ | 815/931 [41:10<02:29,  1.29s/it]

batch id predict after epoch:  815


 88%|████████▊ | 816/931 [41:12<02:28,  1.29s/it]

batch id predict after epoch:  816


 88%|████████▊ | 817/931 [41:13<02:26,  1.28s/it]

batch id predict after epoch:  817


 88%|████████▊ | 818/931 [41:14<02:24,  1.28s/it]

batch id predict after epoch:  818


 88%|████████▊ | 819/931 [41:16<02:24,  1.29s/it]

batch id predict after epoch:  819


 88%|████████▊ | 820/931 [41:17<02:22,  1.29s/it]

batch id predict after epoch:  820


 88%|████████▊ | 821/931 [41:18<02:22,  1.29s/it]

batch id predict after epoch:  821


 88%|████████▊ | 822/931 [41:19<02:22,  1.30s/it]

batch id predict after epoch:  822


 88%|████████▊ | 823/931 [41:21<02:20,  1.30s/it]

batch id predict after epoch:  823


 89%|████████▊ | 824/931 [41:22<02:18,  1.30s/it]

batch id predict after epoch:  824


 89%|████████▊ | 825/931 [41:23<02:17,  1.30s/it]

batch id predict after epoch:  825


 89%|████████▊ | 826/931 [41:25<02:15,  1.29s/it]

batch id predict after epoch:  826


 89%|████████▉ | 827/931 [41:26<02:15,  1.30s/it]

batch id predict after epoch:  827


 89%|████████▉ | 828/931 [41:27<02:13,  1.30s/it]

batch id predict after epoch:  828


 89%|████████▉ | 829/931 [41:29<02:13,  1.31s/it]

batch id predict after epoch:  829


 89%|████████▉ | 830/931 [41:30<02:11,  1.30s/it]

batch id predict after epoch:  830


 89%|████████▉ | 831/931 [41:31<02:09,  1.30s/it]

batch id predict after epoch:  831


 89%|████████▉ | 832/931 [41:32<02:08,  1.29s/it]

batch id predict after epoch:  832


 89%|████████▉ | 833/931 [41:34<02:05,  1.28s/it]

batch id predict after epoch:  833


 90%|████████▉ | 834/931 [41:35<02:05,  1.30s/it]

batch id predict after epoch:  834


 90%|████████▉ | 835/931 [41:36<02:03,  1.29s/it]

batch id predict after epoch:  835


 90%|████████▉ | 836/931 [41:38<02:02,  1.29s/it]

batch id predict after epoch:  836


 90%|████████▉ | 837/931 [41:39<02:00,  1.28s/it]

batch id predict after epoch:  837


 90%|█████████ | 838/931 [41:40<01:59,  1.28s/it]

batch id predict after epoch:  838


 90%|█████████ | 839/931 [41:41<01:56,  1.27s/it]

batch id predict after epoch:  839


 90%|█████████ | 840/931 [41:43<01:55,  1.27s/it]

batch id predict after epoch:  840


 90%|█████████ | 841/931 [41:44<01:56,  1.29s/it]

batch id predict after epoch:  841


 90%|█████████ | 842/931 [41:45<01:54,  1.29s/it]

batch id predict after epoch:  842


 91%|█████████ | 843/931 [41:47<01:53,  1.29s/it]

batch id predict after epoch:  843


 91%|█████████ | 844/931 [41:48<01:51,  1.28s/it]

batch id predict after epoch:  844


 91%|█████████ | 845/931 [41:49<01:49,  1.28s/it]

batch id predict after epoch:  845


 91%|█████████ | 846/931 [41:50<01:48,  1.28s/it]

batch id predict after epoch:  846


 91%|█████████ | 847/931 [41:52<01:47,  1.28s/it]

batch id predict after epoch:  847


 91%|█████████ | 848/931 [41:53<01:45,  1.27s/it]

batch id predict after epoch:  848


 91%|█████████ | 849/931 [41:54<01:44,  1.27s/it]

batch id predict after epoch:  849


 91%|█████████▏| 850/931 [41:55<01:43,  1.28s/it]

batch id predict after epoch:  850


 91%|█████████▏| 851/931 [41:57<01:42,  1.28s/it]

batch id predict after epoch:  851


 92%|█████████▏| 852/931 [41:58<01:40,  1.27s/it]

batch id predict after epoch:  852


 92%|█████████▏| 853/931 [41:59<01:41,  1.30s/it]

batch id predict after epoch:  853


 92%|█████████▏| 854/931 [42:01<01:39,  1.29s/it]

batch id predict after epoch:  854


 92%|█████████▏| 855/931 [42:02<01:37,  1.28s/it]

batch id predict after epoch:  855


 92%|█████████▏| 856/931 [42:03<01:37,  1.29s/it]

batch id predict after epoch:  856


 92%|█████████▏| 857/931 [42:05<01:36,  1.31s/it]

batch id predict after epoch:  857


 92%|█████████▏| 858/931 [42:06<01:35,  1.31s/it]

batch id predict after epoch:  858


 92%|█████████▏| 859/931 [42:07<01:33,  1.30s/it]

batch id predict after epoch:  859


 92%|█████████▏| 860/931 [42:08<01:31,  1.29s/it]

batch id predict after epoch:  860


 92%|█████████▏| 861/931 [42:10<01:30,  1.29s/it]

batch id predict after epoch:  861


 93%|█████████▎| 862/931 [42:11<01:28,  1.28s/it]

batch id predict after epoch:  862


 93%|█████████▎| 863/931 [42:12<01:27,  1.28s/it]

batch id predict after epoch:  863


 93%|█████████▎| 864/931 [42:14<01:26,  1.29s/it]

batch id predict after epoch:  864


 93%|█████████▎| 865/931 [42:15<01:25,  1.29s/it]

batch id predict after epoch:  865


 93%|█████████▎| 866/931 [42:16<01:23,  1.29s/it]

batch id predict after epoch:  866


 93%|█████████▎| 867/931 [42:17<01:22,  1.29s/it]

batch id predict after epoch:  867


 93%|█████████▎| 868/931 [42:19<01:20,  1.28s/it]

batch id predict after epoch:  868


 93%|█████████▎| 869/931 [42:20<01:19,  1.28s/it]

batch id predict after epoch:  869


 93%|█████████▎| 870/931 [42:21<01:18,  1.28s/it]

batch id predict after epoch:  870


 94%|█████████▎| 871/931 [42:23<01:17,  1.28s/it]

batch id predict after epoch:  871


 94%|█████████▎| 872/931 [42:24<01:16,  1.30s/it]

batch id predict after epoch:  872


 94%|█████████▍| 873/931 [42:25<01:15,  1.30s/it]

batch id predict after epoch:  873


 94%|█████████▍| 874/931 [42:26<01:13,  1.29s/it]

batch id predict after epoch:  874


 94%|█████████▍| 875/931 [42:28<01:11,  1.28s/it]

batch id predict after epoch:  875


 94%|█████████▍| 876/931 [42:29<01:10,  1.28s/it]

batch id predict after epoch:  876


 94%|█████████▍| 877/931 [42:30<01:08,  1.28s/it]

batch id predict after epoch:  877


 94%|█████████▍| 878/931 [42:32<01:08,  1.29s/it]

batch id predict after epoch:  878


 94%|█████████▍| 879/931 [42:33<01:07,  1.29s/it]

batch id predict after epoch:  879


 95%|█████████▍| 880/931 [42:34<01:05,  1.29s/it]

batch id predict after epoch:  880


 95%|█████████▍| 881/931 [42:35<01:04,  1.29s/it]

batch id predict after epoch:  881


 95%|█████████▍| 882/931 [42:37<01:03,  1.29s/it]

batch id predict after epoch:  882


 95%|█████████▍| 883/931 [42:38<01:02,  1.31s/it]

batch id predict after epoch:  883


 95%|█████████▍| 884/931 [42:39<01:00,  1.30s/it]

batch id predict after epoch:  884


 95%|█████████▌| 885/931 [42:41<00:59,  1.29s/it]

batch id predict after epoch:  885


 95%|█████████▌| 886/931 [42:42<00:58,  1.31s/it]

batch id predict after epoch:  886


 95%|█████████▌| 887/931 [42:43<00:57,  1.30s/it]

batch id predict after epoch:  887


 95%|█████████▌| 888/931 [42:45<00:55,  1.29s/it]

batch id predict after epoch:  888


 95%|█████████▌| 889/931 [42:46<00:54,  1.29s/it]

batch id predict after epoch:  889


 96%|█████████▌| 890/931 [42:47<00:52,  1.29s/it]

batch id predict after epoch:  890


 96%|█████████▌| 891/931 [42:48<00:51,  1.29s/it]

batch id predict after epoch:  891


 96%|█████████▌| 892/931 [42:50<00:50,  1.30s/it]

batch id predict after epoch:  892


 96%|█████████▌| 893/931 [42:51<00:49,  1.30s/it]

batch id predict after epoch:  893


 96%|█████████▌| 894/931 [42:52<00:47,  1.29s/it]

batch id predict after epoch:  894


 96%|█████████▌| 895/931 [42:54<00:46,  1.29s/it]

batch id predict after epoch:  895


 96%|█████████▌| 896/931 [42:55<00:45,  1.29s/it]

batch id predict after epoch:  896


 96%|█████████▋| 897/931 [42:56<00:44,  1.30s/it]

batch id predict after epoch:  897


 96%|█████████▋| 898/931 [42:57<00:42,  1.30s/it]

batch id predict after epoch:  898


 97%|█████████▋| 899/931 [42:59<00:41,  1.29s/it]

batch id predict after epoch:  899


 97%|█████████▋| 900/931 [43:00<00:39,  1.28s/it]

batch id predict after epoch:  900


 97%|█████████▋| 901/931 [43:01<00:38,  1.28s/it]

batch id predict after epoch:  901


 97%|█████████▋| 902/931 [43:03<00:37,  1.28s/it]

batch id predict after epoch:  902


 97%|█████████▋| 903/931 [43:04<00:36,  1.29s/it]

batch id predict after epoch:  903


 97%|█████████▋| 904/931 [43:05<00:34,  1.28s/it]

batch id predict after epoch:  904


 97%|█████████▋| 905/931 [43:06<00:33,  1.28s/it]

batch id predict after epoch:  905


 97%|█████████▋| 906/931 [43:08<00:31,  1.27s/it]

batch id predict after epoch:  906


 97%|█████████▋| 907/931 [43:09<00:30,  1.28s/it]

batch id predict after epoch:  907


 98%|█████████▊| 908/931 [43:10<00:29,  1.28s/it]

batch id predict after epoch:  908


 98%|█████████▊| 909/931 [43:12<00:28,  1.27s/it]

batch id predict after epoch:  909


 98%|█████████▊| 910/931 [43:13<00:26,  1.27s/it]

batch id predict after epoch:  910


 98%|█████████▊| 911/931 [43:14<00:25,  1.27s/it]

batch id predict after epoch:  911


 98%|█████████▊| 912/931 [43:15<00:24,  1.27s/it]

batch id predict after epoch:  912


 98%|█████████▊| 913/931 [43:17<00:23,  1.28s/it]

batch id predict after epoch:  913


 98%|█████████▊| 914/931 [43:18<00:21,  1.29s/it]

batch id predict after epoch:  914


 98%|█████████▊| 915/931 [43:19<00:20,  1.30s/it]

batch id predict after epoch:  915


 98%|█████████▊| 916/931 [43:21<00:19,  1.29s/it]

batch id predict after epoch:  916


 98%|█████████▊| 917/931 [43:22<00:18,  1.29s/it]

batch id predict after epoch:  917


 99%|█████████▊| 918/931 [43:23<00:16,  1.30s/it]

batch id predict after epoch:  918


 99%|█████████▊| 919/931 [43:24<00:15,  1.30s/it]

batch id predict after epoch:  919


 99%|█████████▉| 920/931 [43:26<00:14,  1.33s/it]

batch id predict after epoch:  920


 99%|█████████▉| 921/931 [43:27<00:13,  1.32s/it]

batch id predict after epoch:  921


 99%|█████████▉| 922/931 [43:28<00:11,  1.31s/it]

batch id predict after epoch:  922


 99%|█████████▉| 923/931 [43:30<00:10,  1.30s/it]

batch id predict after epoch:  923


 99%|█████████▉| 924/931 [43:31<00:09,  1.30s/it]

batch id predict after epoch:  924


 99%|█████████▉| 925/931 [43:32<00:07,  1.30s/it]

batch id predict after epoch:  925


 99%|█████████▉| 926/931 [43:34<00:06,  1.31s/it]

batch id predict after epoch:  926


100%|█████████▉| 927/931 [43:35<00:05,  1.30s/it]

batch id predict after epoch:  927


100%|█████████▉| 928/931 [43:36<00:03,  1.29s/it]

batch id predict after epoch:  928


100%|█████████▉| 929/931 [43:37<00:02,  1.30s/it]

batch id predict after epoch:  929


100%|█████████▉| 930/931 [43:39<00:01,  1.29s/it]

batch id predict after epoch:  930


100%|██████████| 931/931 [43:39<00:00,  2.81s/it]

batch id predict after epoch:  931





[1 0 0 ... 0 0 0]
Finished Training


In [None]:
# Switch the trained network to evaluation mode. Because this call is the last
# expression in the cell, its return value is also rendered as the cell output.
# NOTE(review): assumes trained_net is the torch.nn.Module produced by the
# training cell — confirm it is still defined when running on a fresh kernel.
trained_net.eval()

In [None]:
import torch

# Rebuild the network architecture, then restore the weights stored in the
# checkpoint file on Drive.
model_loaded = TripletNet(FeatureExtractNET())

# map_location remaps the stored tensors onto the current `device`, so a
# checkpoint written on a GPU runtime still loads when only the CPU is available.
checkpoint = torch.load('/content/drive/MyDrive/test3/model_epoch_5.pt', map_location=device)

# strict=False tolerates key mismatches without raising, so report them
# explicitly: if nearly every key shows up here, no weights were restored and
# the model is still randomly initialised.
load_result = model_loaded.load_state_dict(checkpoint, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print('missing keys:', load_result.missing_keys)
    print('unexpected keys:', load_result.unexpected_keys)

model_loaded.to(device)
# Evaluation mode for inference; as the last expression of the cell, the
# module is echoed as the cell output.
model_loaded.eval()

In [None]:
# Score the checkpointed model with val_accuracy and print the result.
# Gradient tracking is disabled for the whole evaluation.
# NOTE(review): the loader passed here is train_loader, so despite the "val"
# naming the reported number is computed on whatever train_loader yields —
# confirm this is intended rather than a held-out validation loader.
with torch.no_grad():
  # A fresh iterator over the loader is handed to val_accuracy together with
  # the loader itself.
  valloader_iterator = iter(train_loader)
  # presumably returns the mean accuracy over the loader; val_accuracy is
  # defined elsewhere in the notebook — verify its contract there.
  mean_accuracy = val_accuracy(model_loaded, train_loader, valloader_iterator)
  print(mean_accuracy)

In [None]:
# Count the lines in the submission file and print the total.
# Plain 'r' is used rather than 'rU': the 'U' flag was removed in Python 3.11
# (open() raises ValueError), and text mode already applies universal-newline
# handling by default. The `with` block closes the handle, and streaming the
# file avoids materialising every line just to count them.
with open("/content/drive/MyDrive/test3/submission_1epoch.txt", 'r') as submission_file:
    count = sum(1 for _ in submission_file)
print(count)