# CVAE for MNIST  

The purpose of this demo is to help you learn about conditional variational autoencoders (CVAEs). The algorithm implemented here is from the paper "Learning Structured Output Representation using Deep Conditional Generative Models" by 
Kihyuk Sohn, Honglak Lee, and Xinchen Yan (https://openreview.net/forum?id=rJWXGDWd-H).

The implementation follows the logic in https://github.com/AntixK/PyTorch-VAE/blob/master/models/cvae.py.

You are free to change the model architecture or any other part of the logic.

If you have any suggestions or find errors, please don't hesitate to email me at jayanta.jayantamukherjee@gmail.com


In [1]:
import torch
from torch import nn
from torch import Tensor
from torch.nn import functional as F


### Conditional VAE Models 

In [2]:
class ConditionalVAE(torch.nn.Module):
    """Convolutional conditional variational autoencoder (CVAE).

    The class label vector is injected twice: it is linearly embedded into an
    extra ``img_size x img_size`` image channel that is concatenated to the
    encoder input, and it is concatenated to the latent code before decoding.

    The encoder is a stack of stride-2 convolutions (one per entry of
    ``hidden_dims``); the decoder mirrors it with stride-2 transposed
    convolutions. The reconstruction therefore has spatial size
    ``bottleneck * 2 ** len(hidden_dims)``, which equals ``img_size`` whenever
    ``img_size`` is divisible by ``2 ** len(hidden_dims)``.
    """

    def __init__(self,
                 in_channels: int,
                 num_classes: int,
                 latent_dim: int,
                 hidden_dims: list = None,
                 img_size: int = 64,
                 **kwargs) -> None:
        """
        :param in_channels: (Int) Number of channels of the input images
        :param num_classes: (Int) Length of the label vector fed as condition
        :param latent_dim: (Int) Dimensionality of the latent Gaussian
        :param hidden_dims: (List[int]) Channel widths of the encoder stages;
                            defaults to [32, 64, 128, 256, 512]. The list is
                            copied, never modified.
        :param img_size: (Int) Height/width of the (square) input images
        :param kwargs: Ignored; accepted for configuration pass-through
        """
        super(ConditionalVAE, self).__init__()
        self.latent_dim = latent_dim
        self.img_size = img_size

        self.embed_class = nn.Linear(num_classes, img_size * img_size)
        self.embed_data = nn.Conv2d(in_channels, in_channels, kernel_size=1)

        if hidden_dims is None:
            hidden_dims = [32, 64, 128, 256, 512]
        else:
            # Work on a private copy so the caller's list is never mutated.
            hidden_dims = list(hidden_dims)
        if not hidden_dims:
            raise ValueError("hidden_dims must contain at least one entry")

        # Reconstructions must have as many channels as the input images.
        out_channels = in_channels

        # Build Encoder
        encoder_blocks = []
        prev_channels = in_channels + 1  # +1 for the embedded label channel
        feature_size = img_size
        for h_dim in hidden_dims:
            encoder_blocks.append(
                nn.Sequential(
                    nn.Conv2d(prev_channels, out_channels=h_dim,
                              kernel_size=3, stride=2, padding=1),
                    nn.BatchNorm2d(h_dim),
                    nn.LeakyReLU())
            )
            prev_channels = h_dim
            # Output size of a k=3, s=2, p=1 convolution.
            feature_size = (feature_size - 1) // 2 + 1

        # Shape of the encoder bottleneck; decode() reshapes back to it.
        # For the default configuration this is 512 channels at 2 x 2.
        self._feature_channels = hidden_dims[-1]
        self._feature_size = feature_size
        flat_features = self._feature_channels * feature_size * feature_size

        self.encoder = nn.Sequential(*encoder_blocks)
        self.fc_mu = nn.Linear(flat_features, latent_dim)
        self.fc_var = nn.Linear(flat_features, latent_dim)

        # Build Decoder
        self.decoder_input = nn.Linear(latent_dim + num_classes, flat_features)

        decoder_dims = hidden_dims[::-1]
        decoder_blocks = []
        for wide, narrow in zip(decoder_dims[:-1], decoder_dims[1:]):
            decoder_blocks.append(
                nn.Sequential(
                    nn.ConvTranspose2d(wide,
                                       narrow,
                                       kernel_size=3,
                                       stride=2,
                                       padding=1,
                                       output_padding=1),
                    nn.BatchNorm2d(narrow),
                    nn.LeakyReLU())
            )

        self.decoder = nn.Sequential(*decoder_blocks)

        self.final_layer = nn.Sequential(
            nn.ConvTranspose2d(decoder_dims[-1],
                               decoder_dims[-1],
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               output_padding=1),
            nn.BatchNorm2d(decoder_dims[-1]),
            nn.LeakyReLU(),
            nn.Conv2d(decoder_dims[-1], out_channels=out_channels,
                      kernel_size=3, padding=1),
            nn.Tanh())

    def encode(self, input: Tensor):
        """
        Encodes the input by passing through the encoder network
        and returns the latent codes.
        :param input: (Tensor) Input tensor to encoder [N x C x H x W],
                      where C already includes the label channel
        :return: (List[Tensor]) [mu, log_var], each [N x latent_dim]
        """
        features = torch.flatten(self.encoder(input), start_dim=1)

        # Two linear heads give the mean and the log-variance
        # of the latent Gaussian distribution
        mu = self.fc_mu(features)
        log_var = self.fc_var(features)

        return [mu, log_var]

    def decode(self, z: Tensor) -> Tensor:
        """
        Maps a latent code (already concatenated with the label vector)
        back to image space.
        :param z: (Tensor) [N x (latent_dim + num_classes)]
        :return: (Tensor) Reconstruction, squashed to [-1, 1] by Tanh
        """
        result = self.decoder_input(z)
        # Restore the bottleneck feature-map shape recorded in __init__.
        result = result.view(-1,
                             self._feature_channels,
                             self._feature_size,
                             self._feature_size)
        result = self.decoder(result)
        return self.final_layer(result)

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick: draws z = mu + std * eps with
        eps ~ N(0, I), using a single sample per input.
        :param mu: (Tensor) Mean of the latent Gaussian
        :param logvar: (Tensor) Log-variance of the latent Gaussian
        :return: (Tensor) Sampled latent code, same shape as mu
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, input: Tensor, **kwargs):
        """
        Runs a full encode / sample / decode pass.
        :param input: (Tensor) Images [N x C x img_size x img_size]
        :param kwargs: must contain 'labels', a tensor [N x num_classes]
        :return: (List[Tensor]) [reconstruction, input, mu, log_var]
        """
        y = kwargs['labels'].float()

        # Turn the label vector into one extra image-sized channel.
        label_plane = self.embed_class(y)
        label_plane = label_plane.view(-1, self.img_size, self.img_size).unsqueeze(1)
        image_planes = self.embed_data(input)

        x = torch.cat([image_planes, label_plane], dim=1)
        mu, log_var = self.encode(x)

        z = self.reparameterize(mu, log_var)

        # Condition the decoder on the labels as well.
        z = torch.cat([z, y], dim=1)
        return [self.decode(z), input, mu, log_var]

    def loss_function(self,
                      *args,
                      **kwargs) -> dict:
        """
        Computes the VAE loss: MSE reconstruction + weighted KL divergence.
        :param args: (reconstruction, input, mu, log_var) as returned
                     by forward()
        :param kwargs: must contain 'M_N', the weight applied to the KL term
        :return: (dict) 'loss', 'Reconstruction_Loss' and 'KLD'; note that
                 'KLD' holds the negated KL term (kept for compatibility
                 with existing logging)
        """
        recons, target, mu, log_var = args[0], args[1], args[2], args[3]

        kld_weight = kwargs['M_N']  # Account for the minibatch samples from the dataset
        recons_loss = F.mse_loss(recons, target)

        # Closed-form KL(N(mu, sigma^2) || N(0, I)), summed over latent
        # dimensions and averaged over the batch.
        kld_loss = torch.mean(
            -0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim=1),
            dim=0)

        loss = recons_loss + kld_weight * kld_loss
        return {'loss': loss, 'Reconstruction_Loss': recons_loss, 'KLD': -kld_loss}

    def sample(self,
               num_samples: int,
               current_device: int,
               **kwargs) -> Tensor:
        """
        Samples from the latent space and return the corresponding
        image space map.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the model
        :param kwargs: must contain 'labels', a tensor
                       [num_samples x num_classes]
        :return: (Tensor)
        """
        # Labels and noise must live on the same device before concatenation.
        y = kwargs['labels'].float().to(current_device)
        z = torch.randn(num_samples,
                        self.latent_dim)
        z = z.to(current_device)

        z = torch.cat([z, y], dim=1)
        return self.decode(z)

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :param kwargs: forwarded to forward(); must contain 'labels'
        :return: (Tensor) [B x C x H x W]
        """
        return self.forward(x, **kwargs)[0]

### VAE

In [3]:
import yaml
import argparse
import numpy as np

from VAEXperiment import VAEXperiment
from pytorch_lightning import Trainer
from pytorch_lightning.logging import TestTubeLogger


# Experiment settings handed to VAEXperiment (dataset, optimiser, scheduler).
exp_params = {
  "dataset": "celeba",
  "data_path": "../../shared/Data/",
  "img_size": 64,
  "batch_size": 144,
  "LR": 0.005,
  "weight_decay": 0.0,
  "scheduler_gamma": 0.95
}

# Trainer settings kept in one place and unpacked into Trainer(...) below,
# instead of being repeated there as literals.
# NOTE(review): both max_nb_epochs and max_epochs are passed, as in the
# original cell — presumably old/new spellings of the same option; confirm
# against the installed pytorch_lightning version.
trainer_params = {
  "gpus": 1,
  "max_nb_epochs": 50,
  "max_epochs": 50
}

# img_size is taken from exp_params so the model and the data pipeline
# cannot drift apart (it is 64, the model's default).
# num_classes=40 presumably matches the 40 CelebA attribute labels — verify
# against the dataset loader.
model = ConditionalVAE(in_channels=3,
                       num_classes=40,
                       latent_dim=128,
                       img_size=exp_params["img_size"])

experiment = VAEXperiment(model,
                          exp_params)

tt_logger = TestTubeLogger(
    save_dir="logs/",
    name="ConditionalVAE",
    debug=False,
    create_git_tag=False
)

runner = Trainer(default_save_path=tt_logger.save_dir,
                 min_nb_epochs=1,
                 logger=tt_logger,
                 log_save_interval=100,
                 train_percent_check=1.,
                 val_percent_check=1.,
                 num_sanity_val_steps=5,
                 early_stop_callback=False,
                 **trainer_params)

print("======= Training ConditionalVAE =======")
runner.fit(experiment)

INFO:root:gpu available: True, used: True
INFO:root:VISIBLE GPUS: 0


Files already downloaded and verified


INFO:root:
                   Name             Type Params
0                 model   ConditionalVAE    4 M
1     model.embed_class           Linear  167 K
2      model.embed_data           Conv2d   12  
3         model.encoder       Sequential    1 M
4       model.encoder.0       Sequential    1 K
5     model.encoder.0.0           Conv2d    1 K
6     model.encoder.0.1      BatchNorm2d   64  
7     model.encoder.0.2        LeakyReLU    0  
8       model.encoder.1       Sequential   18 K
9     model.encoder.1.0           Conv2d   18 K
10    model.encoder.1.1      BatchNorm2d  128  
11    model.encoder.1.2        LeakyReLU    0  
12      model.encoder.2       Sequential   74 K
13    model.encoder.2.0           Conv2d   73 K
14    model.encoder.2.1      BatchNorm2d  256  
15    model.encoder.2.2        LeakyReLU    0  
16      model.encoder.3       Sequential  295 K
17    model.encoder.3.0           Conv2d  295 K
18    model.encoder.3.1      BatchNorm2d  512  
19    model.encoder.3.2      

Epoch 1:   0%|          | 0/1268 [00:00<?, ?batch/s]



Epoch 1:  89%|████████▉ | 1130/1268 [04:43<00:35,  3.92batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Validating:   0%|          | 0/138 [00:00<?, ?batch/s][A
Epoch 1:  89%|████████▉ | 1131/1268 [04:43<00:31,  4.30batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  89%|████████▉ | 1132/1268 [04:43<00:29,  4.62batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  89%|████████▉ | 1133/1268 [04:43<00:28,  4.75batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  89%|████████▉ | 1134/1268 [04:43<00:26,  4.99batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  90%|████████▉ | 1135/1268 [04:44<00:25,  5.17batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  90%|████████▉ | 1136/1268 [04:44<00:24,  5.31batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  90%|████████▉ | 1137/1268 [04:44<00:24,  5.37batch/s, batch_idx=1129, gpu=0, loss=0.090, v_num=0]
Epoch 1:  90%|████████▉ | 1138/1268 [04:44<00:24,  5.36batch/s, batch_idx=1129

  "Did not find hyperparameters at model.hparams. Saving checkpoint without"


Epoch 2:  89%|████████▉ | 1130/1268 [04:31<00:33,  4.12batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Validating:   0%|          | 0/138 [00:00<?, ?batch/s][A
Epoch 2:  89%|████████▉ | 1131/1268 [04:31<00:30,  4.52batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  89%|████████▉ | 1132/1268 [04:31<00:28,  4.81batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  89%|████████▉ | 1133/1268 [04:31<00:26,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  89%|████████▉ | 1134/1268 [04:32<00:25,  5.31batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  90%|████████▉ | 1135/1268 [04:32<00:24,  5.47batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  90%|████████▉ | 1136/1268 [04:32<00:22,  5.75batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  90%|████████▉ | 1137/1268 [04:32<00:22,  5.93batch/s, batch_idx=1129, gpu=0, loss=0.082, v_num=0]
Epoch 2:  90%|████████▉ | 1138/1268 [04:32<00:21,  5.97batch/s, batch_idx=1129

Epoch 3:  90%|████████▉ | 1141/1268 [04:35<00:21,  5.94batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1142/1268 [04:35<00:21,  5.87batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1143/1268 [04:36<00:21,  5.81batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1144/1268 [04:36<00:21,  5.79batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1145/1268 [04:36<00:21,  5.79batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1146/1268 [04:36<00:21,  5.80batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  90%|█████████ | 1147/1268 [04:36<00:20,  5.81batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  91%|█████████ | 1148/1268 [04:37<00:20,  5.79batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  91%|█████████ | 1149/1268 [04:37<00:20,  5.76batch/s, batch_idx=1129, gpu=0, loss=0.079, v_num=0]
Epoch 3:  91%|█████████ | 11

Epoch 4:  91%|█████████ | 1152/1268 [04:37<00:19,  5.94batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████ | 1153/1268 [04:37<00:19,  6.00batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████ | 1154/1268 [04:38<00:18,  6.03batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████ | 1155/1268 [04:38<00:18,  6.07batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████ | 1156/1268 [04:38<00:18,  6.10batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████ | 1157/1268 [04:38<00:18,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████▏| 1158/1268 [04:38<00:17,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████▏| 1159/1268 [04:38<00:17,  6.26batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  91%|█████████▏| 1160/1268 [04:39<00:17,  6.28batch/s, batch_idx=1129, gpu=0, loss=0.078, v_num=0]
Epoch 4:  92%|█████████▏| 11

Epoch 5:  92%|█████████▏| 1163/1268 [04:39<00:16,  6.47batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1164/1268 [04:39<00:16,  6.35batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1165/1268 [04:39<00:16,  6.34batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1166/1268 [04:40<00:16,  6.36batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1167/1268 [04:40<00:15,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1168/1268 [04:40<00:15,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1169/1268 [04:40<00:15,  6.24batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1170/1268 [04:40<00:16,  6.10batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 1171/1268 [04:40<00:16,  6.02batch/s, batch_idx=1129, gpu=0, loss=0.077, v_num=0]
Epoch 5:  92%|█████████▏| 11

Epoch 6:  93%|█████████▎| 1174/1268 [04:43<00:14,  6.30batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1175/1268 [04:43<00:14,  6.30batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1176/1268 [04:44<00:14,  6.32batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1177/1268 [04:44<00:14,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1178/1268 [04:44<00:14,  6.34batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1179/1268 [04:44<00:14,  6.34batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1180/1268 [04:44<00:13,  6.35batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1181/1268 [04:44<00:13,  6.43batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 1182/1268 [04:44<00:13,  6.48batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 6:  93%|█████████▎| 11

Epoch 7:  93%|█████████▎| 1185/1268 [04:48<00:16,  5.09batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▎| 1186/1268 [04:48<00:16,  5.06batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▎| 1187/1268 [04:48<00:16,  4.93batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▎| 1188/1268 [04:48<00:16,  4.92batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 1189/1268 [04:48<00:16,  4.92batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 1190/1268 [04:49<00:15,  4.91batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 1191/1268 [04:49<00:15,  4.90batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 1192/1268 [04:49<00:15,  4.89batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 1193/1268 [04:49<00:15,  4.90batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 7:  94%|█████████▍| 11

Epoch 8:  94%|█████████▍| 1196/1268 [04:48<00:13,  5.29batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  94%|█████████▍| 1197/1268 [04:48<00:13,  5.29batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  94%|█████████▍| 1198/1268 [04:49<00:13,  5.28batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1199/1268 [04:49<00:13,  5.22batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1200/1268 [04:49<00:13,  5.16batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1201/1268 [04:49<00:13,  5.11batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1202/1268 [04:49<00:13,  5.07batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1203/1268 [04:50<00:12,  5.04batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▍| 1204/1268 [04:50<00:12,  5.01batch/s, batch_idx=1129, gpu=0, loss=0.076, v_num=0]
Epoch 8:  95%|█████████▌| 12

Epoch 9:  95%|█████████▌| 1207/1268 [04:50<00:11,  5.39batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  95%|█████████▌| 1208/1268 [04:50<00:11,  5.39batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  95%|█████████▌| 1209/1268 [04:50<00:10,  5.38batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  95%|█████████▌| 1210/1268 [04:51<00:10,  5.38batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 1211/1268 [04:51<00:10,  5.36batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 1212/1268 [04:51<00:10,  5.35batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 1213/1268 [04:51<00:10,  5.33batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 1214/1268 [04:51<00:10,  5.31batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 1215/1268 [04:51<00:10,  5.29batch/s, batch_idx=1129, gpu=0, loss=0.075, v_num=0]
Epoch 9:  96%|█████████▌| 12

Epoch 10:  96%|█████████▌| 1218/1268 [04:53<00:09,  5.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  96%|█████████▌| 1219/1268 [04:53<00:09,  5.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  96%|█████████▌| 1220/1268 [04:53<00:09,  5.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  96%|█████████▋| 1221/1268 [04:53<00:08,  5.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  96%|█████████▋| 1222/1268 [04:54<00:08,  5.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  96%|█████████▋| 1223/1268 [04:54<00:08,  5.24batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  97%|█████████▋| 1224/1268 [04:54<00:08,  5.22batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  97%|█████████▋| 1225/1268 [04:54<00:08,  5.21batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  97%|█████████▋| 1226/1268 [04:54<00:08,  5.20batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 10:  97%|████

Epoch 11:  97%|█████████▋| 1228/1268 [04:56<00:06,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1229/1268 [04:56<00:06,  6.36batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1230/1268 [04:56<00:05,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1231/1268 [04:56<00:05,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1232/1268 [04:57<00:05,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1233/1268 [04:57<00:05,  6.39batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1234/1268 [04:57<00:05,  6.40batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1235/1268 [04:57<00:05,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  97%|█████████▋| 1236/1268 [04:57<00:04,  6.41batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 11:  98%|████

Epoch 12:  98%|█████████▊| 1238/1268 [04:59<00:04,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1239/1268 [04:59<00:04,  6.39batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1240/1268 [04:59<00:04,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1241/1268 [04:59<00:04,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1242/1268 [05:00<00:04,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1243/1268 [05:00<00:03,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1244/1268 [05:00<00:03,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1245/1268 [05:00<00:03,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|█████████▊| 1246/1268 [05:00<00:03,  6.43batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 12:  98%|████

Epoch 13:  98%|█████████▊| 1248/1268 [05:04<00:03,  6.24batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▊| 1249/1268 [05:04<00:03,  6.24batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▊| 1250/1268 [05:04<00:02,  6.23batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▊| 1251/1268 [05:04<00:02,  6.24batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▊| 1252/1268 [05:04<00:02,  6.26batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▉| 1253/1268 [05:04<00:02,  6.27batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▉| 1254/1268 [05:05<00:02,  6.28batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▉| 1255/1268 [05:05<00:02,  6.29batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|█████████▉| 1256/1268 [05:05<00:01,  6.29batch/s, batch_idx=1129, gpu=0, loss=0.074, v_num=0]
Epoch 13:  99%|████

Epoch 14:  99%|█████████▉| 1258/1268 [05:04<00:01,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14:  99%|█████████▉| 1259/1268 [05:04<00:01,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14:  99%|█████████▉| 1260/1268 [05:04<00:01,  6.43batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14:  99%|█████████▉| 1261/1268 [05:04<00:01,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|█████████▉| 1262/1268 [05:04<00:00,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|█████████▉| 1263/1268 [05:05<00:00,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|█████████▉| 1264/1268 [05:05<00:00,  6.44batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|█████████▉| 1265/1268 [05:05<00:00,  6.45batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|█████████▉| 1266/1268 [05:05<00:00,  6.48batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 14: 100%|████

Epoch 15: 100%|██████████| 1268/1268 [05:11<00:00,  5.64batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  89%|████████▉ | 1130/1268 [04:45<00:33,  4.12batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Validating:   0%|          | 0/138 [00:00<?, ?batch/s][A
Epoch 16:  89%|████████▉ | 1131/1268 [04:45<00:30,  4.55batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  89%|████████▉ | 1132/1268 [04:45<00:27,  4.94batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  89%|████████▉ | 1133/1268 [04:45<00:26,  5.18batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  89%|████████▉ | 1134/1268 [04:45<00:24,  5.52batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  90%|████████▉ | 1135/1268 [04:45<00:23,  5.77batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  90%|████████▉ | 1136/1268 [04:46<00:22,  5.95batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 16:  90%|████████▉ | 1137/1268 [04:46<00:21,  6.08batch/s, batch

Epoch 17:  90%|████████▉ | 1138/1268 [04:47<00:21,  6.01batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|████████▉ | 1139/1268 [04:47<00:21,  6.09batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|████████▉ | 1140/1268 [04:47<00:20,  6.13batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|████████▉ | 1141/1268 [04:47<00:20,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|█████████ | 1142/1268 [04:47<00:20,  6.11batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|█████████ | 1143/1268 [04:47<00:20,  6.05batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|█████████ | 1144/1268 [04:48<00:20,  6.01batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|█████████ | 1145/1268 [04:48<00:20,  5.99batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|█████████ | 1146/1268 [04:48<00:20,  5.97batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 17:  90%|████

Epoch 18:  91%|█████████ | 1148/1268 [04:51<00:20,  5.77batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1149/1268 [04:51<00:20,  5.76batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1150/1268 [04:51<00:20,  5.73batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1151/1268 [04:51<00:20,  5.70batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1152/1268 [04:51<00:20,  5.65batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1153/1268 [04:52<00:20,  5.61batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1154/1268 [04:52<00:20,  5.58batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1155/1268 [04:52<00:20,  5.57batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|█████████ | 1156/1268 [04:52<00:20,  5.55batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 18:  91%|████

Epoch 19:  91%|█████████▏| 1158/1268 [04:52<00:20,  5.42batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  91%|█████████▏| 1159/1268 [04:52<00:20,  5.38batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  91%|█████████▏| 1160/1268 [04:53<00:20,  5.36batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1161/1268 [04:53<00:20,  5.35batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1162/1268 [04:53<00:19,  5.35batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1163/1268 [04:53<00:19,  5.34batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1164/1268 [04:53<00:19,  5.34batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1165/1268 [04:53<00:19,  5.34batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|█████████▏| 1166/1268 [04:54<00:19,  5.34batch/s, batch_idx=1129, gpu=0, loss=0.073, v_num=0]
Epoch 19:  92%|████

Epoch 20:  92%|█████████▏| 1168/1268 [04:55<00:15,  6.51batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  92%|█████████▏| 1169/1268 [04:55<00:15,  6.52batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  92%|█████████▏| 1170/1268 [04:55<00:15,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  92%|█████████▏| 1171/1268 [04:55<00:14,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  92%|█████████▏| 1172/1268 [04:55<00:14,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  93%|█████████▎| 1173/1268 [04:55<00:14,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  93%|█████████▎| 1174/1268 [04:55<00:14,  6.56batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  93%|█████████▎| 1175/1268 [04:56<00:14,  6.58batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  93%|█████████▎| 1176/1268 [04:56<00:13,  6.61batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 20:  93%|████

Epoch 21:  93%|█████████▎| 1178/1268 [04:58<00:15,  5.96batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1179/1268 [04:58<00:14,  5.98batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1180/1268 [04:58<00:14,  6.01batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1181/1268 [04:58<00:14,  6.03batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1182/1268 [04:59<00:14,  6.05batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1183/1268 [04:59<00:14,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1184/1268 [04:59<00:13,  6.05batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  93%|█████████▎| 1185/1268 [04:59<00:13,  6.06batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  94%|█████████▎| 1186/1268 [04:59<00:13,  6.09batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 21:  94%|████

Epoch 22:  94%|█████████▎| 1188/1268 [05:02<00:15,  5.20batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1189/1268 [05:02<00:15,  5.19batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1190/1268 [05:02<00:15,  5.14batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1191/1268 [05:02<00:15,  5.11batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1192/1268 [05:03<00:14,  5.08batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1193/1268 [05:03<00:14,  5.05batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1194/1268 [05:03<00:14,  5.04batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1195/1268 [05:03<00:14,  5.03batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|█████████▍| 1196/1268 [05:03<00:14,  5.01batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 22:  94%|████

Epoch 23:  94%|█████████▍| 1198/1268 [05:03<00:11,  6.25batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1199/1268 [05:04<00:11,  6.22batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1200/1268 [05:04<00:10,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1201/1268 [05:04<00:10,  6.20batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1202/1268 [05:04<00:10,  6.11batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1203/1268 [05:04<00:10,  6.06batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▍| 1204/1268 [05:04<00:10,  6.09batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▌| 1205/1268 [05:05<00:10,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|█████████▌| 1206/1268 [05:05<00:09,  6.23batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 23:  95%|████

Epoch 24:  95%|█████████▌| 1208/1268 [05:07<00:09,  6.66batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  95%|█████████▌| 1209/1268 [05:07<00:08,  6.68batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  95%|█████████▌| 1210/1268 [05:07<00:08,  6.68batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1211/1268 [05:07<00:08,  6.67batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1212/1268 [05:07<00:08,  6.66batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1213/1268 [05:07<00:08,  6.65batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1214/1268 [05:07<00:08,  6.65batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1215/1268 [05:08<00:07,  6.63batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|█████████▌| 1216/1268 [05:08<00:07,  6.62batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 24:  96%|████

Epoch 25:  96%|█████████▌| 1218/1268 [05:09<00:07,  6.74batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  96%|█████████▌| 1219/1268 [05:09<00:07,  6.74batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  96%|█████████▌| 1220/1268 [05:09<00:07,  6.75batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  96%|█████████▋| 1221/1268 [05:09<00:06,  6.74batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  96%|█████████▋| 1222/1268 [05:09<00:06,  6.75batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  96%|█████████▋| 1223/1268 [05:09<00:06,  6.73batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  97%|█████████▋| 1224/1268 [05:09<00:06,  6.72batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  97%|█████████▋| 1225/1268 [05:10<00:06,  6.57batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  97%|█████████▋| 1226/1268 [05:10<00:06,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 25:  97%|████

Epoch 26:  97%|█████████▋| 1228/1268 [05:11<00:06,  6.55batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1229/1268 [05:11<00:05,  6.52batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1230/1268 [05:11<00:05,  6.47batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1231/1268 [05:11<00:05,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1232/1268 [05:11<00:05,  6.40batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1233/1268 [05:11<00:05,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1234/1268 [05:12<00:05,  6.32batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1235/1268 [05:12<00:05,  6.28batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  97%|█████████▋| 1236/1268 [05:12<00:05,  6.25batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 26:  98%|████

Epoch 27:  98%|█████████▊| 1238/1268 [05:15<00:04,  6.52batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1239/1268 [05:15<00:04,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1240/1268 [05:15<00:04,  6.54batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1241/1268 [05:15<00:04,  6.46batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1242/1268 [05:15<00:04,  6.39batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1243/1268 [05:16<00:03,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1244/1268 [05:16<00:03,  6.44batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1245/1268 [05:16<00:03,  6.46batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|█████████▊| 1246/1268 [05:16<00:03,  6.47batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 27:  98%|████

Epoch 28:  98%|█████████▊| 1248/1268 [05:15<00:03,  6.26batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▊| 1249/1268 [05:15<00:03,  6.27batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▊| 1250/1268 [05:15<00:02,  6.29batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▊| 1251/1268 [05:16<00:02,  6.30batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▊| 1252/1268 [05:16<00:02,  6.36batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▉| 1253/1268 [05:16<00:02,  6.41batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▉| 1254/1268 [05:16<00:02,  6.45batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▉| 1255/1268 [05:16<00:02,  6.48batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|█████████▉| 1256/1268 [05:16<00:01,  6.51batch/s, batch_idx=1129, gpu=0, loss=0.072, v_num=0]
Epoch 28:  99%|████

Epoch 29:  99%|█████████▉| 1258/1268 [05:20<00:01,  6.54batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29:  99%|█████████▉| 1259/1268 [05:20<00:01,  6.57batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29:  99%|█████████▉| 1260/1268 [05:20<00:01,  6.62batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29:  99%|█████████▉| 1261/1268 [05:20<00:01,  6.67batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|█████████▉| 1262/1268 [05:20<00:00,  6.70batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|█████████▉| 1263/1268 [05:20<00:00,  6.72batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|█████████▉| 1264/1268 [05:21<00:00,  6.74batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|█████████▉| 1265/1268 [05:21<00:00,  6.74batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|█████████▉| 1266/1268 [05:21<00:00,  6.75batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 29: 100%|████

Epoch 30: 100%|██████████| 1268/1268 [05:26<00:00,  5.11batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  89%|████████▉ | 1130/1268 [04:59<00:32,  4.23batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Validating:   0%|          | 0/138 [00:00<?, ?batch/s][A
Epoch 31:  89%|████████▉ | 1131/1268 [04:59<00:29,  4.71batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  89%|████████▉ | 1132/1268 [05:00<00:26,  5.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  89%|████████▉ | 1133/1268 [05:00<00:24,  5.48batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  89%|████████▉ | 1134/1268 [05:00<00:23,  5.76batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  90%|████████▉ | 1135/1268 [05:00<00:22,  5.97batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  90%|████████▉ | 1136/1268 [05:00<00:21,  6.17batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 31:  90%|████████▉ | 1137/1268 [05:00<00:20,  6.33batch/s, batch

Epoch 32:  90%|████████▉ | 1138/1268 [05:02<00:20,  6.29batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|████████▉ | 1139/1268 [05:02<00:20,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|████████▉ | 1140/1268 [05:02<00:19,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|████████▉ | 1141/1268 [05:02<00:19,  6.46batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|█████████ | 1142/1268 [05:02<00:19,  6.46batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|█████████ | 1143/1268 [05:02<00:19,  6.39batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|█████████ | 1144/1268 [05:03<00:19,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|█████████ | 1145/1268 [05:03<00:19,  6.30batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|█████████ | 1146/1268 [05:03<00:19,  6.26batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 32:  90%|████

Epoch 33:  91%|█████████ | 1148/1268 [05:05<00:20,  5.81batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1149/1268 [05:05<00:20,  5.77batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1150/1268 [05:05<00:20,  5.74batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1151/1268 [05:05<00:20,  5.72batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1152/1268 [05:05<00:20,  5.71batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1153/1268 [05:05<00:20,  5.68batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1154/1268 [05:06<00:20,  5.66batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1155/1268 [05:06<00:20,  5.63batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|█████████ | 1156/1268 [05:06<00:20,  5.59batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 33:  91%|████

Epoch 34:  91%|█████████▏| 1158/1268 [05:07<00:18,  5.94batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  91%|█████████▏| 1159/1268 [05:07<00:18,  5.97batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  91%|█████████▏| 1160/1268 [05:07<00:18,  5.99batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1161/1268 [05:08<00:17,  6.02batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1162/1268 [05:08<00:17,  6.03batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1163/1268 [05:08<00:17,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1164/1268 [05:08<00:17,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1165/1268 [05:08<00:17,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|█████████▏| 1166/1268 [05:08<00:16,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 34:  92%|████

Epoch 35:  92%|█████████▏| 1168/1268 [05:08<00:16,  6.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  92%|█████████▏| 1169/1268 [05:09<00:16,  6.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  92%|█████████▏| 1170/1268 [05:09<00:15,  6.15batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  92%|█████████▏| 1171/1268 [05:09<00:15,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  92%|█████████▏| 1172/1268 [05:09<00:15,  6.17batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  93%|█████████▎| 1173/1268 [05:09<00:15,  6.23batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  93%|█████████▎| 1174/1268 [05:09<00:14,  6.28batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  93%|█████████▎| 1175/1268 [05:10<00:14,  6.31batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  93%|█████████▎| 1176/1268 [05:10<00:14,  6.32batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 35:  93%|████

Epoch 36:  93%|█████████▎| 1178/1268 [05:12<00:17,  5.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1179/1268 [05:12<00:17,  5.13batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1180/1268 [05:13<00:17,  5.12batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1181/1268 [05:13<00:16,  5.12batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1182/1268 [05:13<00:16,  5.12batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1183/1268 [05:13<00:16,  5.11batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1184/1268 [05:13<00:16,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  93%|█████████▎| 1185/1268 [05:14<00:16,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  94%|█████████▎| 1186/1268 [05:14<00:16,  5.09batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 36:  94%|████

Epoch 37:  94%|█████████▎| 1188/1268 [05:14<00:12,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1189/1268 [05:14<00:12,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1190/1268 [05:14<00:12,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1191/1268 [05:15<00:12,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1192/1268 [05:15<00:12,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1193/1268 [05:15<00:12,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1194/1268 [05:15<00:11,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1195/1268 [05:15<00:11,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|█████████▍| 1196/1268 [05:15<00:11,  6.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 37:  94%|████

Epoch 38:  94%|█████████▍| 1198/1268 [05:18<00:11,  6.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1199/1268 [05:19<00:11,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1200/1268 [05:19<00:10,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1201/1268 [05:19<00:10,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1202/1268 [05:19<00:10,  6.20batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1203/1268 [05:19<00:10,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▍| 1204/1268 [05:19<00:10,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▌| 1205/1268 [05:20<00:10,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|█████████▌| 1206/1268 [05:20<00:10,  6.18batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 38:  95%|████

Epoch 39:  95%|█████████▌| 1208/1268 [05:21<00:09,  6.05batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  95%|█████████▌| 1209/1268 [05:21<00:09,  6.05batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  95%|█████████▌| 1210/1268 [05:21<00:09,  6.07batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1211/1268 [05:21<00:09,  6.11batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1212/1268 [05:22<00:09,  6.14batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1213/1268 [05:22<00:08,  6.17batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1214/1268 [05:22<00:08,  6.19batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1215/1268 [05:22<00:08,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|█████████▌| 1216/1268 [05:22<00:08,  6.23batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 39:  96%|████

Epoch 40:  96%|█████████▌| 1218/1268 [05:23<00:08,  6.21batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  96%|█████████▌| 1219/1268 [05:23<00:07,  6.27batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  96%|█████████▌| 1220/1268 [05:23<00:07,  6.32batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  96%|█████████▋| 1221/1268 [05:24<00:07,  6.35batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  96%|█████████▋| 1222/1268 [05:24<00:07,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  96%|█████████▋| 1223/1268 [05:24<00:07,  6.40batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  97%|█████████▋| 1224/1268 [05:24<00:06,  6.40batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  97%|█████████▋| 1225/1268 [05:24<00:06,  6.41batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  97%|█████████▋| 1226/1268 [05:24<00:06,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 40:  97%|████

Epoch 41:  97%|█████████▋| 1228/1268 [05:30<00:06,  5.97batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1229/1268 [05:30<00:06,  5.99batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1230/1268 [05:30<00:06,  6.04batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1231/1268 [05:30<00:06,  6.07batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1232/1268 [05:30<00:05,  6.10batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1233/1268 [05:30<00:05,  6.12batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1234/1268 [05:31<00:05,  6.14batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1235/1268 [05:31<00:05,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  97%|█████████▋| 1236/1268 [05:31<00:05,  6.16batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 41:  98%|████

Epoch 42:  98%|█████████▊| 1238/1268 [05:30<00:05,  5.12batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1239/1268 [05:30<00:05,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1240/1268 [05:30<00:05,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1241/1268 [05:30<00:05,  5.10batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1242/1268 [05:30<00:05,  5.11batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1243/1268 [05:31<00:04,  5.13batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1244/1268 [05:31<00:04,  5.15batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1245/1268 [05:31<00:04,  5.17batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|█████████▊| 1246/1268 [05:31<00:04,  5.20batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 42:  98%|████

Epoch 43:  98%|█████████▊| 1248/1268 [05:29<00:03,  6.46batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▊| 1249/1268 [05:30<00:02,  6.44batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▊| 1250/1268 [05:30<00:02,  6.41batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▊| 1251/1268 [05:30<00:02,  6.36batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▊| 1252/1268 [05:30<00:02,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▉| 1253/1268 [05:30<00:02,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▉| 1254/1268 [05:30<00:02,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▉| 1255/1268 [05:31<00:02,  6.43batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|█████████▉| 1256/1268 [05:31<00:01,  6.43batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 43:  99%|████

Epoch 44:  99%|█████████▉| 1258/1268 [05:33<00:01,  5.79batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44:  99%|█████████▉| 1259/1268 [05:33<00:01,  5.71batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44:  99%|█████████▉| 1260/1268 [05:33<00:01,  5.84batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44:  99%|█████████▉| 1261/1268 [05:33<00:01,  5.90batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|█████████▉| 1262/1268 [05:34<00:01,  5.91batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|█████████▉| 1263/1268 [05:34<00:00,  5.91batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|█████████▉| 1264/1268 [05:34<00:00,  5.90batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|█████████▉| 1265/1268 [05:34<00:00,  5.93batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|█████████▉| 1266/1268 [05:34<00:00,  5.99batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 44: 100%|████

Epoch 45: 100%|██████████| 1268/1268 [05:37<00:00,  6.53batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  89%|████████▉ | 1130/1268 [05:29<00:34,  3.97batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Validating:   0%|          | 0/138 [00:00<?, ?batch/s][A
Epoch 46:  89%|████████▉ | 1131/1268 [05:29<00:31,  4.39batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  89%|████████▉ | 1132/1268 [05:29<00:28,  4.75batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  89%|████████▉ | 1133/1268 [05:29<00:26,  5.05batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  89%|████████▉ | 1134/1268 [05:29<00:25,  5.30batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  90%|████████▉ | 1135/1268 [05:29<00:24,  5.49batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  90%|████████▉ | 1136/1268 [05:30<00:22,  5.74batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 46:  90%|████████▉ | 1137/1268 [05:30<00:22,  5.85batch/s, batch

Epoch 47:  90%|████████▉ | 1138/1268 [05:23<00:21,  6.00batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|████████▉ | 1139/1268 [05:23<00:21,  6.00batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|████████▉ | 1140/1268 [05:23<00:21,  6.03batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|████████▉ | 1141/1268 [05:23<00:20,  6.23batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|█████████ | 1142/1268 [05:23<00:19,  6.32batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|█████████ | 1143/1268 [05:24<00:19,  6.38batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|█████████ | 1144/1268 [05:24<00:19,  6.39batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|█████████ | 1145/1268 [05:24<00:19,  6.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|█████████ | 1146/1268 [05:24<00:19,  6.37batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 47:  90%|████

Epoch 48:  91%|█████████ | 1148/1268 [05:22<00:20,  5.94batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1149/1268 [05:23<00:20,  5.81batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1150/1268 [05:23<00:20,  5.76batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1151/1268 [05:23<00:20,  5.72batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1152/1268 [05:23<00:20,  5.66batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1153/1268 [05:23<00:20,  5.59batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1154/1268 [05:23<00:20,  5.58batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1155/1268 [05:24<00:20,  5.58batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|█████████ | 1156/1268 [05:24<00:20,  5.59batch/s, batch_idx=1129, gpu=0, loss=0.070, v_num=0]
Epoch 48:  91%|████

Epoch 49:  91%|█████████▏| 1158/1268 [05:24<00:20,  5.42batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  91%|█████████▏| 1159/1268 [05:24<00:20,  5.39batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  91%|█████████▏| 1160/1268 [05:24<00:20,  5.36batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1161/1268 [05:24<00:20,  5.34batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1162/1268 [05:24<00:19,  5.32batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1163/1268 [05:25<00:19,  5.30batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1164/1268 [05:25<00:19,  5.28batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1165/1268 [05:25<00:19,  5.27batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|█████████▏| 1166/1268 [05:25<00:19,  5.25batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 49:  92%|████

Epoch 50:  92%|█████████▏| 1168/1268 [05:29<00:15,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  92%|█████████▏| 1169/1268 [05:29<00:15,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  92%|█████████▏| 1170/1268 [05:29<00:15,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  92%|█████████▏| 1171/1268 [05:29<00:15,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  92%|█████████▏| 1172/1268 [05:30<00:15,  6.33batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  93%|█████████▎| 1173/1268 [05:30<00:14,  6.34batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  93%|█████████▎| 1174/1268 [05:30<00:14,  6.36batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  93%|█████████▎| 1175/1268 [05:30<00:14,  6.41batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  93%|█████████▎| 1176/1268 [05:30<00:14,  6.44batch/s, batch_idx=1129, gpu=0, loss=0.071, v_num=0]
Epoch 50:  93%|████

1