In [1]:
import os
import time
import math
import numpy as np
import random
import json
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

from utils import to_gpu, train_ngram_lm, get_ppl, create_exp_dir, Dictionary, length_sort
from models import Seq2Seq, MLP_D, MLP_D_local, MLP_G
from bleu_self import *
from bleu_test import *
import datetime
now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

In [2]:
import argparse

parser = argparse.ArgumentParser(description='TILGAN for unconditional generation')

In [3]:
import pandas as pd
from transformers import *
from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [4]:
seed=4
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [5]:
#data_path="data/MS_COCO"
data_path="../../yahoo/unlabelled_small"
save="./results/yahoo_merge_assigned_results"
maxlen=16
batch_size=48
eval_batch_size = 16
noise_seq_length = 15
add_noise=True #what does this do? - question applies to most parameters
emsize=512
nhidden=512
nlayers=2
nheads=4
nff=1024
aehidden=56
noise_r=0.05
hidden_init=True
dropout=0.3
gpu=True
z_size=100
arch_g='300-300'
gan_g_activation=True
arch_d='300-300'
gan_d_local=False
gan_d_local_windowsize=3
arch_d_local='300-300'
lr_ae=0.12
lr_gan_e=1e-04
beta1=0.5
lr_gan_g=4e-04
lr_gan_d=1e-04
epochs=200
sample=True
clip=1
log_interval=100
gan_lambda=0.1
niters_gan_d=1
niters_gan_g=1
niters_gan_ae=1
niters_gan_dec=1
niters_gan_schedule=''
niters_ae=1
gan_type='kl'
enhance_dec=True
gan_gp_lambda=1
vocab_size=0
lowercase=True

In [6]:
# data="../../yahoo/yahoo_everything.csv"

label_list = ["UNK",1,2,3,4,5,6,7,8,9,10]

# df_yahoo = pd.read_csv(data)
# #df_yahoo=df_yahoo.rename(columns = {"Unnamed: 0":'label'})
# df_yahoo=df_yahoo.set_index("Unnamed: 0")
# df = df_yahoo.sample(frac=1)
# df.head()

In [7]:
# numbers=list(df['label'].unique())
# list_zeros = [0]*len(numbers)
# count_dictionary = dict(zip(numbers, list_zeros))

# values_array_train_labelled=[]
# values_array_test_labelled=[]
# values_array_test_unlabelled=[]
# values_array_train_unlabelled=[]
# values_array_unlabelled=[]
# data_all=[]
# for index, row in df.iterrows():
#     if count_dictionary[row['label']]<20:
#         count_dictionary[row['label']]=count_dictionary[row['label']]+1
#         values_array_train_labelled.append((row['question_1'],row['label']))
#     elif count_dictionary[row['label']]<60:
#         count_dictionary[row['label']]=count_dictionary[row['label']]+1
#         values_array_test_labelled.append((row['question_1'],row['label']))
#     elif count_dictionary[row['label']]<600:
#         count_dictionary[row['label']]=count_dictionary[row['label']]+1
#         values_array_unlabelled.append((row['question_1'],'UNK'))
#     elif count_dictionary[row['label']]<1600:
#         count_dictionary[row['label']]=count_dictionary[row['label']]+1
#         values_array_test_unlabelled.append((row['question_1'],'UNK'))
#     elif count_dictionary[row['label']]<7600:
#         count_dictionary[row['label']]=count_dictionary[row['label']]+1
#         values_array_train_unlabelled.append((row['question_1'],'UNK'))
#     data_all.append(row['question_1'])

In [8]:
# train_l =  values_array_train_labelled
# test_l = values_array_test_labelled
# test_u = values_array_test_unlabelled
# train_u = values_array_train_unlabelled

In [9]:
df_train_l=pd.read_csv("../../yahoo/assigned/train_l.csv", index_col="Unnamed: 0")
df_test_l=pd.read_csv("../../yahoo/assigned/test_l.csv", index_col="Unnamed: 0")
df_u=pd.read_csv("../../yahoo/assigned/u.csv", index_col="Unnamed: 0")
df_train_u=pd.read_csv("../../yahoo/assigned/train_u.csv", index_col="Unnamed: 0")#.head(30000)
df_test_u=pd.read_csv("../../yahoo/assigned/test_u.csv", index_col="Unnamed: 0")#.head(5000)
df_all = pd.concat([df_train_l, df_test_l, df_u, df_train_u, df_test_u])

In [10]:
train_l =  list(df_train_l.to_records(index=False))
test_l = list(df_test_l.to_records(index=False))
u_list = list(df_u.to_records(index=False))
test_u = list(df_test_u.to_records(index=False))
train_u = list(df_train_u.to_records(index=False))
data_all = list(df_all["0"])

In [11]:
class Corpus(object):
    def __init__(self, data_all, train_l, test_l, train_u, test_u, maxlen, vocab_size=11000, lowercase=False):
        self.dictionary = Dictionary()
        self.maxlen = maxlen
        self.lowercase = lowercase
        self.vocab_size = vocab_size
        
        self.data_all = data_all
        self.train_l = train_l
        self.test_l = test_l
        self.train_u = train_u
        self.test_u = test_u

        # make the vocabulary from training set
        self.make_vocab()
        
        self.train_l_tok = self.tokenize(self.train_l)
        self.test_l_tok = self.tokenize(self.test_l)
        self.train_u_tok = self.tokenize(self.train_u)
        self.test_u_tok = self.tokenize(self.test_u)

    def make_vocab(self):
        # Add words to the dictionary
        print(len(self.data_all))
        print(self.data_all[0])
        for line in self.data_all:
            if self.lowercase:
                # -1 to get rid of \n character
                words = line[:-1].lower().split(" ")
            else:
                words = line[:-1].split(" ")
            for word in words:
                self.dictionary.add_word(word)

        # prune the vocabulary
        self.dictionary.prune_vocab(k=self.vocab_size, cnt=True)

    def tokenize(self, data):
        """Tokenizes a text file."""
        dropped = 0
        #with open(path, 'r') as f:
        linecount = 0
        lines = []
        for line, label in data:
            linecount += 1
            if self.lowercase:
                words = line[:-1].lower().strip().split(" ")
            else:
                words = line[:-1].strip().split(" ")
            if len(words) > self.maxlen:
                dropped += 1
                continue
            words = ['<sos>'] + words
            words += ['<eos>']
            # vectorize
            vocab = self.dictionary.word2idx
            unk_idx = vocab['<oov>']
            indices = [vocab[w] if w in vocab else unk_idx for w in words]
            lines.append(indices)

        print("Number of sentences dropped: {} out of {} total".
              format(dropped, linecount))
        return lines

In [12]:
corpus = Corpus(data_all = data_all,
                train_l=train_l,
                test_l=test_l,
                train_u=train_u,
                test_u=test_u,
                maxlen=maxlen,
                vocab_size=vocab_size,
                lowercase=lowercase)

240001
how has religion affected war?
original vocab 93259; pruned to 93263
Number of sentences dropped: 0 out of 200 total
Number of sentences dropped: 1 out of 400 total
Number of sentences dropped: 39 out of 209401 total
Number of sentences dropped: 1 out of 20000 total


In [13]:
# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))

Vocabulary Size: 93263


In [14]:
# exp dir
create_exp_dir(os.path.join(save), ['train.py', 'models.py', 'utils.py'],
        dict=corpus.dictionary.word2idx)

Experiment dir : ./results/yahoo_merge_assigned_results


In [15]:
def logging(str, to_stdout=True):
    with open(os.path.join(save, 'log.txt'), 'a') as f:
        f.write(str + '\n')
    if to_stdout:
        print(str)

In [16]:
def batchify(data, bsz, max_len, shuffle=False, gpu=False):
    if shuffle:
        random.shuffle(data)
    nbatch = len(data) // bsz
    batches = []

    for i in range(nbatch):
        maxlen = max_len+1
        # Pad batches to maximum sequence length in batch
        batch = data[i*bsz:(i+1)*bsz]
        # subtract 1 from lengths b/c includes BOTH starts & end symbols
        lengths = [len(x)-1 for x in batch]

        # sort items by length (decreasing)
        batch, lengths = length_sort(batch, lengths)

        # source has no end symbol
        source = [x[:-1] for x in batch]
        # target has no start symbol
        target = [x[1:] for x in batch]


        for x, y in zip(source, target):
            zeros = (maxlen-len(x))*[0]
            x += zeros
            y += zeros
        source = torch.LongTensor(np.array(source))
        target = torch.LongTensor(np.array(target)).view(-1)

        if gpu:
            source = source.cuda()
            target = target.cuda()

        batches.append((source, target, lengths))

    return batches

In [17]:
test_data = batchify(corpus.test_u_tok, eval_batch_size, maxlen, shuffle=False)
train_data = batchify(corpus.train_u_tok, batch_size, maxlen,  shuffle=True)

print("Loaded data!")

Loaded data!


In [18]:
print(len(test_data))
print(len(train_data))

1249
4361


In [19]:
###############################################################################
# Build the models
###############################################################################
autoencoder = Seq2Seq(add_noise=add_noise,
                      emsize=emsize,
                      nhidden=nhidden,
                      ntokens=ntokens,
                      nlayers=nlayers,
                      nheads=nheads,
                      nff=nff,
                      aehidden=aehidden,
                      noise_r=noise_r,
                      hidden_init=hidden_init,
                      dropout=dropout,
                      gpu=True)
nlatent = aehidden * (maxlen+1)
gan_gen = MLP_G(ninput=z_size, noutput=nlatent, layers=arch_g, gan_g_activation=gan_g_activation)
gan_disc = MLP_D(ninput=nlatent, noutput=1, layers=arch_d)
gan_disc_local = MLP_D_local(ninput=gan_d_local_windowsize * aehidden, noutput=1, layers=arch_d_local)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=lr_ae)


optimizer_gan_e = optim.Adam(autoencoder.encoder.parameters(),
                             lr=lr_gan_e,
                             betas=(beta1, 0.999))
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=lr_gan_g,
                             betas=(beta1, 0.999))
optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                             lr=lr_gan_d,
                             betas=(beta1, 0.999))
optimizer_gan_d_local = optim.Adam(gan_disc_local.parameters(),
                             lr=lr_gan_d,
                             betas=(beta1, 0.999))
optimizer_gan_dec = optim.Adam(autoencoder.decoder.parameters(),
                             lr=lr_gan_e,
                             betas=(beta1, 0.999))

autoencoder = autoencoder.to(device)
gan_gen = gan_gen.to(device)
gan_disc = gan_disc.to(device)
gan_disc_local = gan_disc_local.to(device)

In [20]:
def save_model():
    print("Saving models to {}".format(save))
    torch.save({
        "ae": autoencoder.state_dict(),
        "gan_g": gan_gen.state_dict(),
        "gan_d": gan_disc.state_dict(),
        "gan_d_local": gan_disc_local.state_dict()

        },
        os.path.join(save, "model.pt"))

In [21]:
def cal_norm(model):
    total_norm = 0
    for p in model.parameters():
        param_norm = p.grad.data.norm(2)
        total_norm += param_norm.item() ** 2
    total_norm = total_norm ** (1. / 2)
    return total_norm

In [22]:
def load_models():
    model_args = json.load(open(os.path.join(save, 'options.json'), 'r'))
    word2idx = json.load(open(os.path.join(save, 'vocab.json'), 'r'))
    idx2word = {v: k for k, v in word2idx.items()}

    print('Loading models from {}'.format(save))
    loaded = torch.load(os.path.join(save, "model.pt"))
    autoencoder.load_state_dict(loaded.get('ae'))
    gan_gen.load_state_dict(loaded.get('gan_g'))
    gan_disc.load_state_dict(loaded.get('gan_d'))
    gan_disc_local.load_state_dict(loaded.get('gan_d_local'))
    return model_args, idx2word, autoencoder, gan_gen, gan_disc

In [23]:
def evaluate_autoencoder(data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        with torch.no_grad():
            source = Variable(source.to(device))
            target = Variable(target.to(device))
            mask = target.gt(0)
            masked_target = target.masked_select(mask)
            # examples x ntokens
            output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

            # output: batch x seq_len x ntokens
            output = autoencoder(source, lengths, source, add_noise=add_noise, soft=False)
            flattened_output = output.view(-1, ntokens)

            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            total_loss += F.cross_entropy(masked_output, masked_target)

            # accuracy
            max_vals, max_indices = torch.max(masked_output, 1)
            accuracy = torch.mean(max_indices.eq(masked_target).float()).data.item()
            all_accuracies += accuracy
            bcnt += 1

        aeoutf = os.path.join(save, "autoencoder.txt")
        with open(aeoutf, "w") as f:
            max_values, max_indices = torch.max(output, 2)
            max_indices = \
                max_indices.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            for t, idx in zip(target, max_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f.write(chars + '\n')
                # autoencoder output sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx])
                f.write(chars + '\n'*2)

    return total_loss.item() / len(data_source), all_accuracies/bcnt

In [24]:
def gen_fixed_noise(noise, to_save):
    gan_gen.eval()
    autoencoder.eval()

    fake_hidden = gan_gen(noise)
    max_indices = autoencoder.generate(fake_hidden, maxlen, sample=sample)

    with open(to_save, "w") as f:
        max_indices = max_indices.data.cpu().numpy()
        for idx in max_indices:
            # generated sentence
            words = [corpus.dictionary.idx2word[x] for x in idx]
            # truncate sentences to first occurrence of <eos>
            truncated_sent = []
            for w in words:
                if w != '<eos>':
                    truncated_sent.append(w)
                else:
                    break
            chars = " ".join(truncated_sent)
            f.write(chars + '\n')

In [25]:
def gen_fixed_noise_new(noise):
    gan_gen.eval()
    autoencoder.eval()

    fake_hidden = gan_gen(noise)
    max_indices = autoencoder.generate(fake_hidden, maxlen, sample=sample)
    
    sent_list = []
    
    #with open(to_save, "w") as f:
    max_indices = max_indices.data.cpu().numpy()
    for idx in max_indices:
        # generated sentence
        words = [corpus.dictionary.idx2word[x] for x in idx]
        # truncate sentences to first occurrence of <eos>
        truncated_sent = []
        for w in words:
            if w != '<eos>':
                truncated_sent.append(w)
            else:
                break
        chars = " ".join(truncated_sent)
        #f.write(chars + '\n')
        sent_list.append(chars)
    #print(sent_list)
    return sent_list

In [26]:
def eval_bleu(gen_text_savepath):
    selfbleu = bleu_self(gen_text_savepath)
    real_text = os.path.join(data_path, "test.txt")
    testbleu = bleu_test(real_text, gen_text_savepath)
    return selfbleu, testbleu

In [27]:
def train_ae(epoch, batch, total_loss_ae, start_time, i):
    '''Train AE with the negative log-likelihood loss'''
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = Variable(source.to(device))
    target = Variable(target.to(device))
    output = autoencoder(source, lengths, source, add_noise=add_noise, soft=False)

    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)
    flat_output = output.view(-1, ntokens)
    masked_output = flat_output.masked_select(output_mask).view(-1, ntokens)
    loss = F.cross_entropy(masked_output, masked_target)
    loss.backward()
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), clip)
    train_ae_norm = cal_norm(autoencoder)
    optimizer_ae.step()

    total_loss_ae += loss.data.item()
    if i % log_interval == 0:
        probs = F.softmax(masked_output, dim=-1)
        max_vals, max_indices = torch.max(probs, 1)
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data.item()
        cur_loss = total_loss_ae / log_interval
        elapsed = time.time() - start_time
        logging('| epoch {:3d} | {:5d}/{:5d} batches | lr {:08.6f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f} | train_ae_norm {:8.2f}'.format(
                epoch, i, len(train_data), 0,
                elapsed * 1000 / log_interval,
                cur_loss, math.exp(cur_loss), accuracy, train_ae_norm))

        total_loss_ae = 0
        start_time = time.time()
    return total_loss_ae, start_time

In [28]:
def train_gan_g(gan_type='kl'):
    gan_gen.train()
    optimizer_gan_g.zero_grad()

    z = Variable(torch.Tensor(batch_size, z_size).normal_(0, 1).to(device))
    fake_hidden = gan_gen(z)
    fake_score = gan_disc(fake_hidden)

    if gan_d_local:
        idx = random.randint(0, maxlen - gan_d_local_windowsize)
        fake_hidden_local = fake_hidden[:, idx * aehidden : (idx + gan_d_local_windowsize) * aehidden]
        fake_score_local = gan_disc_local(fake_hidden_local)

        if gan_type == 'kl':
            errG = -(torch.exp(fake_score.detach()).clamp(0.5, 2) * fake_score).mean() -(torch.exp(fake_score_local.detach()).clamp(0.5, 2) * fake_score_local).mean()
        else:
            errG = -fake_score.mean() -fake_score_local.mean()
    else:
        if gan_type == 'kl':
            errG = -(torch.exp(fake_score.detach()).clamp(0.5, 2) * fake_score).mean()
        else:
            errG = -fake_score.mean()


    errG *= gan_lambda
    errG.backward()
    optimizer_gan_g.step()

    return errG

In [29]:
def train_gan_dec(gan_type='kl'):
    autoencoder.decoder.train()
    optimizer_gan_dec.zero_grad()

    z = Variable(torch.Tensor(batch_size, z_size).normal_(0, 1).to(device))
    fake_hidden = gan_gen(z)

    # 1. decoder  - soft distribution
    enhance_source, max_indices= autoencoder.generate_enh_dec(fake_hidden, maxlen, sample=sample)
    # 2. soft distribution - > encoder  -> fake_hidden
    enhance_hidden = autoencoder(enhance_source, None, max_indices, add_noise=add_noise, soft=True, encode_only=True)
    fake_score = gan_disc(enhance_hidden)

    if gan_d_local:
        idx = random.randint(0, maxlen - gan_d_local_windowsize)
        fake_hidden_local = fake_hidden[:, idx * aehidden : (idx + gan_d_local_windowsize) * aehidden]
        fake_score_local = gan_disc_local(fake_hidden_local)

        if gan_type == 'kl':
            errG = -(torch.exp(fake_score.detach()).clamp(0.5, 2) * fake_score).mean() -(torch.exp(fake_score_local.detach()).clamp(0.5, 2) * fake_score_local).mean()
        else:
            errG = -fake_score.mean() -fake_score_local.mean()
    else:
        if gan_type == 'kl':
            errG = -(torch.exp(fake_score.detach()).clamp(0.5, 2) * fake_score).mean()
        else:
            errG = -fake_score.mean()


    errG *= gan_lambda
    errG.backward()
    optimizer_gan_dec.step()

    return errG

In [30]:
def grad_hook(grad):
    return grad * gan_lambda

In [31]:
''' Steal from https://github.com/caogang/wgan-gp/blob/master/gan_cifar10.py '''
def calc_gradient_penalty(netD, real_data, fake_data):
    bsz = real_data.size(0)
    alpha = torch.rand(bsz, 1)
    alpha = alpha.expand(bsz, real_data.size(1))  # only works for 2D XXX
    alpha = alpha.to(device)
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)

    gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                    grad_outputs=torch.ones(disc_interpolates.size()).to(device),
                                    create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * gan_gp_lambda
    return gradient_penalty

In [32]:
def train_gan_d(batch, gan_type='kl'):
    gan_disc.train()
    gan_disc_local.train()
    optimizer_gan_d.zero_grad()
    optimizer_gan_d_local.zero_grad()

    # + samples
    source, target, lengths = batch
    source = Variable(source.to(device))
    target = Variable(target.to(device))
    real_hidden = autoencoder(source, lengths, source, add_noise=add_noise, soft=False, encode_only=True)
    real_score = gan_disc(real_hidden.detach())

    idx = random.randint(0, maxlen - gan_d_local_windowsize)
    if gan_d_local:
        real_hidden_local = real_hidden[:, idx * aehidden : (idx + gan_d_local_windowsize) * aehidden]
        real_score_local = gan_disc_local(real_hidden_local)
        real_score += real_score_local


    if gan_type == 'wgan':
        errD_real = -real_score.mean()
    else: # kl or all
        errD_real = F.softplus(-real_score).mean()
    errD_real.backward()

    # - samples
    z = Variable(torch.Tensor(batch_size, z_size).normal_(0, 1).to(device))
    fake_hidden = gan_gen(z)
    fake_score = gan_disc(fake_hidden.detach())

    if gan_d_local:
        fake_hidden_local = fake_hidden[:, idx * aehidden : (idx + gan_d_local_windowsize) * aehidden]
        fake_score_local = gan_disc_local(fake_hidden_local)
        fake_score += fake_score_local

    if gan_type == 'wgan':
        errD_fake = fake_score.mean()
    else:  # kl or all
        errD_fake = F.softplus(fake_score).mean()
    errD_fake.backward()

    # gradient penalty
    if gan_type == 'wgan':
        gradient_penalty = calc_gradient_penalty(gan_disc, real_hidden.data, fake_hidden.data)
        gradient_penalty.backward()

    optimizer_gan_d.step()
    optimizer_gan_d_local.step()
    return errD_real + errD_fake, errD_real, errD_fake

In [33]:
def train_gan_d_into_ae(batch):
    autoencoder.train()
    optimizer_gan_e.zero_grad()

    source, target, lengths = batch
    source = Variable(source.to(device))
    target = Variable(target.to(device))
    real_hidden = autoencoder(source, lengths, source, add_noise=add_noise, soft=False, encode_only=True)

    if gan_d_local:
        idx = random.randint(0, maxlen - gan_d_local_windowsize)
        real_hidden_local = real_hidden[:, idx * aehidden : (idx + gan_d_local_windowsize) * aehidden]
        real_score_local = gan_disc_local(real_hidden_local)
        errD_real = gan_disc(real_hidden).mean() + real_score_local.mean()
    else:
        errD_real = gan_disc(real_hidden).mean()

    errD_real *= gan_lambda
    errD_real.backward()
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), clip)

    optimizer_gan_e.step()
    return errD_real

In [34]:
#--------------------------------
#  Transformer parameters
#--------------------------------
max_seq_length = 20
batch_size_d = 48

#--------------------------------
#  GAN-BERT specific parameters
#--------------------------------
# number of hidden layers in the generator, 
# each of the size of the output space
#num_hidden_layers_g = 1; 
# number of hidden layers in the discriminator, 
# each of the size of the input space
num_hidden_layers_d = 1; 
# size of the generator's input noisy vectors
noise_size = 100
# dropout to be applied to discriminator's input vectors
out_dropout_rate = 0.2

# Replicate labeled data to balance poorly represented datasets, 
# e.g., less than 1% of labeled material
apply_balance = True

#--------------------------------
#  Optimization parameters
#--------------------------------
learning_rate_discriminator = 5e-6 #5e-6?
#learning_rate_generator = 5e-5
epsilon = 1e-8
num_train_epochs = 50
multi_gpu = True
# Scheduler
apply_scheduler = False
warmup_proportion = 0.1
# Print
print_each_n_step = 10

#--------------------------------
#  Adopted Tranformer model
#--------------------------------
# Since this version is compatible with Huggingface transformers, you can uncomment
# (or add) transformer models compatible with GAN

model_name = "bert-base-cased"
#model_name = "bert-base-uncased"
#model_name = "roberta-base"
#model_name = "albert-base-v2"
#model_name = "xlm-roberta-base"
#model_name = "amazon/bort"
#model_name="google/electra-large-discriminator"
#model_name="google/electra-small-discriminator"
#model_name="microsoft/deberta-v2-xxlarge"
#model_name="microsoft/deberta-v3-base"
#model_name = "google/electra-base-discriminator"

In [35]:
transformer = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

loading configuration file https://huggingface.co/bert-base-cased/resolve/main/config.json from cache at /home/harry/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
Model config BertConfig {
  "_name_or_path": "bert-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}

loading weights file https://huggingface.co/bert-base-cased/re

In [36]:
def generate_data_loader(input_examples, label_masks, label_map, do_shuffle = False, balance_label_examples = False):
  '''
  Generate a Dataloader given the input examples, eventually masked if they are 
  to be considered NOT labeled.
  '''
  examples = []

  # Count the percentage of labeled examples  
  num_labeled_examples = 0
  for label_mask in label_masks:
    if label_mask: 
      num_labeled_examples += 1
  label_mask_rate = num_labeled_examples/len(input_examples)

  # if required it applies the balance
  for index, ex in enumerate(input_examples): 
    if label_mask_rate == 1 or not balance_label_examples:
      examples.append((ex, label_masks[index]))
    else:
      # IT SIMULATE A LABELED EXAMPLE
      if label_masks[index]:
        balance = int(1/label_mask_rate)
        balance = int(math.log(balance,2))
        if balance < 1:
          balance = 1
        for b in range(0, int(balance)):
          examples.append((ex, label_masks[index]))
      else:
        examples.append((ex, label_masks[index]))
  
  #-----------------------------------------------
  # Generate input examples to the Transformer
  #-----------------------------------------------
  input_ids = []
  input_mask_array = []
  label_mask_array = []
  label_id_array = []

  # Tokenization 
  for (text, label_mask) in examples:
    encoded_sent = tokenizer.encode(text[0], add_special_tokens=True, max_length=max_seq_length, padding="max_length", truncation=True)
    input_ids.append(encoded_sent)
    label_id_array.append(label_map[text[1]])
    label_mask_array.append(label_mask)
  
  # Attention to token (to ignore padded input wordpieces)
  for sent in input_ids:
    att_mask = [int(token_id > 0) for token_id in sent]                          
    input_mask_array.append(att_mask)
  # Convertion to Tensor
  input_ids = torch.tensor(input_ids) 
  input_mask_array = torch.tensor(input_mask_array)
  label_id_array = torch.tensor(label_id_array, dtype=torch.long)
  label_mask_array = torch.tensor(label_mask_array)

  # Building the TensorDataset
  dataset = TensorDataset(input_ids, input_mask_array, label_id_array, label_mask_array)

  if do_shuffle:
    sampler = RandomSampler
  else:
    sampler = SequentialSampler

  # Building the DataLoader
  return DataLoader(
              dataset,  # The training samples.
              sampler = sampler(dataset), 
              batch_size = batch_size) # Trains with this batch size.

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [37]:
def generate_data_fake(input_examples):
  '''
  Generate a Dataloader given the input examples, eventually masked if they are 
  to be considered NOT labeled.
  '''
  
  #-----------------------------------------------
  # Generate input examples to the Transformer
  #-----------------------------------------------
  input_ids = []
  input_mask_array = []

  # Tokenization 
  for text in input_examples:
    encoded_sent = tokenizer.encode(text, add_special_tokens=True, max_length=max_seq_length, padding="max_length", truncation=True)
    input_ids.append(encoded_sent)
  
  # Attention to token (to ignore padded input wordpieces)
  for sent in input_ids:
    att_mask = [int(token_id > 0) for token_id in sent]                          
    input_mask_array.append(att_mask)
  # Convertion to Tensor
  input_ids = torch.tensor(input_ids) 
  input_mask_array = torch.tensor(input_mask_array)

  # Building the DataLoader
  return input_ids, input_mask_array # Trains with this batch size.

In [38]:
#Load the examples
labeled_examples = train_l
unlabeled_examples = u_list
test_examples = test_l

In [39]:
label_map = {}
for (i, label) in enumerate(label_list):
  label_map[label] = i
#------------------------------
#   Load the train dataset
#------------------------------
train_examples = labeled_examples
#The labeled (train) dataset is assigned with a mask set to True
train_label_masks = np.ones(len(labeled_examples), dtype=bool)
#If unlabel examples are available
if unlabeled_examples:
  train_examples = train_examples + unlabeled_examples
  #The unlabeled (train) dataset is assigned with a mask set to False
  tmp_masks = np.zeros(len(unlabeled_examples), dtype=bool)
  train_label_masks = np.concatenate([train_label_masks,tmp_masks])

train_dataloader = generate_data_loader(train_examples, train_label_masks, label_map, do_shuffle = True, balance_label_examples = apply_balance)

#------------------------------
#   Load the test dataset
#------------------------------
#The labeled (test) dataset is assigned with a mask set to True
test_label_masks = np.ones(len(test_examples), dtype=bool)

test_dataloader = generate_data_loader(test_examples, test_label_masks, label_map, do_shuffle = False, balance_label_examples = False)

In [40]:
#------------------------------
#   The Discriminator
#   https://www.aclweb.org/anthology/2020.acl-main.191/
#   https://github.com/crux82/ganbert
#------------------------------
class Discriminator(nn.Module):
    def __init__(self, input_size=512, hidden_sizes=[512], num_labels=2, dropout_rate=0.1):
        super(Discriminator, self).__init__()
        self.input_dropout = nn.Dropout(p=dropout_rate)
        layers = []
        hidden_sizes = [input_size] + hidden_sizes
        for i in range(len(hidden_sizes)-1):
            layers.extend([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]), nn.LeakyReLU(0.2, inplace=True), nn.Dropout(dropout_rate)])

        self.layers = nn.Sequential(*layers) #per il flatten
        self.logit = nn.Linear(hidden_sizes[-1],num_labels+1) # +1 for the probability of this sample being fake/real.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input_rep):
        input_rep = self.input_dropout(input_rep)
        last_rep = self.layers(input_rep)
        logits = self.logit(last_rep)
        probs = self.softmax(logits)
        return last_rep, logits, probs

In [41]:
# The config file is required to get the dimension of the vector produced by 
# the underlying transformer
config = AutoConfig.from_pretrained(model_name)
hidden_size = int(config.hidden_size)
# Define the number and width of hidden layers
#hidden_levels_g = [hidden_size for i in range(0, num_hidden_layers_g)]
hidden_levels_d = [hidden_size for i in range(0, num_hidden_layers_d)]

#-------------------------------------------------
#   Instantiate the Generator and Discriminator
#-------------------------------------------------
#generator = Generator(noise_size=noise_size, output_size=hidden_size, hidden_sizes=hidden_levels_g, dropout_rate=out_dropout_rate)
discriminator = Discriminator(input_size=hidden_size, hidden_sizes=hidden_levels_d, num_labels=len(label_list), dropout_rate=out_dropout_rate)

# Put everything in the GPU if available
if torch.cuda.is_available():    
  #generator.cuda()
  discriminator.cuda()
  transformer.cuda()
  if multi_gpu:
    transformer = torch.nn.DataParallel(transformer)

# print(config)

loading configuration file https://huggingface.co/bert-base-cased/resolve/main/config.json from cache at /home/harry/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
Model config BertConfig {
  "_name_or_path": "bert-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}



In [42]:
label_map = {}
for (i, label) in enumerate(label_list):
  label_map[label] = i
#------------------------------
#   Load the train dataset
#------------------------------
train_examples = labeled_examples
#The labeled (train) dataset is assigned with a mask set to True
train_label_masks = np.ones(len(labeled_examples), dtype=bool)
#If unlabel examples are available
if unlabeled_examples:
  train_examples = train_examples + unlabeled_examples
  #The unlabeled (train) dataset is assigned with a mask set to False
  tmp_masks = np.zeros(len(unlabeled_examples), dtype=bool)
  train_label_masks = np.concatenate([train_label_masks,tmp_masks])

train_dataloader = generate_data_loader(train_examples, train_label_masks, label_map, do_shuffle = True, balance_label_examples = apply_balance)

#------------------------------
#   Load the test dataset
#------------------------------
#The labeled (test) dataset is assigned with a mask set to True
test_label_masks = np.ones(len(test_examples), dtype=bool)

test_dataloader = generate_data_loader(test_examples, test_label_masks, label_map, do_shuffle = False, balance_label_examples = False)

In [43]:
training_stats = []

accuracy_array=[]

# Measure the total training time for the whole run.
total_t0 = time.time()

#models parameters
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
#g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator)
#gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator) 

#scheduler
if apply_scheduler:
  num_train_examples = len(train_examples)
  num_train_steps = int(num_train_examples / batch_size * num_train_epochs)
  num_warmup_steps = int(num_train_steps * warmup_proportion)

  scheduler_d = get_constant_schedule_with_warmup(dis_optimizer, 
                                           num_warmup_steps = num_warmup_steps)
  scheduler_g = get_constant_schedule_with_warmup(gen_optimizer, 
                                           num_warmup_steps = num_warmup_steps)

In [44]:
def train():
    logging("Training")
    train_data = batchify(corpus.train_u_tok, batch_size, maxlen, shuffle=True)

    # gan: preparation
    if niters_gan_schedule != "":
        gan_schedule = [int(x) for x in niters_gan_schedule.split("-")]
    else:
        gan_schedule = []
    niter_gan = 1
    fixed_noise = Variable(torch.ones(eval_batch_size, z_size).normal_(0, 1).to(device))

    for epoch in range(1, epochs+1):
        # update gan training schedule
        if epoch in gan_schedule:
            niter_gan += 1
            logging("GAN training loop schedule: {}".format(niter_gan))

        total_loss_ae = 0
        epoch_start_time = time.time()
        start_time = time.time()
        niter = 0
        niter_g = 1
        print("Train classification discriminator")
        # ========================================
        #               Training
        # ========================================
        # Perform one full pass over the training set.
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch, epochs))
        print('Training...')

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        tr_g_loss = 0
        tr_d_loss = 0

        # Put the model into training mode.
        transformer.train() 
        #generator.train()
        discriminator.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):

            # Progress update every print_each_n_step batches.
            if step % print_each_n_step == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                elapsed = format_time(time.time() - t0)

                # Report progress.
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

            # Unpack this training batch from our dataloader. 
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            b_label_mask = batch[3].to(device)

            real_batch_size = b_input_ids.shape[0]

            # Encode real data in the Transformer
            model_outputs = transformer(b_input_ids, attention_mask=b_input_mask)
            hidden_states = model_outputs.last_hidden_state[:,0,:] 
            #hidden_states = model_outputs[-1]
            #print("  Number of real sentences (labelled and unlabelled): {}".format(len(hidden_states)))
            
            # Generate fake data that should have the same distribution of the ones
            # encoded by the transformer. 
            # First noisy input are used in input to the Generator
            fixed_noise = Variable(torch.ones(real_batch_size, 100).normal_(0, 1).to(device))
            fake_sentences = gen_fixed_noise_new(fixed_noise)
            #print("  Number of generated sentences: {}".format(len(fake_sentences)))

            b_input_ids_fake, b_input_mask_fake = generate_data_fake(fake_sentences)
            model_outputs_fake = transformer(b_input_ids_fake, attention_mask=b_input_mask_fake)
            hidden_states_fake = model_outputs_fake.last_hidden_state[:,0,:] 
            #hidden_states_fake = model_outputs_fake[-1]

            #noise = torch.zeros(real_batch_size, noise_size, device=device).uniform_(0, 1)
            # Gnerate Fake data
            #gen_rep = generator(noise)
            #print("Length of generator output {}".format(len(gen_rep)))
            #print("Length of single generator output {}".format(len(gen_rep[0])))

            # Generate the output of the Discriminator for real and fake data.
            # First, we put together the output of the tranformer and the generator
            disciminator_input = torch.cat([hidden_states, hidden_states_fake], dim=0)
            # Then, we select the output of the disciminator
            features, logits, probs = discriminator(disciminator_input)

            # Finally, we separate the discriminator's output for the real and fake
            # data
            features_list = torch.split(features, real_batch_size)
            D_real_features = features_list[0]
            D_fake_features = features_list[1]

            logits_list = torch.split(logits, real_batch_size)
            D_real_logits = logits_list[0]
            D_fake_logits = logits_list[1]

            probs_list = torch.split(probs, real_batch_size)
            D_real_probs = probs_list[0]
            D_fake_probs = probs_list[1]

            #---------------------------------
            #  LOSS evaluation
            #---------------------------------
            # Generator's LOSS estimation
            g_loss_d = -1 * torch.mean(torch.log(1 - D_fake_probs[:,-1] + epsilon))
            g_feat_reg = torch.mean(torch.pow(torch.mean(D_real_features, dim=0) - torch.mean(D_fake_features, dim=0), 2))
            g_loss = g_loss_d + g_feat_reg

            # Disciminator's LOSS estimation
            logits = D_real_logits[:,0:-1]
            log_probs = F.log_softmax(logits, dim=-1)
            # The discriminator provides an output for labeled and unlabeled real data
            # so the loss evaluated for unlabeled data is ignored (masked)
            label2one_hot = torch.nn.functional.one_hot(b_labels, len(label_list))
            per_example_loss = -torch.sum(label2one_hot * log_probs, dim=-1)
            per_example_loss = torch.masked_select(per_example_loss, b_label_mask.to(device))
            labeled_example_count = per_example_loss.type(torch.float32).numel()

            # It may be the case that a batch does not contain labeled examples, 
            # so the "supervised loss" in this case is not evaluated
            if labeled_example_count == 0:
              D_L_Supervised = 0
            else:
              D_L_Supervised = torch.div(torch.sum(per_example_loss.to(device)), labeled_example_count)

            D_L_unsupervised1U = -1 * torch.mean(torch.log(1 - D_real_probs[:, -1] + epsilon))
            D_L_unsupervised2U = -1 * torch.mean(torch.log(D_fake_probs[:, -1] + epsilon))
            d_loss = D_L_Supervised + D_L_unsupervised1U + D_L_unsupervised2U

            #---------------------------------
            #  OPTIMIZATION
            #---------------------------------
            # Avoid gradient accumulation
            #gen_optimizer.zero_grad()
            dis_optimizer.zero_grad()

            # Calculate weigth updates
            # retain_graph=True is required since the underlying graph will be deleted after backward
            g_loss.backward(retain_graph=True)
            d_loss.backward() 

            # Apply modifications
            #gen_optimizer.step()
            dis_optimizer.step()

            # A detail log of the individual losses
            #print("{0:.4f}\t{1:.4f}\t{2:.4f}\t{3:.4f}\t{4:.4f}".
            #      format(D_L_Supervised, D_L_unsupervised1U, D_L_unsupervised2U,
            #             g_loss_d, g_feat_reg))

            # Save the losses to print them later
            tr_g_loss += g_loss.item()
            tr_d_loss += d_loss.item()

            # Update the learning rate with the scheduler
            if apply_scheduler:
              scheduler_d.step()
              #scheduler_g.step()

        # Calculate the average loss over all of the batches.
        avg_train_loss_g = tr_g_loss / len(train_dataloader)
        avg_train_loss_d = tr_d_loss / len(train_dataloader)             

        # Measure how long this epoch took.
        training_time = format_time(time.time() - t0)

        print("")
        print("  Average training loss generetor: {0:.3f}".format(avg_train_loss_g))
        print("  Average training loss discriminator: {0:.3f}".format(avg_train_loss_d))
        print("  Training epcoh took: {:}".format(training_time))

        # ========================================
        #     TEST ON THE EVALUATION DATASET
        # ========================================
        # After the completion of each training epoch, measure our performance on
        # our test set.
        print("")
        print("Running Test...")

        t0 = time.time()

        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        transformer.eval() #maybe redundant
        discriminator.eval()
        #generator.eval()

        # Tracking variables 
        total_test_accuracy = 0

        total_test_loss = 0
        nb_test_steps = 0

        all_preds = []
        all_labels_ids = []

        #loss
        nll_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

        # Evaluate data for one epoch
        for batch in test_dataloader:

            # Unpack this training batch from our dataloader. 
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Tell pytorch not to bother with constructing the compute graph during
            # the forward pass, since this is only needed for backprop (training).
            with torch.no_grad():        
                model_outputs = transformer(b_input_ids, attention_mask=b_input_mask)
                hidden_states = model_outputs.last_hidden_state[:,0,:] 
                #hidden_states = model_outputs[-1]
                _, logits, probs = discriminator(hidden_states)
                ###log_probs = F.log_softmax(probs[:,1:], dim=-1)
                filtered_logits = logits[:,0:-1]
                # Accumulate the test loss.
                total_test_loss += nll_loss(filtered_logits, b_labels)

            # Accumulate the predictions and the input labels
            _, preds = torch.max(filtered_logits, 1)
            all_preds += preds.detach().cpu()
            all_labels_ids += b_labels.detach().cpu()

        # Report the final accuracy for this validation run.
        all_preds = torch.stack(all_preds).numpy()
        all_labels_ids = torch.stack(all_labels_ids).numpy()
        test_accuracy = np.sum(all_preds == all_labels_ids) / len(all_preds)
        print("  Accuracy: {0:.3f}".format(test_accuracy))

        # Calculate the average loss over all of the batches.
        avg_test_loss = total_test_loss / len(test_dataloader)
        avg_test_loss = avg_test_loss.item()

        # Measure how long the validation run took.
        test_time = format_time(time.time() - t0)

        print("  Test Loss: {0:.3f}".format(avg_test_loss))
        print("  Test took: {:}".format(test_time))

        # Record all statistics from this epoch.
        training_stats.append(
            {
                'epoch': epoch + 1,
                'Training Loss generator': avg_train_loss_g,
                'Training Loss discriminator': avg_train_loss_d,
                'Valid. Loss': avg_test_loss,
                'Valid. Accur.': test_accuracy,
                'Training Time': training_time,
                'Test Time': test_time
            }
        )

        accuracy_array.append(test_accuracy)
        print("Train other shit")
        while niter < len(train_data):
            # train ae
            for i in range(niters_ae):
                if niter >= len(train_data):
                    break  # end of epoch
                total_loss_ae, start_time = train_ae(epoch, train_data[niter],
                                total_loss_ae, start_time, niter)
                niter += 1
            # train gan
            for k in range(niter_gan):
                for i in range(niters_gan_d):
                    errD, errD_real, errD_fake = train_gan_d(
                            train_data[random.randint(0, len(train_data)-1)], gan_type)
                for i in range(niters_gan_ae):
                    train_gan_d_into_ae(train_data[random.randint(0, len(train_data)-1)])
                for i in range(niters_gan_g):
                    errG = train_gan_g(gan_type)
                if enhance_dec:
                    for i in range(niters_gan_dec):
                        errG_enh_dec = train_gan_dec()
                else:
                    errG_enh_dec = torch.Tensor([0])

            niter_g += 1
            if niter_g % log_interval == 0:
                logging('[{}/{}][{}/{}] Loss_D: {:.8f} (Loss_D_real: {:.8f} '
                        'Loss_D_fake: {:.8f}) Loss_G: {:.8f} Loss_Enh_Dec: {:.8f}'.format(
                         epoch, epochs, niter, len(train_data),
                         errD.data.item(), errD_real.data.item(),
                         errD_fake.data.item(), errG.data.item(), errG_enh_dec.data.item()))
        # eval
        test_loss, accuracy = evaluate_autoencoder(test_data, epoch)
        logging('| end of epoch {:3d} | time: {:5.2f}s | test loss {:5.2f} | '
                'test ppl {:5.2f} | acc {:3.3f}'.format(epoch,
                (time.time() - epoch_start_time), test_loss,
                math.exp(test_loss), accuracy))

        gen_text_savepath = os.path.join(save, "{:03d}_examplar_gen".format(epoch))
        gen_fixed_noise(fixed_noise, gen_text_savepath)
        if epoch % 5 == 0 or epoch % 4 == 0 or (epochs - epoch) <=2:
            selfbleu, testbleu = eval_bleu(gen_text_savepath)
            logging('bleu_self: [{:.8f},{:.8f},{:.8f},{:.8f},{:.8f}]'.format(selfbleu[0], selfbleu[1], selfbleu[2], selfbleu[3], selfbleu[4]))
            logging('bleu_test: [{:.8f},{:.8f},{:.8f},{:.8f},{:.8f}]'.format(testbleu[0], testbleu[1], testbleu[2], testbleu[3], testbleu[4]))

        if epoch % 15 == 0 or epoch == epochs-1:
            logging("New saving model: epoch {:03d}.".format(epoch))
            save_model()

In [None]:
train()

Training
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:27.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:42.
  Batch   120  of    230.    Elapsed: 0:00:46.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:57.
  Batch   160  of    230.    Elapsed: 0:01:01.
  Batch   170  of    230.    Elapsed: 0:01:05.
  Batch   180  of    230.    Elapsed: 0:01:09.
  Batch   190  of    230.    Elapsed: 0:01:12.
  Batch   200  of    230.    Elapsed: 0:01:16.
  B



[1/200][99/4361] Loss_D: 1.38524914 (Loss_D_real: 0.69120073 Loss_D_fake: 0.69404840) Loss_G: -0.00016868 Loss_Enh_Dec: -0.00040080
| epoch   1 |   100/ 4361 batches | lr 0.000000 | ms/batch 399.08 | loss  8.57 | ppl  5260.61 | acc     0.19 | train_ae_norm     1.00
[1/200][199/4361] Loss_D: 1.38535583 (Loss_D_real: 0.69144821 Loss_D_fake: 0.69390762) Loss_G: -0.00013183 Loss_Enh_Dec: -0.00032994
| epoch   1 |   200/ 4361 batches | lr 0.000000 | ms/batch 398.60 | loss  7.58 | ppl  1949.98 | acc     0.23 | train_ae_norm     1.00
[1/200][299/4361] Loss_D: 1.38564086 (Loss_D_real: 0.69203019 Loss_D_fake: 0.69361067) Loss_G: -0.00004620 Loss_Enh_Dec: -0.00020663
| epoch   1 |   300/ 4361 batches | lr 0.000000 | ms/batch 398.52 | loss  7.21 | ppl  1350.46 | acc     0.22 | train_ae_norm     1.00
[1/200][399/4361] Loss_D: 1.38577974 (Loss_D_real: 0.69268084 Loss_D_fake: 0.69309890) Loss_G: -0.00000916 Loss_Enh_Dec: -0.00005714
| epoch   1 |   400/ 4361 batches | lr 0.000000 | ms/batch 397.57 |

[1/200][3199/4361] Loss_D: 1.38636816 (Loss_D_real: 0.69314611 Loss_D_fake: 0.69322205) Loss_G: 0.00000650 Loss_Enh_Dec: -0.00000008
| epoch   1 |  3200/ 4361 batches | lr 0.000000 | ms/batch 396.04 | loss  5.15 | ppl   172.96 | acc     0.42 | train_ae_norm     1.00
[1/200][3299/4361] Loss_D: 1.38643909 (Loss_D_real: 0.69333905 Loss_D_fake: 0.69310009) Loss_G: 0.00003178 Loss_Enh_Dec: 0.00002790
| epoch   1 |  3300/ 4361 batches | lr 0.000000 | ms/batch 396.07 | loss  5.13 | ppl   169.13 | acc     0.44 | train_ae_norm     1.00
[1/200][3399/4361] Loss_D: 1.38636720 (Loss_D_real: 0.69319361 Loss_D_fake: 0.69317359) Loss_G: 0.00001398 Loss_Enh_Dec: 0.00000927
| epoch   1 |  3400/ 4361 batches | lr 0.000000 | ms/batch 396.37 | loss  5.08 | ppl   161.00 | acc     0.45 | train_ae_norm     1.00
[1/200][3499/4361] Loss_D: 1.38639891 (Loss_D_real: 0.69322628 Loss_D_fake: 0.69317263) Loss_G: -0.00001093 Loss_Enh_Dec: 0.00000599
| epoch   1 |  3500/ 4361 batches | lr 0.000000 | ms/batch 396.75 | 

[2/200][1399/4361] Loss_D: 1.38631761 (Loss_D_real: 0.69314563 Loss_D_fake: 0.69317198) Loss_G: 0.00000090 Loss_Enh_Dec: -0.00000929
| epoch   2 |  1400/ 4361 batches | lr 0.000000 | ms/batch 397.12 | loss  4.36 | ppl    78.03 | acc     0.44 | train_ae_norm     1.00
[2/200][1499/4361] Loss_D: 1.38631582 (Loss_D_real: 0.69315672 Loss_D_fake: 0.69315904) Loss_G: 0.00000143 Loss_Enh_Dec: -0.00000251
| epoch   2 |  1500/ 4361 batches | lr 0.000000 | ms/batch 397.41 | loss  4.37 | ppl    79.33 | acc     0.49 | train_ae_norm     1.00
[2/200][1599/4361] Loss_D: 1.38631392 (Loss_D_real: 0.69317311 Loss_D_fake: 0.69314086) Loss_G: 0.00000308 Loss_Enh_Dec: -0.00000895
| epoch   2 |  1600/ 4361 batches | lr 0.000000 | ms/batch 396.61 | loss  4.33 | ppl    76.05 | acc     0.55 | train_ae_norm     1.00
[2/200][1699/4361] Loss_D: 1.38628292 (Loss_D_real: 0.69313288 Loss_D_fake: 0.69314998) Loss_G: -0.00000669 Loss_Enh_Dec: -0.00002164
| epoch   2 |  1700/ 4361 batches | lr 0.000000 | ms/batch 396.29

  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average tr

[3/200][2899/4361] Loss_D: 1.38290119 (Loss_D_real: 0.69103277 Loss_D_fake: 0.69186842) Loss_G: 0.00021061 Loss_Enh_Dec: -0.00009812
| epoch   3 |  2900/ 4361 batches | lr 0.000000 | ms/batch 399.24 | loss  4.89 | ppl   132.89 | acc     0.49 | train_ae_norm     1.00
[3/200][2999/4361] Loss_D: 1.38509440 (Loss_D_real: 0.69205928 Loss_D_fake: 0.69303513) Loss_G: 0.00025547 Loss_Enh_Dec: -0.00004536
| epoch   3 |  3000/ 4361 batches | lr 0.000000 | ms/batch 399.86 | loss  4.81 | ppl   122.52 | acc     0.43 | train_ae_norm     1.00
[3/200][3099/4361] Loss_D: 1.38382077 (Loss_D_real: 0.69211906 Loss_D_fake: 0.69170177) Loss_G: 0.00027694 Loss_Enh_Dec: -0.00013886
| epoch   3 |  3100/ 4361 batches | lr 0.000000 | ms/batch 400.23 | loss  4.89 | ppl   132.77 | acc     0.42 | train_ae_norm     1.00
[3/200][3199/4361] Loss_D: 1.38478470 (Loss_D_real: 0.69254416 Loss_D_fake: 0.69224060) Loss_G: 0.00031622 Loss_Enh_Dec: 0.00004376
| epoch   3 |  3200/ 4361 batches | lr 0.000000 | ms/batch 399.64 |

| epoch   4 |  1100/ 4361 batches | lr 0.000000 | ms/batch 399.76 | loss  4.67 | ppl   106.24 | acc     0.45 | train_ae_norm     1.00
[4/200][1199/4361] Loss_D: 1.37860334 (Loss_D_real: 0.68801737 Loss_D_fake: 0.69058597) Loss_G: 0.00090298 Loss_Enh_Dec: -0.00054578
| epoch   4 |  1200/ 4361 batches | lr 0.000000 | ms/batch 399.81 | loss  4.68 | ppl   107.45 | acc     0.47 | train_ae_norm     1.00
[4/200][1299/4361] Loss_D: 1.38117766 (Loss_D_real: 0.69123399 Loss_D_fake: 0.68994367) Loss_G: 0.00082703 Loss_Enh_Dec: -0.00142455
| epoch   4 |  1300/ 4361 batches | lr 0.000000 | ms/batch 399.49 | loss  4.71 | ppl   110.64 | acc     0.46 | train_ae_norm     1.00
[4/200][1399/4361] Loss_D: 1.38234091 (Loss_D_real: 0.69137335 Loss_D_fake: 0.69096756) Loss_G: 0.00031389 Loss_Enh_Dec: -0.00046136
| epoch   4 |  1400/ 4361 batches | lr 0.000000 | ms/batch 399.51 | loss  4.78 | ppl   119.32 | acc     0.39 | train_ae_norm     1.00
[4/200][1499/4361] Loss_D: 1.37893939 (Loss_D_real: 0.68911934 Lo

| epoch   4 |  4200/ 4361 batches | lr 0.000000 | ms/batch 399.57 | loss  4.68 | ppl   107.61 | acc     0.48 | train_ae_norm     1.00
[4/200][4299/4361] Loss_D: 1.37417293 (Loss_D_real: 0.68536615 Loss_D_fake: 0.68880671) Loss_G: 0.00118500 Loss_Enh_Dec: -0.00326756
| epoch   4 |  4300/ 4361 batches | lr 0.000000 | ms/batch 398.72 | loss  4.65 | ppl   104.30 | acc     0.47 | train_ae_norm     1.00
| end of epoch   4 | time: 1843.99s | test loss  4.14 | test ppl 62.49 | acc 0.572
bleu_self:  [3.51377609e-01 2.17325488e-01 1.95000379e-06 6.54209428e-09
 5.22970001e-09]
bleu_test:  [8.17708333e-01 4.68120800e-01 7.87482272e-02 1.25407537e-05
 5.05060968e-07]
bleu_self: [0.35137761,0.21732549,0.00000195,0.00000001,0.00000001]
bleu_test: [0.81770833,0.46812080,0.07874823,0.00001254,0.00000051]
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    

| epoch   5 |  2300/ 4361 batches | lr 0.000000 | ms/batch 399.24 | loss  5.01 | ppl   150.01 | acc     0.42 | train_ae_norm     1.00
[5/200][2399/4361] Loss_D: 1.35194910 (Loss_D_real: 0.68435478 Loss_D_fake: 0.66759431) Loss_G: 0.00273768 Loss_Enh_Dec: -0.00711892
| epoch   5 |  2400/ 4361 batches | lr 0.000000 | ms/batch 398.97 | loss  5.01 | ppl   149.49 | acc     0.39 | train_ae_norm     1.00
[5/200][2499/4361] Loss_D: 1.36469972 (Loss_D_real: 0.68392104 Loss_D_fake: 0.68077868) Loss_G: 0.00080840 Loss_Enh_Dec: -0.00262409
| epoch   5 |  2500/ 4361 batches | lr 0.000000 | ms/batch 399.16 | loss  5.15 | ppl   172.79 | acc     0.40 | train_ae_norm     1.00
[5/200][2599/4361] Loss_D: 1.36778021 (Loss_D_real: 0.68448496 Loss_D_fake: 0.68329519) Loss_G: 0.00151070 Loss_Enh_Dec: -0.00600428
| epoch   5 |  2600/ 4361 batches | lr 0.000000 | ms/batch 399.23 | loss  5.07 | ppl   159.22 | acc     0.40 | train_ae_norm     1.00
[5/200][2699/4361] Loss_D: 1.37031984 (Loss_D_real: 0.68631268 Lo

| epoch   6 |   400/ 4361 batches | lr 0.000000 | ms/batch 398.88 | loss  5.14 | ppl   170.17 | acc     0.40 | train_ae_norm     1.00
[6/200][499/4361] Loss_D: 1.26427722 (Loss_D_real: 0.63366890 Loss_D_fake: 0.63060826) Loss_G: 0.00980524 Loss_Enh_Dec: -0.01898212
| epoch   6 |   500/ 4361 batches | lr 0.000000 | ms/batch 398.87 | loss  5.16 | ppl   174.32 | acc     0.38 | train_ae_norm     1.00
[6/200][599/4361] Loss_D: 1.28044415 (Loss_D_real: 0.64185023 Loss_D_fake: 0.63859397) Loss_G: 0.00904784 Loss_Enh_Dec: -0.00834971
| epoch   6 |   600/ 4361 batches | lr 0.000000 | ms/batch 399.31 | loss  5.14 | ppl   171.21 | acc     0.33 | train_ae_norm     1.00
[6/200][699/4361] Loss_D: 1.25440288 (Loss_D_real: 0.63271916 Loss_D_fake: 0.62168378) Loss_G: 0.01051683 Loss_Enh_Dec: -0.02744132
| epoch   6 |   700/ 4361 batches | lr 0.000000 | ms/batch 398.62 | loss  5.18 | ppl   177.64 | acc     0.40 | train_ae_norm     1.00
[6/200][799/4361] Loss_D: 1.21689820 (Loss_D_real: 0.60143614 Loss_D

| epoch   6 |  3500/ 4361 batches | lr 0.000000 | ms/batch 400.20 | loss  5.08 | ppl   160.90 | acc     0.41 | train_ae_norm     1.00
[6/200][3599/4361] Loss_D: 0.87626243 (Loss_D_real: 0.45039782 Loss_D_fake: 0.42586464) Loss_G: 0.03964215 Loss_Enh_Dec: -0.11943912
| epoch   6 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  5.06 | ppl   157.13 | acc     0.43 | train_ae_norm     1.00
[6/200][3699/4361] Loss_D: 0.79220510 (Loss_D_real: 0.40514016 Loss_D_fake: 0.38706493) Loss_G: 0.04118444 Loss_Enh_Dec: -0.09333160
| epoch   6 |  3700/ 4361 batches | lr 0.000000 | ms/batch 400.53 | loss  5.06 | ppl   157.92 | acc     0.40 | train_ae_norm     1.00
[6/200][3799/4361] Loss_D: 0.82662231 (Loss_D_real: 0.42923647 Loss_D_fake: 0.39738584) Loss_G: 0.04020854 Loss_Enh_Dec: -0.12629218
| epoch   6 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  5.05 | ppl   156.27 | acc     0.42 | train_ae_norm     1.00
[6/200][3899/4361] Loss_D: 0.94717169 (Loss_D_real: 0.52507174 Lo

| epoch   7 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  5.05 | ppl   155.90 | acc     0.37 | train_ae_norm     1.00
[7/200][1799/4361] Loss_D: 0.58561182 (Loss_D_real: 0.29022726 Loss_D_fake: 0.29538456) Loss_G: 0.06255469 Loss_Enh_Dec: -0.29149818
| epoch   7 |  1800/ 4361 batches | lr 0.000000 | ms/batch 400.83 | loss  5.04 | ppl   154.92 | acc     0.38 | train_ae_norm     1.00
[7/200][1899/4361] Loss_D: 0.56981874 (Loss_D_real: 0.25752592 Loss_D_fake: 0.31229281) Loss_G: 0.05477221 Loss_Enh_Dec: -0.16432695
| epoch   7 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.33 | loss  5.06 | ppl   156.91 | acc     0.39 | train_ae_norm     1.00
[7/200][1999/4361] Loss_D: 0.78163743 (Loss_D_real: 0.43602118 Loss_D_fake: 0.34561628) Loss_G: 0.05282926 Loss_Enh_Dec: -0.19783631
| epoch   7 |  2000/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  5.02 | ppl   151.55 | acc     0.43 | train_ae_norm     1.00
[7/200][2099/4361] Loss_D: 0.83000255 (Loss_D_real: 0.37027463 Lo

  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.708
  Average training loss discriminator: 0.825
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.453
  Test Loss: 2.090
  Test took: 0:00:00
Train other shit
| epoch   8 |     0/ 4361 batches | lr 0.000000 | ms/batch 859.73 | loss  0.04 | ppl     1.04 | acc     0.48 | train_ae_norm     1.00
[8/200][99/4361] Loss_D: 1.10688496 (Loss_D_real: 0.55506200 Loss_D_fake: 0.55182302) Loss_G: -0.01377871 Loss_Enh_Dec: -0.09172004
| epoch   8 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  4.71 | ppl   110.97 | acc     0.45 | train_ae_norm     1.00
[8/200][199/4361] Loss_D: 0.98205340 (Loss_D_real: 0.48050237 Loss_D_fake: 0.50155103) Loss_G: 0.02098194 Loss_Enh_Dec: -0.13646187
| epoch   8 |   200/ 4361 batches | lr 0.000000 | ms/batch 400.50 | loss  4.77 | ppl   117.37 | acc     0.47 | train_ae_norm     1.00
[8/200][299/4361] Loss_D: 1.07516670 (Loss_D_real: 0.52335018 Loss_D_fake: 0.

| epoch   8 |  3000/ 4361 batches | lr 0.000000 | ms/batch 399.95 | loss  4.65 | ppl   104.61 | acc     0.46 | train_ae_norm     1.00
[8/200][3099/4361] Loss_D: 0.96318138 (Loss_D_real: 0.39700669 Loss_D_fake: 0.56617469) Loss_G: 0.02801378 Loss_Enh_Dec: -0.09518508
| epoch   8 |  3100/ 4361 batches | lr 0.000000 | ms/batch 400.20 | loss  4.66 | ppl   105.76 | acc     0.44 | train_ae_norm     1.00
[8/200][3199/4361] Loss_D: 0.87862903 (Loss_D_real: 0.42013109 Loss_D_fake: 0.45849794) Loss_G: 0.02900676 Loss_Enh_Dec: -0.10450589
| epoch   8 |  3200/ 4361 batches | lr 0.000000 | ms/batch 400.24 | loss  4.67 | ppl   106.83 | acc     0.46 | train_ae_norm     1.00
[8/200][3299/4361] Loss_D: 0.99634153 (Loss_D_real: 0.43515652 Loss_D_fake: 0.56118500) Loss_G: 0.01838928 Loss_Enh_Dec: -0.12413012
| epoch   8 |  3300/ 4361 batches | lr 0.000000 | ms/batch 400.72 | loss  4.66 | ppl   106.16 | acc     0.46 | train_ae_norm     1.00
[8/200][3399/4361] Loss_D: 0.85905617 (Loss_D_real: 0.43155226 Lo

| epoch   9 |  1100/ 4361 batches | lr 0.000000 | ms/batch 400.80 | loss  4.52 | ppl    91.94 | acc     0.48 | train_ae_norm     1.00
[9/200][1199/4361] Loss_D: 0.95281482 (Loss_D_real: 0.49139977 Loss_D_fake: 0.46141505) Loss_G: 0.02603627 Loss_Enh_Dec: -0.12924515
| epoch   9 |  1200/ 4361 batches | lr 0.000000 | ms/batch 400.98 | loss  4.54 | ppl    93.43 | acc     0.51 | train_ae_norm     1.00
[9/200][1299/4361] Loss_D: 0.78395474 (Loss_D_real: 0.38308454 Loss_D_fake: 0.40087017) Loss_G: 0.03144911 Loss_Enh_Dec: -0.13457690
| epoch   9 |  1300/ 4361 batches | lr 0.000000 | ms/batch 400.67 | loss  4.50 | ppl    90.26 | acc     0.49 | train_ae_norm     1.00
[9/200][1399/4361] Loss_D: 0.83318412 (Loss_D_real: 0.41624007 Loss_D_fake: 0.41694409) Loss_G: 0.05064386 Loss_Enh_Dec: -0.26178440
| epoch   9 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.60 | loss  4.50 | ppl    89.91 | acc     0.44 | train_ae_norm     1.00
[9/200][1499/4361] Loss_D: 0.86734396 (Loss_D_real: 0.48535424 Lo

| epoch   9 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  4.37 | ppl    79.41 | acc     0.54 | train_ae_norm     1.00
[9/200][4299/4361] Loss_D: 0.87830770 (Loss_D_real: 0.42352575 Loss_D_fake: 0.45478198) Loss_G: 0.00942458 Loss_Enh_Dec: -0.18022668
| epoch   9 |  4300/ 4361 batches | lr 0.000000 | ms/batch 400.77 | loss  4.31 | ppl    74.34 | acc     0.55 | train_ae_norm     1.00
| end of epoch   9 | time: 1848.80s | test loss  4.05 | test ppl 57.31 | acc 0.584
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37

| epoch  10 |  2400/ 4361 batches | lr 0.000000 | ms/batch 400.61 | loss  4.26 | ppl    70.81 | acc     0.51 | train_ae_norm     1.00
[10/200][2499/4361] Loss_D: 0.88159353 (Loss_D_real: 0.41445860 Loss_D_fake: 0.46713492) Loss_G: 0.05639929 Loss_Enh_Dec: -0.14430088
| epoch  10 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.39 | loss  4.30 | ppl    73.92 | acc     0.54 | train_ae_norm     1.00
[10/200][2599/4361] Loss_D: 0.79461032 (Loss_D_real: 0.39315850 Loss_D_fake: 0.40145183) Loss_G: 0.04704618 Loss_Enh_Dec: -0.11444175
| epoch  10 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.44 | loss  4.25 | ppl    70.29 | acc     0.51 | train_ae_norm     1.00
[10/200][2699/4361] Loss_D: 0.81175971 (Loss_D_real: 0.43435448 Loss_D_fake: 0.37740526) Loss_G: 0.05008814 Loss_Enh_Dec: -0.16995715
| epoch  10 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.31 | loss  4.28 | ppl    72.56 | acc     0.54 | train_ae_norm     1.00
[10/200][2799/4361] Loss_D: 0.78502005 (Loss_D_real: 0.3013211

| epoch  11 |   500/ 4361 batches | lr 0.000000 | ms/batch 400.84 | loss  4.36 | ppl    78.26 | acc     0.55 | train_ae_norm     1.00
[11/200][599/4361] Loss_D: 0.88315666 (Loss_D_real: 0.46825320 Loss_D_fake: 0.41490349) Loss_G: 0.03740079 Loss_Enh_Dec: -0.14397679
| epoch  11 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  4.29 | ppl    73.04 | acc     0.49 | train_ae_norm     1.00
[11/200][699/4361] Loss_D: 1.01715302 (Loss_D_real: 0.59164453 Loss_D_fake: 0.42550847) Loss_G: 0.01957024 Loss_Enh_Dec: -0.16427353
| epoch  11 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  4.33 | ppl    76.27 | acc     0.53 | train_ae_norm     1.00
[11/200][799/4361] Loss_D: 0.94139647 (Loss_D_real: 0.48482651 Loss_D_fake: 0.45656997) Loss_G: 0.03302510 Loss_Enh_Dec: -0.15721522
| epoch  11 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  4.36 | ppl    78.55 | acc     0.52 | train_ae_norm     1.00
[11/200][899/4361] Loss_D: 0.99412429 (Loss_D_real: 0.46027812 Lo

| epoch  11 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  4.24 | ppl    69.11 | acc     0.56 | train_ae_norm     1.00
[11/200][3899/4361] Loss_D: 0.84387302 (Loss_D_real: 0.34365475 Loss_D_fake: 0.50021827) Loss_G: 0.02601921 Loss_Enh_Dec: -0.19253121
| epoch  11 |  3900/ 4361 batches | lr 0.000000 | ms/batch 400.78 | loss  4.22 | ppl    68.19 | acc     0.51 | train_ae_norm     1.00
[11/200][3999/4361] Loss_D: 0.86420333 (Loss_D_real: 0.44707236 Loss_D_fake: 0.41713101) Loss_G: 0.05684992 Loss_Enh_Dec: -0.12679562
| epoch  11 |  4000/ 4361 batches | lr 0.000000 | ms/batch 400.94 | loss  4.21 | ppl    67.62 | acc     0.55 | train_ae_norm     1.00
[11/200][4099/4361] Loss_D: 0.97465277 (Loss_D_real: 0.57159972 Loss_D_fake: 0.40305305) Loss_G: 0.02511944 Loss_Enh_Dec: -0.16945656
| epoch  11 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.12 | loss  4.19 | ppl    66.14 | acc     0.53 | train_ae_norm     1.00
[11/200][4199/4361] Loss_D: 0.94467866 (Loss_D_real: 0.5202832

| epoch  12 |  2000/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  4.12 | ppl    61.76 | acc     0.55 | train_ae_norm     1.00
[12/200][2099/4361] Loss_D: 0.90172458 (Loss_D_real: 0.42287391 Loss_D_fake: 0.47885069) Loss_G: 0.03328840 Loss_Enh_Dec: -0.24904649
| epoch  12 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.52 | loss  4.13 | ppl    62.20 | acc     0.57 | train_ae_norm     1.00
[12/200][2199/4361] Loss_D: 0.84783083 (Loss_D_real: 0.43230638 Loss_D_fake: 0.41552445) Loss_G: 0.03060243 Loss_Enh_Dec: -0.17660984
| epoch  12 |  2200/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  4.12 | ppl    61.42 | acc     0.56 | train_ae_norm     1.00
[12/200][2299/4361] Loss_D: 0.74670964 (Loss_D_real: 0.41728517 Loss_D_fake: 0.32942447) Loss_G: 0.03350262 Loss_Enh_Dec: -0.23734656
| epoch  12 |  2300/ 4361 batches | lr 0.000000 | ms/batch 400.60 | loss  4.10 | ppl    60.64 | acc     0.58 | train_ae_norm     1.00
[12/200][2399/4361] Loss_D: 0.83984601 (Loss_D_real: 0.3705236

| epoch  13 |   100/ 4361 batches | lr 0.000000 | ms/batch 402.05 | loss  4.24 | ppl    69.56 | acc     0.51 | train_ae_norm     1.00
[13/200][199/4361] Loss_D: 0.84366775 (Loss_D_real: 0.43728751 Loss_D_fake: 0.40638027) Loss_G: 0.04743294 Loss_Enh_Dec: -0.20893112
| epoch  13 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  4.28 | ppl    72.18 | acc     0.56 | train_ae_norm     1.00
[13/200][299/4361] Loss_D: 0.71416807 (Loss_D_real: 0.41109276 Loss_D_fake: 0.30307531) Loss_G: 0.04539827 Loss_Enh_Dec: -0.17274688
| epoch  13 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.89 | loss  4.33 | ppl    75.70 | acc     0.46 | train_ae_norm     1.00
[13/200][399/4361] Loss_D: 0.80036622 (Loss_D_real: 0.35286933 Loss_D_fake: 0.44749689) Loss_G: 0.05648738 Loss_Enh_Dec: -0.20024042
| epoch  13 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.22 | loss  4.17 | ppl    64.76 | acc     0.53 | train_ae_norm     1.00
[13/200][499/4361] Loss_D: 0.60111648 (Loss_D_real: 0.32084674 Lo

| epoch  13 |  3200/ 4361 batches | lr 0.000000 | ms/batch 402.06 | loss  4.20 | ppl    66.46 | acc     0.54 | train_ae_norm     1.00
[13/200][3299/4361] Loss_D: 0.67713940 (Loss_D_real: 0.31206271 Loss_D_fake: 0.36507666) Loss_G: 0.04592621 Loss_Enh_Dec: -0.22099729
| epoch  13 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  4.23 | ppl    68.67 | acc     0.52 | train_ae_norm     1.00
[13/200][3399/4361] Loss_D: 0.81787038 (Loss_D_real: 0.40143785 Loss_D_fake: 0.41643256) Loss_G: 0.04641819 Loss_Enh_Dec: -0.23616615
| epoch  13 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  4.17 | ppl    64.88 | acc     0.56 | train_ae_norm     1.00
[13/200][3499/4361] Loss_D: 0.67318934 (Loss_D_real: 0.34393129 Loss_D_fake: 0.32925805) Loss_G: 0.05990602 Loss_Enh_Dec: -0.24656974
| epoch  13 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  4.15 | ppl    63.51 | acc     0.57 | train_ae_norm     1.00
[13/200][3599/4361] Loss_D: 0.84562892 (Loss_D_real: 0.4601329

| epoch  14 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  4.15 | ppl    63.71 | acc     0.49 | train_ae_norm     1.00
[14/200][1499/4361] Loss_D: 0.80666351 (Loss_D_real: 0.27381185 Loss_D_fake: 0.53285164) Loss_G: 0.04661287 Loss_Enh_Dec: -0.19840117
| epoch  14 |  1500/ 4361 batches | lr 0.000000 | ms/batch 400.41 | loss  4.20 | ppl    66.68 | acc     0.54 | train_ae_norm     1.00
[14/200][1599/4361] Loss_D: 0.82056797 (Loss_D_real: 0.47565240 Loss_D_fake: 0.34491560) Loss_G: 0.03683065 Loss_Enh_Dec: -0.19047478
| epoch  14 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.35 | loss  4.15 | ppl    63.72 | acc     0.55 | train_ae_norm     1.00
[14/200][1699/4361] Loss_D: 0.78593862 (Loss_D_real: 0.35773250 Loss_D_fake: 0.42820615) Loss_G: 0.04352307 Loss_Enh_Dec: -0.15918416
| epoch  14 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.87 | loss  4.17 | ppl    64.90 | acc     0.52 | train_ae_norm     1.00
[14/200][1799/4361] Loss_D: 0.66192025 (Loss_D_real: 0.3775378

  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.703
  Average training loss discriminator: 0.759
  Training epcoh too

[15/200][2699/4361] Loss_D: 0.80697703 (Loss_D_real: 0.43489039 Loss_D_fake: 0.37208661) Loss_G: 0.03904104 Loss_Enh_Dec: -0.21338537
| epoch  15 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.96 | loss  4.08 | ppl    58.96 | acc     0.54 | train_ae_norm     1.00
[15/200][2799/4361] Loss_D: 0.84367108 (Loss_D_real: 0.47999758 Loss_D_fake: 0.36367351) Loss_G: 0.06315795 Loss_Enh_Dec: -0.20976818
| epoch  15 |  2800/ 4361 batches | lr 0.000000 | ms/batch 400.85 | loss  4.00 | ppl    54.61 | acc     0.52 | train_ae_norm     1.00
[15/200][2899/4361] Loss_D: 0.87066156 (Loss_D_real: 0.48541439 Loss_D_fake: 0.38524717) Loss_G: 0.06308474 Loss_Enh_Dec: -0.18286265
| epoch  15 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  4.06 | ppl    57.74 | acc     0.58 | train_ae_norm     1.00
[15/200][2999/4361] Loss_D: 0.70015889 (Loss_D_real: 0.39658558 Loss_D_fake: 0.30357331) Loss_G: 0.02518850 Loss_Enh_Dec: -0.25478962
| epoch  15 |  3000/ 4361 batches | lr 0.000000 | ms/batch 400

| epoch  16 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.31 | loss  4.02 | ppl    55.67 | acc     0.59 | train_ae_norm     1.00
[16/200][799/4361] Loss_D: 0.91821027 (Loss_D_real: 0.39269909 Loss_D_fake: 0.52551115) Loss_G: 0.03535310 Loss_Enh_Dec: -0.18858908
| epoch  16 |   800/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  3.99 | ppl    54.25 | acc     0.56 | train_ae_norm     1.00
[16/200][899/4361] Loss_D: 0.72714865 (Loss_D_real: 0.41273409 Loss_D_fake: 0.31441453) Loss_G: 0.05731382 Loss_Enh_Dec: -0.24311367
| epoch  16 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.30 | loss  4.04 | ppl    56.86 | acc     0.59 | train_ae_norm     1.00
[16/200][999/4361] Loss_D: 0.81655264 (Loss_D_real: 0.52886343 Loss_D_fake: 0.28768921) Loss_G: 0.03999960 Loss_Enh_Dec: -0.21629314
| epoch  16 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.52 | loss  4.01 | ppl    54.89 | acc     0.56 | train_ae_norm     1.00
[16/200][1099/4361] Loss_D: 0.97563505 (Loss_D_real: 0.60529697 L

| epoch  16 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.75 | loss  4.11 | ppl    60.99 | acc     0.58 | train_ae_norm     1.00
[16/200][3899/4361] Loss_D: 0.80006987 (Loss_D_real: 0.36507842 Loss_D_fake: 0.43499145) Loss_G: 0.02455233 Loss_Enh_Dec: -0.27065474
| epoch  16 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  4.11 | ppl    61.05 | acc     0.50 | train_ae_norm     1.00
[16/200][3999/4361] Loss_D: 0.83311975 (Loss_D_real: 0.46873444 Loss_D_fake: 0.36438534) Loss_G: 0.05074907 Loss_Enh_Dec: -0.39219609
| epoch  16 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  4.15 | ppl    63.21 | acc     0.54 | train_ae_norm     1.00
[16/200][4099/4361] Loss_D: 0.91962409 (Loss_D_real: 0.42365450 Loss_D_fake: 0.49596962) Loss_G: 0.07587512 Loss_Enh_Dec: -0.31916329
| epoch  16 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.55 | loss  4.10 | ppl    60.16 | acc     0.56 | train_ae_norm     1.00
[16/200][4199/4361] Loss_D: 0.89764655 (Loss_D_real: 0.4814196

| epoch  17 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.63 | loss  4.20 | ppl    66.53 | acc     0.54 | train_ae_norm     1.00
[17/200][1999/4361] Loss_D: 0.84532320 (Loss_D_real: 0.37902302 Loss_D_fake: 0.46630022) Loss_G: 0.06497040 Loss_Enh_Dec: -0.33408451
| epoch  17 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.59 | loss  4.13 | ppl    62.36 | acc     0.53 | train_ae_norm     1.00
[17/200][2099/4361] Loss_D: 0.82758081 (Loss_D_real: 0.33471105 Loss_D_fake: 0.49286973) Loss_G: 0.06372762 Loss_Enh_Dec: -0.36743590
| epoch  17 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.95 | loss  4.17 | ppl    64.72 | acc     0.53 | train_ae_norm     1.00
[17/200][2199/4361] Loss_D: 0.87307906 (Loss_D_real: 0.42231888 Loss_D_fake: 0.45076022) Loss_G: 0.03989534 Loss_Enh_Dec: -0.38275358
| epoch  17 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.00 | loss  4.18 | ppl    65.13 | acc     0.51 | train_ae_norm     1.00
[17/200][2299/4361] Loss_D: 0.91037607 (Loss_D_real: 0.4681150

| epoch  18 |   100/ 4361 batches | lr 0.000000 | ms/batch 402.24 | loss  4.23 | ppl    68.68 | acc     0.52 | train_ae_norm     1.00
[18/200][199/4361] Loss_D: 0.95635909 (Loss_D_real: 0.45292979 Loss_D_fake: 0.50342929) Loss_G: 0.02215381 Loss_Enh_Dec: -0.36123598
| epoch  18 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.57 | loss  4.25 | ppl    70.03 | acc     0.54 | train_ae_norm     1.00
[18/200][299/4361] Loss_D: 0.84468162 (Loss_D_real: 0.45660990 Loss_D_fake: 0.38807175) Loss_G: 0.04758188 Loss_Enh_Dec: -0.28572613
| epoch  18 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.33 | loss  4.24 | ppl    69.22 | acc     0.47 | train_ae_norm     1.00
[18/200][399/4361] Loss_D: 0.91565228 (Loss_D_real: 0.47266078 Loss_D_fake: 0.44299150) Loss_G: 0.04620230 Loss_Enh_Dec: -0.40849733
| epoch  18 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.14 | loss  4.17 | ppl    64.56 | acc     0.53 | train_ae_norm     1.00
[18/200][499/4361] Loss_D: 0.89667463 (Loss_D_real: 0.49183846 Lo

| epoch  18 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  4.37 | ppl    79.40 | acc     0.51 | train_ae_norm     1.00
[18/200][3299/4361] Loss_D: 0.86206424 (Loss_D_real: 0.40942606 Loss_D_fake: 0.45263821) Loss_G: 0.05900223 Loss_Enh_Dec: -0.23660672
| epoch  18 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.33 | loss  4.34 | ppl    76.54 | acc     0.51 | train_ae_norm     1.00
[18/200][3399/4361] Loss_D: 0.92543077 (Loss_D_real: 0.48016295 Loss_D_fake: 0.44526786) Loss_G: 0.07495121 Loss_Enh_Dec: -0.26894233
| epoch  18 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  4.29 | ppl    72.96 | acc     0.50 | train_ae_norm     1.00
[18/200][3499/4361] Loss_D: 0.83856481 (Loss_D_real: 0.41422269 Loss_D_fake: 0.42434213) Loss_G: 0.06992338 Loss_Enh_Dec: -0.31372935
| epoch  18 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.54 | loss  4.23 | ppl    68.91 | acc     0.48 | train_ae_norm     1.00
[18/200][3599/4361] Loss_D: 0.68179119 (Loss_D_real: 0.2714735

| epoch  19 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.06 | loss  4.27 | ppl    71.51 | acc     0.46 | train_ae_norm     1.00
[19/200][1499/4361] Loss_D: 0.46455896 (Loss_D_real: 0.29737887 Loss_D_fake: 0.16718011) Loss_G: 0.10278070 Loss_Enh_Dec: -0.61277992
| epoch  19 |  1500/ 4361 batches | lr 0.000000 | ms/batch 400.38 | loss  4.32 | ppl    75.07 | acc     0.46 | train_ae_norm     1.00
[19/200][1599/4361] Loss_D: 0.58903193 (Loss_D_real: 0.22595975 Loss_D_fake: 0.36307216) Loss_G: 0.06719339 Loss_Enh_Dec: -0.56341243
| epoch  19 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.12 | loss  4.30 | ppl    73.58 | acc     0.52 | train_ae_norm     1.00
[19/200][1699/4361] Loss_D: 0.66558945 (Loss_D_real: 0.40470034 Loss_D_fake: 0.26088914) Loss_G: 0.06368359 Loss_Enh_Dec: -0.54492944
| epoch  19 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.97 | loss  4.30 | ppl    74.06 | acc     0.47 | train_ae_norm     1.00
[19/200][1799/4361] Loss_D: 0.46024108 (Loss_D_real: 0.3303513

  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.702
  Average training loss discriminator: 0.742
  Training epcoh too

[20/200][2699/4361] Loss_D: 0.77339214 (Loss_D_real: 0.30473942 Loss_D_fake: 0.46865273) Loss_G: 0.02907758 Loss_Enh_Dec: -0.43850064
| epoch  20 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.80 | loss  4.25 | ppl    70.43 | acc     0.52 | train_ae_norm     1.00
[20/200][2799/4361] Loss_D: 0.83557111 (Loss_D_real: 0.32291013 Loss_D_fake: 0.51266098) Loss_G: 0.07746001 Loss_Enh_Dec: -0.40976954
| epoch  20 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.25 | loss  4.19 | ppl    66.02 | acc     0.47 | train_ae_norm     1.00
[20/200][2899/4361] Loss_D: 0.64084399 (Loss_D_real: 0.31574529 Loss_D_fake: 0.32509869) Loss_G: 0.05973517 Loss_Enh_Dec: -0.57540888
| epoch  20 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  4.25 | ppl    70.18 | acc     0.51 | train_ae_norm     1.00
[20/200][2999/4361] Loss_D: 0.80739075 (Loss_D_real: 0.40978608 Loss_D_fake: 0.39760467) Loss_G: 0.04983616 Loss_Enh_Dec: -0.58720183
| epoch  20 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401

[21/200][799/4361] Loss_D: 1.08156347 (Loss_D_real: 0.63037485 Loss_D_fake: 0.45118868) Loss_G: -0.00205098 Loss_Enh_Dec: -0.32285172
| epoch  21 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.04 | loss  4.27 | ppl    71.42 | acc     0.51 | train_ae_norm     1.00
[21/200][899/4361] Loss_D: 0.72941017 (Loss_D_real: 0.32073975 Loss_D_fake: 0.40867040) Loss_G: 0.03456929 Loss_Enh_Dec: -0.37831447
| epoch  21 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.30 | loss  4.31 | ppl    74.33 | acc     0.54 | train_ae_norm     1.00
[21/200][999/4361] Loss_D: 0.73582363 (Loss_D_real: 0.36855453 Loss_D_fake: 0.36726910) Loss_G: 0.05751867 Loss_Enh_Dec: -0.33862075
| epoch  21 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  4.29 | ppl    73.21 | acc     0.46 | train_ae_norm     1.00
[21/200][1099/4361] Loss_D: 0.76940459 (Loss_D_real: 0.52932024 Loss_D_fake: 0.24008434) Loss_G: 0.04221579 Loss_Enh_Dec: -0.41714308
| epoch  21 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.4

[21/200][3899/4361] Loss_D: 0.96142125 (Loss_D_real: 0.56531233 Loss_D_fake: 0.39610896) Loss_G: 0.04541794 Loss_Enh_Dec: -0.49881896
| epoch  21 |  3900/ 4361 batches | lr 0.000000 | ms/batch 400.53 | loss  4.31 | ppl    74.53 | acc     0.49 | train_ae_norm     1.00
[21/200][3999/4361] Loss_D: 0.78907096 (Loss_D_real: 0.43765959 Loss_D_fake: 0.35141134) Loss_G: 0.05169106 Loss_Enh_Dec: -0.57647818
| epoch  21 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.85 | loss  4.36 | ppl    78.46 | acc     0.51 | train_ae_norm     1.00
[21/200][4099/4361] Loss_D: 0.42525867 (Loss_D_real: 0.19829170 Loss_D_fake: 0.22696696) Loss_G: 0.09730550 Loss_Enh_Dec: -0.52725160
| epoch  21 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  4.31 | ppl    74.10 | acc     0.50 | train_ae_norm     1.00
[21/200][4199/4361] Loss_D: 0.38107949 (Loss_D_real: 0.10863083 Loss_D_fake: 0.27244866) Loss_G: 0.09958588 Loss_Enh_Dec: -0.47671849
| epoch  21 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401

[22/200][2099/4361] Loss_D: 0.18617618 (Loss_D_real: 0.05575432 Loss_D_fake: 0.13042186) Loss_G: 0.16072595 Loss_Enh_Dec: -0.45453468
| epoch  22 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.96 | loss  5.03 | ppl   152.45 | acc     0.40 | train_ae_norm     1.00
[22/200][2199/4361] Loss_D: 0.16148663 (Loss_D_real: 0.05890999 Loss_D_fake: 0.10257664) Loss_G: 0.16931891 Loss_Enh_Dec: -0.49952617
| epoch  22 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.83 | loss  5.00 | ppl   147.82 | acc     0.36 | train_ae_norm     1.00
[22/200][2299/4361] Loss_D: 0.12404772 (Loss_D_real: 0.03938917 Loss_D_fake: 0.08465855) Loss_G: 0.17387471 Loss_Enh_Dec: -0.54277992
| epoch  22 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  4.97 | ppl   144.05 | acc     0.37 | train_ae_norm     1.00
[22/200][2399/4361] Loss_D: 0.23793787 (Loss_D_real: 0.16093200 Loss_D_fake: 0.07700586) Loss_G: 0.18048111 Loss_Enh_Dec: -0.52803928
| epoch  22 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401

[23/200][299/4361] Loss_D: 0.38089621 (Loss_D_real: 0.22695820 Loss_D_fake: 0.15393800) Loss_G: 0.13565229 Loss_Enh_Dec: -0.54217452
| epoch  23 |   300/ 4361 batches | lr 0.000000 | ms/batch 400.95 | loss  4.72 | ppl   112.49 | acc     0.38 | train_ae_norm     1.00
[23/200][399/4361] Loss_D: 0.28709033 (Loss_D_real: 0.11695942 Loss_D_fake: 0.17013091) Loss_G: 0.12667874 Loss_Enh_Dec: -0.57771963
| epoch  23 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.06 | loss  4.62 | ppl   101.45 | acc     0.41 | train_ae_norm     1.00
[23/200][499/4361] Loss_D: 0.31567627 (Loss_D_real: 0.15604912 Loss_D_fake: 0.15962714) Loss_G: 0.12556474 Loss_Enh_Dec: -0.74091798
| epoch  23 |   500/ 4361 batches | lr 0.000000 | ms/batch 400.78 | loss  4.69 | ppl   108.64 | acc     0.47 | train_ae_norm     1.00
[23/200][599/4361] Loss_D: 0.28271863 (Loss_D_real: 0.15884887 Loss_D_fake: 0.12386977) Loss_G: 0.14078216 Loss_Enh_Dec: -0.68890154
| epoch  23 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.63 

[23/200][3399/4361] Loss_D: 0.07366658 (Loss_D_real: 0.01624930 Loss_D_fake: 0.05741727) Loss_G: 0.20290394 Loss_Enh_Dec: -0.73678714
| epoch  23 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  4.48 | ppl    88.18 | acc     0.47 | train_ae_norm     1.00
[23/200][3499/4361] Loss_D: 0.07686626 (Loss_D_real: 0.03930934 Loss_D_fake: 0.03755692) Loss_G: 0.19410646 Loss_Enh_Dec: -0.58516276
| epoch  23 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.61 | loss  4.38 | ppl    79.46 | acc     0.46 | train_ae_norm     1.00
[23/200][3599/4361] Loss_D: 0.09173568 (Loss_D_real: 0.04177930 Loss_D_fake: 0.04995637) Loss_G: 0.20493484 Loss_Enh_Dec: -0.66253829
| epoch  23 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.79 | loss  4.40 | ppl    81.07 | acc     0.48 | train_ae_norm     1.00
[23/200][3699/4361] Loss_D: 0.07121073 (Loss_D_real: 0.03714109 Loss_D_fake: 0.03406964) Loss_G: 0.19190823 Loss_Enh_Dec: -0.25221977
| epoch  23 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401

[24/200][1599/4361] Loss_D: 0.05622664 (Loss_D_real: 0.02739789 Loss_D_fake: 0.02882875) Loss_G: 0.21118188 Loss_Enh_Dec: -0.59085387
| epoch  24 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.70 | loss  4.31 | ppl    74.10 | acc     0.51 | train_ae_norm     1.00
[24/200][1699/4361] Loss_D: 0.04776812 (Loss_D_real: 0.02360401 Loss_D_fake: 0.02416411) Loss_G: 0.23764150 Loss_Enh_Dec: -0.62949729
| epoch  24 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.54 | loss  4.30 | ppl    73.77 | acc     0.50 | train_ae_norm     1.00
[24/200][1799/4361] Loss_D: 0.09393361 (Loss_D_real: 0.06492414 Loss_D_fake: 0.02900947) Loss_G: 0.22838926 Loss_Enh_Dec: -0.58316910
| epoch  24 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.77 | loss  4.25 | ppl    70.27 | acc     0.49 | train_ae_norm     1.00
[24/200][1899/4361] Loss_D: 0.03831032 (Loss_D_real: 0.01761284 Loss_D_fake: 0.02069747) Loss_G: 0.21990283 Loss_Enh_Dec: -0.51067019
| epoch  24 |  1900/ 4361 batches | lr 0.000000 | ms/batch 402

  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:03.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:18.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.701
  Average training loss discriminator: 0.729
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.450
  Test Loss: 3.367
  Test took: 0:00:00
Train ot

| epoch  25 |  2700/ 4361 batches | lr 0.000000 | ms/batch 402.02 | loss  4.39 | ppl    80.88 | acc     0.44 | train_ae_norm     1.00
[25/200][2799/4361] Loss_D: 0.05432684 (Loss_D_real: 0.02887665 Loss_D_fake: 0.02545018) Loss_G: 0.25264955 Loss_Enh_Dec: -0.68801516
| epoch  25 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.97 | loss  4.33 | ppl    75.59 | acc     0.48 | train_ae_norm     1.00
[25/200][2899/4361] Loss_D: 0.04346242 (Loss_D_real: 0.01396716 Loss_D_fake: 0.02949526) Loss_G: 0.22967958 Loss_Enh_Dec: -0.86194795
| epoch  25 |  2900/ 4361 batches | lr 0.000000 | ms/batch 402.16 | loss  4.37 | ppl    79.31 | acc     0.48 | train_ae_norm     1.00
[25/200][2999/4361] Loss_D: 0.04618986 (Loss_D_real: 0.02403639 Loss_D_fake: 0.02215347) Loss_G: 0.23966442 Loss_Enh_Dec: -0.89202636
| epoch  25 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  4.33 | ppl    75.96 | acc     0.48 | train_ae_norm     1.00
[25/200][3099/4361] Loss_D: 0.03572028 (Loss_D_real: 0.0147940

| epoch  26 |   800/ 4361 batches | lr 0.000000 | ms/batch 402.19 | loss  4.29 | ppl    72.85 | acc     0.49 | train_ae_norm     1.00
[26/200][899/4361] Loss_D: 0.05296314 (Loss_D_real: 0.01960831 Loss_D_fake: 0.03335483) Loss_G: 0.25861013 Loss_Enh_Dec: -0.75757599
| epoch  26 |   900/ 4361 batches | lr 0.000000 | ms/batch 402.00 | loss  4.30 | ppl    73.91 | acc     0.54 | train_ae_norm     1.00
[26/200][999/4361] Loss_D: 0.03564500 (Loss_D_real: 0.00939846 Loss_D_fake: 0.02624653) Loss_G: 0.23935771 Loss_Enh_Dec: -0.99226838
| epoch  26 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.61 | loss  4.31 | ppl    74.58 | acc     0.49 | train_ae_norm     1.00
[26/200][1099/4361] Loss_D: 0.06013688 (Loss_D_real: 0.04236945 Loss_D_fake: 0.01776742) Loss_G: 0.25438029 Loss_Enh_Dec: -0.92202377
| epoch  26 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  4.29 | ppl    72.77 | acc     0.49 | train_ae_norm     1.00
[26/200][1199/4361] Loss_D: 0.02907197 (Loss_D_real: 0.01530122 

| epoch  26 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.14 | loss  4.17 | ppl    64.66 | acc     0.48 | train_ae_norm     1.00
[26/200][3999/4361] Loss_D: 0.02443878 (Loss_D_real: 0.01227204 Loss_D_fake: 0.01216674) Loss_G: 0.27853337 Loss_Enh_Dec: -1.02975571
| epoch  26 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  4.19 | ppl    66.30 | acc     0.51 | train_ae_norm     1.00
[26/200][4099/4361] Loss_D: 0.04734094 (Loss_D_real: 0.01469144 Loss_D_fake: 0.03264950) Loss_G: 0.28960887 Loss_Enh_Dec: -0.74920124
| epoch  26 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  4.15 | ppl    63.56 | acc     0.48 | train_ae_norm     1.00
[26/200][4199/4361] Loss_D: 0.01455241 (Loss_D_real: 0.00524452 Loss_D_fake: 0.00930789) Loss_G: 0.26726133 Loss_Enh_Dec: -0.58976835
| epoch  26 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.02 | loss  4.19 | ppl    65.92 | acc     0.55 | train_ae_norm     1.00
[26/200][4299/4361] Loss_D: 0.02007100 (Loss_D_real: 0.0099131

| epoch  27 |  2100/ 4361 batches | lr 0.000000 | ms/batch 402.19 | loss  4.11 | ppl    60.89 | acc     0.53 | train_ae_norm     1.00
[27/200][2199/4361] Loss_D: 0.02633579 (Loss_D_real: 0.00811329 Loss_D_fake: 0.01822250) Loss_G: 0.29853562 Loss_Enh_Dec: -1.11888981
| epoch  27 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.89 | loss  4.11 | ppl    61.23 | acc     0.55 | train_ae_norm     1.00
[27/200][2299/4361] Loss_D: 0.06378215 (Loss_D_real: 0.05290471 Loss_D_fake: 0.01087744) Loss_G: 0.26223484 Loss_Enh_Dec: -1.23040926
| epoch  27 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.84 | loss  4.09 | ppl    59.91 | acc     0.53 | train_ae_norm     1.00
[27/200][2399/4361] Loss_D: 0.03734780 (Loss_D_real: 0.02069056 Loss_D_fake: 0.01665724) Loss_G: 0.25467023 Loss_Enh_Dec: -1.27815330
| epoch  27 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.70 | loss  4.10 | ppl    60.54 | acc     0.50 | train_ae_norm     1.00
[27/200][2499/4361] Loss_D: 0.01997600 (Loss_D_real: 0.0045983

| epoch  28 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  4.01 | ppl    54.99 | acc     0.52 | train_ae_norm     1.00
[28/200][399/4361] Loss_D: 0.01503472 (Loss_D_real: 0.00433981 Loss_D_fake: 0.01069491) Loss_G: 0.27698380 Loss_Enh_Dec: -1.17407107
| epoch  28 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.79 | loss  3.94 | ppl    51.29 | acc     0.56 | train_ae_norm     1.00
[28/200][499/4361] Loss_D: 0.02935276 (Loss_D_real: 0.01777070 Loss_D_fake: 0.01158206) Loss_G: 0.28502518 Loss_Enh_Dec: -1.26613498
| epoch  28 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.99 | loss  3.99 | ppl    53.94 | acc     0.56 | train_ae_norm     1.00
[28/200][599/4361] Loss_D: 0.01974972 (Loss_D_real: 0.00646375 Loss_D_fake: 0.01328597) Loss_G: 0.29186511 Loss_Enh_Dec: -1.14555252
| epoch  28 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  3.92 | ppl    50.36 | acc     0.51 | train_ae_norm     1.00
[28/200][699/4361] Loss_D: 0.01873909 (Loss_D_real: 0.01004052 Lo

| epoch  28 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  4.03 | ppl    56.19 | acc     0.57 | train_ae_norm     1.00
[28/200][3499/4361] Loss_D: 0.01274922 (Loss_D_real: 0.00433509 Loss_D_fake: 0.00841413) Loss_G: 0.29017004 Loss_Enh_Dec: -1.17693508
| epoch  28 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.76 | loss  3.97 | ppl    52.94 | acc     0.56 | train_ae_norm     1.00
[28/200][3599/4361] Loss_D: 0.01821311 (Loss_D_real: 0.00369475 Loss_D_fake: 0.01451835) Loss_G: 0.28946218 Loss_Enh_Dec: -1.27620459
| epoch  28 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.59 | loss  4.01 | ppl    55.05 | acc     0.52 | train_ae_norm     1.00
[28/200][3699/4361] Loss_D: 0.15358132 (Loss_D_real: 0.13845912 Loss_D_fake: 0.01512221) Loss_G: 0.27494916 Loss_Enh_Dec: -1.43056893
| epoch  28 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  4.02 | ppl    55.75 | acc     0.52 | train_ae_norm     1.00
[28/200][3799/4361] Loss_D: 0.05671057 (Loss_D_real: 0.0256143

| epoch  29 |  1500/ 4361 batches | lr 0.000000 | ms/batch 402.24 | loss  3.95 | ppl    51.85 | acc     0.54 | train_ae_norm     1.00
[29/200][1599/4361] Loss_D: 0.03751982 (Loss_D_real: 0.00845385 Loss_D_fake: 0.02906597) Loss_G: 0.32333156 Loss_Enh_Dec: -0.90684456
| epoch  29 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  3.91 | ppl    50.09 | acc     0.55 | train_ae_norm     1.00
[29/200][1699/4361] Loss_D: 0.02139116 (Loss_D_real: 0.00812394 Loss_D_fake: 0.01326722) Loss_G: 0.28235036 Loss_Enh_Dec: -0.38882685
| epoch  29 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.79 | loss  3.93 | ppl    50.89 | acc     0.55 | train_ae_norm     1.00
[29/200][1799/4361] Loss_D: 0.02966225 (Loss_D_real: 0.01354001 Loss_D_fake: 0.01612224) Loss_G: 0.30774999 Loss_Enh_Dec: -0.93949395
| epoch  29 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.30 | loss  3.90 | ppl    49.16 | acc     0.57 | train_ae_norm     1.00
[29/200][1899/4361] Loss_D: 0.08633949 (Loss_D_real: 0.0125463

  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.701
  Average training loss discriminator: 0.719
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.468
  Test Loss: 3.617
  Test took: 0:00:00
Train other shit
| epoch  30 |     0/ 4361 batches | lr 0.000000 | ms/batch 863.50 | loss  0.04 | ppl     1.04 | acc     0.57 | train_ae_norm     1.00
[30/200][99/4361] Loss_D: 0.05026804 (Loss_D_

| epoch  30 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.07 | loss  3.86 | ppl    47.38 | acc     0.54 | train_ae_norm     1.00
[30/200][2899/4361] Loss_D: 0.03728964 (Loss_D_real: 0.02783206 Loss_D_fake: 0.00945758) Loss_G: 0.31216127 Loss_Enh_Dec: -1.19124377
| epoch  30 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.11 | loss  3.88 | ppl    48.23 | acc     0.59 | train_ae_norm     1.00
[30/200][2999/4361] Loss_D: 0.01427097 (Loss_D_real: 0.00627243 Loss_D_fake: 0.00799854) Loss_G: 0.34792542 Loss_Enh_Dec: -1.27516305
| epoch  30 |  3000/ 4361 batches | lr 0.000000 | ms/batch 402.01 | loss  3.86 | ppl    47.63 | acc     0.58 | train_ae_norm     1.00
[30/200][3099/4361] Loss_D: 0.03078642 (Loss_D_real: 0.00483973 Loss_D_fake: 0.02594669) Loss_G: 0.32411152 Loss_Enh_Dec: -1.47190821
| epoch  30 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  3.84 | ppl    46.58 | acc     0.55 | train_ae_norm     1.00
[30/200][3199/4361] Loss_D: 0.02108639 (Loss_D_real: 0.0070926

[31/200][899/4361] Loss_D: 0.03109912 (Loss_D_real: 0.00596936 Loss_D_fake: 0.02512976) Loss_G: 0.29948446 Loss_Enh_Dec: -0.56116629
| epoch  31 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.43 | loss  3.83 | ppl    46.03 | acc     0.58 | train_ae_norm     1.00
[31/200][999/4361] Loss_D: 0.12098791 (Loss_D_real: 0.10001531 Loss_D_fake: 0.02097259) Loss_G: 0.32011595 Loss_Enh_Dec: -0.95660889
| epoch  31 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  3.79 | ppl    44.38 | acc     0.57 | train_ae_norm     1.00
[31/200][1099/4361] Loss_D: 0.01917037 (Loss_D_real: 0.01133789 Loss_D_fake: 0.00783248) Loss_G: 0.28159890 Loss_Enh_Dec: -1.01754141
| epoch  31 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  3.80 | ppl    44.79 | acc     0.54 | train_ae_norm     1.00
[31/200][1199/4361] Loss_D: 0.00966048 (Loss_D_real: 0.00569840 Loss_D_fake: 0.00396208) Loss_G: 0.37636933 Loss_Enh_Dec: -1.30536973
| epoch  31 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.6

[31/200][3999/4361] Loss_D: 0.00992980 (Loss_D_real: 0.00267516 Loss_D_fake: 0.00725465) Loss_G: 0.35021076 Loss_Enh_Dec: -1.27540231
| epoch  31 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  3.78 | ppl    44.02 | acc     0.57 | train_ae_norm     1.00
[31/200][4099/4361] Loss_D: 0.04167500 (Loss_D_real: 0.03248236 Loss_D_fake: 0.00919264) Loss_G: 0.33971611 Loss_Enh_Dec: -1.23186982
| epoch  31 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.25 | loss  3.73 | ppl    41.79 | acc     0.58 | train_ae_norm     1.00
[31/200][4199/4361] Loss_D: 0.01252860 (Loss_D_real: 0.00551111 Loss_D_fake: 0.00701749) Loss_G: 0.28551060 Loss_Enh_Dec: -1.36445153
| epoch  31 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.77 | ppl    43.36 | acc     0.60 | train_ae_norm     1.00
[31/200][4299/4361] Loss_D: 0.00567307 (Loss_D_real: 0.00217507 Loss_D_fake: 0.00349800) Loss_G: 0.29002324 Loss_Enh_Dec: -1.42581213
| epoch  31 |  4300/ 4361 batches | lr 0.000000 | ms/batch 402

[32/200][2199/4361] Loss_D: 0.01813951 (Loss_D_real: 0.01184343 Loss_D_fake: 0.00629608) Loss_G: 0.35285211 Loss_Enh_Dec: -0.94951242
| epoch  32 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.93 | loss  3.66 | ppl    39.02 | acc     0.61 | train_ae_norm     1.00
[32/200][2299/4361] Loss_D: 0.01253270 (Loss_D_real: 0.00486967 Loss_D_fake: 0.00766303) Loss_G: 0.35827893 Loss_Enh_Dec: -1.23600101
| epoch  32 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  3.66 | ppl    38.80 | acc     0.61 | train_ae_norm     1.00
[32/200][2399/4361] Loss_D: 0.01441760 (Loss_D_real: 0.01127923 Loss_D_fake: 0.00313837) Loss_G: 0.33772677 Loss_Enh_Dec: -1.16471183
| epoch  32 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  3.73 | ppl    41.50 | acc     0.56 | train_ae_norm     1.00
[32/200][2499/4361] Loss_D: 0.01370842 (Loss_D_real: 0.00403541 Loss_D_fake: 0.00967301) Loss_G: 0.31622067 Loss_Enh_Dec: -1.12564301
| epoch  32 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401

[33/200][299/4361] Loss_D: 0.01401756 (Loss_D_real: 0.00614401 Loss_D_fake: 0.00787354) Loss_G: 0.34048203 Loss_Enh_Dec: -1.10815144
| epoch  33 |   300/ 4361 batches | lr 0.000000 | ms/batch 402.26 | loss  3.72 | ppl    41.14 | acc     0.58 | train_ae_norm     1.00
[33/200][399/4361] Loss_D: 0.01890448 (Loss_D_real: 0.00956311 Loss_D_fake: 0.00934137) Loss_G: 0.35260826 Loss_Enh_Dec: -0.58023465
| epoch  33 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  3.63 | ppl    37.54 | acc     0.61 | train_ae_norm     1.00
[33/200][499/4361] Loss_D: 0.00610249 (Loss_D_real: 0.00236683 Loss_D_fake: 0.00373566) Loss_G: 0.33097148 Loss_Enh_Dec: -0.86899871
| epoch  33 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.17 | loss  3.74 | ppl    41.92 | acc     0.61 | train_ae_norm     1.00
[33/200][599/4361] Loss_D: 0.00926257 (Loss_D_real: 0.00606920 Loss_D_fake: 0.00319338) Loss_G: 0.41061154 Loss_Enh_Dec: -0.40960789
| epoch  33 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.09 

[33/200][3399/4361] Loss_D: 0.01011462 (Loss_D_real: 0.00272643 Loss_D_fake: 0.00738818) Loss_G: 0.36566469 Loss_Enh_Dec: -1.38878703
| epoch  33 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.49 | loss  3.73 | ppl    41.49 | acc     0.59 | train_ae_norm     1.00
[33/200][3499/4361] Loss_D: 0.01467701 (Loss_D_real: 0.00283253 Loss_D_fake: 0.01184447) Loss_G: 0.30500886 Loss_Enh_Dec: -1.08668554
| epoch  33 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  3.68 | ppl    39.75 | acc     0.61 | train_ae_norm     1.00
[33/200][3599/4361] Loss_D: 0.06073876 (Loss_D_real: 0.05082101 Loss_D_fake: 0.00991776) Loss_G: 0.34230047 Loss_Enh_Dec: -1.10507154
| epoch  33 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.11 | loss  3.72 | ppl    41.40 | acc     0.59 | train_ae_norm     1.00
[33/200][3699/4361] Loss_D: 0.06510981 (Loss_D_real: 0.06060772 Loss_D_fake: 0.00450209) Loss_G: 0.34776163 Loss_Enh_Dec: -1.02370465
| epoch  33 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401

[34/200][1599/4361] Loss_D: 0.03177774 (Loss_D_real: 0.02601618 Loss_D_fake: 0.00576156) Loss_G: 0.33348024 Loss_Enh_Dec: -0.49118719
| epoch  34 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  3.74 | ppl    42.01 | acc     0.60 | train_ae_norm     1.00
[34/200][1699/4361] Loss_D: 0.03473125 (Loss_D_real: 0.00875338 Loss_D_fake: 0.02597788) Loss_G: 0.30728036 Loss_Enh_Dec: -0.43392855
| epoch  34 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.77 | loss  3.73 | ppl    41.59 | acc     0.56 | train_ae_norm     1.00
[34/200][1799/4361] Loss_D: 0.00927728 (Loss_D_real: 0.00309245 Loss_D_fake: 0.00618484) Loss_G: 0.44269830 Loss_Enh_Dec: -0.38277358
| epoch  34 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.40 | loss  3.70 | ppl    40.46 | acc     0.59 | train_ae_norm     1.00
[34/200][1899/4361] Loss_D: 0.00679155 (Loss_D_real: 0.00139006 Loss_D_fake: 0.00540149) Loss_G: 0.29869500 Loss_Enh_Dec: -0.98652333
| epoch  34 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401

  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.703
  Average training loss discriminator: 0.710
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.475
  Test Loss: 3.742
  Test took: 0:00:00
Train other shit
| epoch  35 |     0/ 4361 batches | lr 0.000000 | ms/batch 866.37 | loss  0.05 | ppl     1.05 | acc     0.40 | train_ae_norm     1.00
[35/200][99/4361] Loss_D: 0.05318007 (Loss_D_real: 0.03003431 Loss_D_fake: 0.02314575) Loss_G: 0.40515938 Loss_Enh_Dec: -1.42102838
| epoch  35 |   100/ 4361 batches | lr 0.000000 | ms/b

[35/200][2899/4361] Loss_D: 0.02048650 (Loss_D_real: 0.01102375 Loss_D_fake: 0.00946274) Loss_G: 0.29643181 Loss_Enh_Dec: -0.37315825
| epoch  35 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.58 | loss  3.73 | ppl    41.58 | acc     0.59 | train_ae_norm     1.00
[35/200][2999/4361] Loss_D: 0.06019558 (Loss_D_real: 0.04945004 Loss_D_fake: 0.01074554) Loss_G: 0.32337865 Loss_Enh_Dec: -0.22666223
| epoch  35 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.63 | loss  3.73 | ppl    41.81 | acc     0.56 | train_ae_norm     1.00
[35/200][3099/4361] Loss_D: 0.02120469 (Loss_D_real: 0.01519718 Loss_D_fake: 0.00600751) Loss_G: 0.30118340 Loss_Enh_Dec: -0.35775661
| epoch  35 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  3.77 | ppl    43.45 | acc     0.54 | train_ae_norm     1.00
[35/200][3199/4361] Loss_D: 0.02646044 (Loss_D_real: 0.01935195 Loss_D_fake: 0.00710849) Loss_G: 0.31031165 Loss_Enh_Dec: -0.63907760
| epoch  35 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401

[36/200][999/4361] Loss_D: 0.06329595 (Loss_D_real: 0.04205224 Loss_D_fake: 0.02124371) Loss_G: 0.37016773 Loss_Enh_Dec: -0.27429143
| epoch  36 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  3.68 | ppl    39.68 | acc     0.58 | train_ae_norm     1.00
[36/200][1099/4361] Loss_D: 0.07772225 (Loss_D_real: 0.06318324 Loss_D_fake: 0.01453901) Loss_G: 0.31332669 Loss_Enh_Dec: -1.24838471
| epoch  36 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  3.69 | ppl    39.86 | acc     0.57 | train_ae_norm     1.00
[36/200][1199/4361] Loss_D: 0.03172272 (Loss_D_real: 0.01397930 Loss_D_fake: 0.01774341) Loss_G: 0.27147529 Loss_Enh_Dec: -0.51386529
| epoch  36 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.27 | loss  3.70 | ppl    40.26 | acc     0.60 | train_ae_norm     1.00
[36/200][1299/4361] Loss_D: 0.04084889 (Loss_D_real: 0.02877619 Loss_D_fake: 0.01207270) Loss_G: 0.27152821 Loss_Enh_Dec: -1.48539412
| epoch  36 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.

[36/200][4099/4361] Loss_D: 0.02811099 (Loss_D_real: 0.01339681 Loss_D_fake: 0.01471418) Loss_G: 0.37115827 Loss_Enh_Dec: -0.99850047
| epoch  36 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.86 | loss  3.67 | ppl    39.10 | acc     0.56 | train_ae_norm     1.00
[36/200][4199/4361] Loss_D: 0.03146667 (Loss_D_real: 0.00619223 Loss_D_fake: 0.02527444) Loss_G: 0.29103929 Loss_Enh_Dec: -1.36012137
| epoch  36 |  4200/ 4361 batches | lr 0.000000 | ms/batch 402.01 | loss  3.72 | ppl    41.44 | acc     0.61 | train_ae_norm     1.00
[36/200][4299/4361] Loss_D: 0.06490597 (Loss_D_real: 0.02067445 Loss_D_fake: 0.04423152) Loss_G: 0.31034407 Loss_Enh_Dec: -0.99660999
| epoch  36 |  4300/ 4361 batches | lr 0.000000 | ms/batch 400.94 | loss  3.70 | ppl    40.52 | acc     0.57 | train_ae_norm     1.00
| end of epoch  36 | time: 1853.16s | test loss  3.54 | test ppl 34.48 | acc 0.638
bleu_self:  [4.07589286e-01 2.52815019e-01 9.39816859e-02 1.63543511e-05
 1.01308596e-07]
bleu_test:  [8.62351190

[37/200][2199/4361] Loss_D: 0.01110720 (Loss_D_real: 0.00190412 Loss_D_fake: 0.00920308) Loss_G: 0.33991754 Loss_Enh_Dec: -1.55623019
| epoch  37 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  3.69 | ppl    40.02 | acc     0.59 | train_ae_norm     1.00
[37/200][2299/4361] Loss_D: 0.05057864 (Loss_D_real: 0.03158757 Loss_D_fake: 0.01899107) Loss_G: 0.28021431 Loss_Enh_Dec: -1.50290930
| epoch  37 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  3.66 | ppl    38.97 | acc     0.61 | train_ae_norm     1.00
[37/200][2399/4361] Loss_D: 0.03531947 (Loss_D_real: 0.00809129 Loss_D_fake: 0.02722819) Loss_G: 0.38267156 Loss_Enh_Dec: -1.50008702
| epoch  37 |  2400/ 4361 batches | lr 0.000000 | ms/batch 402.05 | loss  3.66 | ppl    38.97 | acc     0.57 | train_ae_norm     1.00
[37/200][2499/4361] Loss_D: 0.02326164 (Loss_D_real: 0.01107307 Loss_D_fake: 0.01218857) Loss_G: 0.29046470 Loss_Enh_Dec: -1.28308618
| epoch  37 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401

[38/200][399/4361] Loss_D: 0.02111973 (Loss_D_real: 0.00936551 Loss_D_fake: 0.01175422) Loss_G: 0.31159827 Loss_Enh_Dec: -1.26343155
| epoch  38 |   400/ 4361 batches | lr 0.000000 | ms/batch 402.39 | loss  3.60 | ppl    36.62 | acc     0.59 | train_ae_norm     1.00
[38/200][499/4361] Loss_D: 0.03528559 (Loss_D_real: 0.00609665 Loss_D_fake: 0.02918894) Loss_G: 0.31112164 Loss_Enh_Dec: -1.68137515
| epoch  38 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.65 | loss  3.67 | ppl    39.42 | acc     0.60 | train_ae_norm     1.00
[38/200][599/4361] Loss_D: 0.03530757 (Loss_D_real: 0.02771565 Loss_D_fake: 0.00759192) Loss_G: 0.29464966 Loss_Enh_Dec: -1.35234940
| epoch  38 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.84 | loss  3.63 | ppl    37.90 | acc     0.56 | train_ae_norm     1.00
[38/200][699/4361] Loss_D: 0.22625896 (Loss_D_real: 0.15096581 Loss_D_fake: 0.07529316) Loss_G: 0.30339432 Loss_Enh_Dec: -1.21307266
| epoch  38 |   700/ 4361 batches | lr 0.000000 | ms/batch 402.18 

[38/200][3499/4361] Loss_D: 0.05079265 (Loss_D_real: 0.04145371 Loss_D_fake: 0.00933894) Loss_G: 0.32231686 Loss_Enh_Dec: -0.73301047
| epoch  38 |  3500/ 4361 batches | lr 0.000000 | ms/batch 402.31 | loss  3.59 | ppl    36.32 | acc     0.60 | train_ae_norm     1.00
[38/200][3599/4361] Loss_D: 0.04037683 (Loss_D_real: 0.03006030 Loss_D_fake: 0.01031653) Loss_G: 0.29387131 Loss_Enh_Dec: -0.95263195
| epoch  38 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.16 | loss  3.61 | ppl    36.87 | acc     0.61 | train_ae_norm     1.00
[38/200][3699/4361] Loss_D: 0.03255938 (Loss_D_real: 0.00680419 Loss_D_fake: 0.02575520) Loss_G: 0.29069850 Loss_Enh_Dec: -1.17314303
| epoch  38 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.62 | ppl    37.17 | acc     0.58 | train_ae_norm     1.00
[38/200][3799/4361] Loss_D: 0.01230628 (Loss_D_real: 0.00709169 Loss_D_fake: 0.00521460) Loss_G: 0.32532158 Loss_Enh_Dec: -0.82546198
| epoch  38 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401

[39/200][1699/4361] Loss_D: 0.19553091 (Loss_D_real: 0.18152641 Loss_D_fake: 0.01400450) Loss_G: 0.30540034 Loss_Enh_Dec: -1.21928489
| epoch  39 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  3.55 | ppl    34.91 | acc     0.57 | train_ae_norm     1.00
[39/200][1799/4361] Loss_D: 0.08085936 (Loss_D_real: 0.07569014 Loss_D_fake: 0.00516922) Loss_G: 0.34300384 Loss_Enh_Dec: -1.29366410
| epoch  39 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.47 | loss  3.52 | ppl    33.86 | acc     0.62 | train_ae_norm     1.00
[39/200][1899/4361] Loss_D: 0.13495734 (Loss_D_real: 0.12632737 Loss_D_fake: 0.00862997) Loss_G: 0.28343275 Loss_Enh_Dec: -1.26101911
| epoch  39 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.74 | loss  3.58 | ppl    35.99 | acc     0.63 | train_ae_norm     1.00
[39/200][1999/4361] Loss_D: 0.01442305 (Loss_D_real: 0.00626639 Loss_D_fake: 0.00815667) Loss_G: 0.31828648 Loss_Enh_Dec: -1.53867447
| epoch  39 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401

[40/200][99/4361] Loss_D: 0.00833093 (Loss_D_real: 0.00291725 Loss_D_fake: 0.00541368) Loss_G: 0.32741687 Loss_Enh_Dec: -1.01766241
| epoch  40 |   100/ 4361 batches | lr 0.000000 | ms/batch 402.06 | loss  3.54 | ppl    34.50 | acc     0.59 | train_ae_norm     1.00
[40/200][199/4361] Loss_D: 0.05465482 (Loss_D_real: 0.02339814 Loss_D_fake: 0.03125668) Loss_G: 0.32700077 Loss_Enh_Dec: -1.39574337
| epoch  40 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.45 | loss  3.56 | ppl    35.21 | acc     0.63 | train_ae_norm     1.00
[40/200][299/4361] Loss_D: 0.00822841 (Loss_D_real: 0.00305533 Loss_D_fake: 0.00517308) Loss_G: 0.32509682 Loss_Enh_Dec: -1.36195171
| epoch  40 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.82 | loss  3.58 | ppl    35.84 | acc     0.59 | train_ae_norm     1.00
[40/200][399/4361] Loss_D: 0.07837366 (Loss_D_real: 0.05839328 Loss_D_fake: 0.01998038) Loss_G: 0.53778344 Loss_Enh_Dec: -0.70406258
| epoch  40 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.23 |

[40/200][3199/4361] Loss_D: 0.16098671 (Loss_D_real: 0.02297350 Loss_D_fake: 0.13801321) Loss_G: 0.50041085 Loss_Enh_Dec: -0.81088775
| epoch  40 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.09 | loss  3.56 | ppl    35.24 | acc     0.62 | train_ae_norm     1.00
[40/200][3299/4361] Loss_D: 0.01440699 (Loss_D_real: 0.00286189 Loss_D_fake: 0.01154511) Loss_G: 0.24328171 Loss_Enh_Dec: -1.18902469
| epoch  40 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  3.57 | ppl    35.62 | acc     0.61 | train_ae_norm     1.00
[40/200][3399/4361] Loss_D: 0.04963849 (Loss_D_real: 0.04044256 Loss_D_fake: 0.00919593) Loss_G: 0.24814974 Loss_Enh_Dec: -1.17182279
| epoch  40 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  3.53 | ppl    34.08 | acc     0.61 | train_ae_norm     1.00
[40/200][3499/4361] Loss_D: 0.22453745 (Loss_D_real: 0.01373246 Loss_D_fake: 0.21080498) Loss_G: 0.27590623 Loss_Enh_Dec: -1.22816420
| epoch  40 |  3500/ 4361 batches | lr 0.000000 | ms/batch 400

[41/200][1299/4361] Loss_D: 0.00755382 (Loss_D_real: 0.00171779 Loss_D_fake: 0.00583603) Loss_G: 0.30113909 Loss_Enh_Dec: -1.63520467
| epoch  41 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  3.57 | ppl    35.57 | acc     0.60 | train_ae_norm     1.00
[41/200][1399/4361] Loss_D: 0.01352487 (Loss_D_real: 0.00825998 Loss_D_fake: 0.00526488) Loss_G: 0.30636826 Loss_Enh_Dec: -1.77852499
| epoch  41 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.98 | loss  3.57 | ppl    35.63 | acc     0.56 | train_ae_norm     1.00
[41/200][1499/4361] Loss_D: 0.05702377 (Loss_D_real: 0.00690976 Loss_D_fake: 0.05011402) Loss_G: 0.48086295 Loss_Enh_Dec: -1.64305270
| epoch  41 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.59 | loss  3.59 | ppl    36.30 | acc     0.59 | train_ae_norm     1.00
[41/200][1599/4361] Loss_D: 0.03825755 (Loss_D_real: 0.02365303 Loss_D_fake: 0.01460451) Loss_G: 0.30412346 Loss_Enh_Dec: -1.71094632
| epoch  41 |  1600/ 4361 batches | lr 0.000000 | ms/batch 402

| end of epoch  41 | time: 1852.54s | test loss  3.40 | test ppl 30.08 | acc 0.652
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    23

[42/200][2599/4361] Loss_D: 0.01725084 (Loss_D_real: 0.00140796 Loss_D_fake: 0.01584288) Loss_G: 0.31070367 Loss_Enh_Dec: -1.91784596
| epoch  42 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.25 | loss  3.55 | ppl    34.89 | acc     0.57 | train_ae_norm     1.00
[42/200][2699/4361] Loss_D: 0.01373053 (Loss_D_real: 0.00818489 Loss_D_fake: 0.00554564) Loss_G: 0.35069224 Loss_Enh_Dec: -1.38767195
| epoch  42 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  3.56 | ppl    35.21 | acc     0.60 | train_ae_norm     1.00
[42/200][2799/4361] Loss_D: 0.08866606 (Loss_D_real: 0.07221626 Loss_D_fake: 0.01644980) Loss_G: 0.26185766 Loss_Enh_Dec: -1.41578138
| epoch  42 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  3.47 | ppl    32.29 | acc     0.60 | train_ae_norm     1.00
[42/200][2899/4361] Loss_D: 0.02749532 (Loss_D_real: 0.01878071 Loss_D_fake: 0.00871461) Loss_G: 0.28919226 Loss_Enh_Dec: -1.60324728
| epoch  42 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401

[43/200][799/4361] Loss_D: 0.01014770 (Loss_D_real: 0.00585361 Loss_D_fake: 0.00429409) Loss_G: 0.33420092 Loss_Enh_Dec: -0.93201697
| epoch  43 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.13 | loss  3.47 | ppl    32.21 | acc     0.62 | train_ae_norm     1.00
[43/200][899/4361] Loss_D: 0.00907449 (Loss_D_real: 0.00848429 Loss_D_fake: 0.00059019) Loss_G: 0.52486241 Loss_Enh_Dec: -0.69544047
| epoch  43 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.46 | loss  3.50 | ppl    33.17 | acc     0.63 | train_ae_norm     1.00
[43/200][999/4361] Loss_D: 0.00728136 (Loss_D_real: 0.00237951 Loss_D_fake: 0.00490185) Loss_G: 0.32755461 Loss_Enh_Dec: -1.02373970
| epoch  43 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.22 | loss  3.49 | ppl    32.67 | acc     0.61 | train_ae_norm     1.00
[43/200][1099/4361] Loss_D: 0.01434754 (Loss_D_real: 0.00535658 Loss_D_fake: 0.00899096) Loss_G: 0.39983127 Loss_Enh_Dec: -1.08559024
| epoch  43 |  1100/ 4361 batches | lr 0.000000 | ms/batch 400.99

[43/200][3899/4361] Loss_D: 0.01060182 (Loss_D_real: 0.00277710 Loss_D_fake: 0.00782472) Loss_G: 0.32494551 Loss_Enh_Dec: -1.52386737
| epoch  43 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.07 | loss  3.51 | ppl    33.45 | acc     0.59 | train_ae_norm     1.00
[43/200][3999/4361] Loss_D: 0.00319557 (Loss_D_real: 0.00109593 Loss_D_fake: 0.00209965) Loss_G: 0.35919887 Loss_Enh_Dec: -1.63434970
| epoch  43 |  4000/ 4361 batches | lr 0.000000 | ms/batch 402.63 | loss  3.54 | ppl    34.33 | acc     0.63 | train_ae_norm     1.00
[43/200][4099/4361] Loss_D: 0.01278222 (Loss_D_real: 0.00383005 Loss_D_fake: 0.00895217) Loss_G: 0.35515234 Loss_Enh_Dec: -1.46977043
| epoch  43 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.60 | loss  3.49 | ppl    32.65 | acc     0.60 | train_ae_norm     1.00
[43/200][4199/4361] Loss_D: 0.02050543 (Loss_D_real: 0.00629660 Loss_D_fake: 0.01420883) Loss_G: 0.38932446 Loss_Enh_Dec: -1.64164507
| epoch  43 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401

[44/200][2099/4361] Loss_D: 0.00597383 (Loss_D_real: 0.00378045 Loss_D_fake: 0.00219339) Loss_G: 0.36808288 Loss_Enh_Dec: -1.24489772
| epoch  44 |  2100/ 4361 batches | lr 0.000000 | ms/batch 402.22 | loss  3.47 | ppl    32.08 | acc     0.60 | train_ae_norm     1.00
[44/200][2199/4361] Loss_D: 0.17764829 (Loss_D_real: 0.00393462 Loss_D_fake: 0.17371367) Loss_G: 0.75028914 Loss_Enh_Dec: -1.32847893
| epoch  44 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  3.46 | ppl    31.73 | acc     0.61 | train_ae_norm     1.00
[44/200][2299/4361] Loss_D: 0.01422244 (Loss_D_real: 0.01155169 Loss_D_fake: 0.00267075) Loss_G: 0.34428063 Loss_Enh_Dec: -1.12864220
| epoch  44 |  2300/ 4361 batches | lr 0.000000 | ms/batch 402.13 | loss  3.46 | ppl    31.73 | acc     0.61 | train_ae_norm     1.00
[44/200][2399/4361] Loss_D: 0.00488342 (Loss_D_real: 0.00142479 Loss_D_fake: 0.00345863) Loss_G: 0.37440547 Loss_Enh_Dec: -0.95953369
| epoch  44 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401

[45/200][199/4361] Loss_D: 0.00521786 (Loss_D_real: 0.00115269 Loss_D_fake: 0.00406517) Loss_G: 0.34319553 Loss_Enh_Dec: -1.14403796
| epoch  45 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  3.48 | ppl    32.43 | acc     0.62 | train_ae_norm     1.00
[45/200][299/4361] Loss_D: 0.00414928 (Loss_D_real: 0.00214252 Loss_D_fake: 0.00200676) Loss_G: 0.37601364 Loss_Enh_Dec: -1.24833250
| epoch  45 |   300/ 4361 batches | lr 0.000000 | ms/batch 402.61 | loss  3.46 | ppl    31.90 | acc     0.59 | train_ae_norm     1.00
[45/200][399/4361] Loss_D: 0.00561015 (Loss_D_real: 0.00254636 Loss_D_fake: 0.00306378) Loss_G: 0.35828367 Loss_Enh_Dec: -1.43245113
| epoch  45 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.88 | loss  3.38 | ppl    29.49 | acc     0.63 | train_ae_norm     1.00
[45/200][499/4361] Loss_D: 0.00497671 (Loss_D_real: 0.00078453 Loss_D_fake: 0.00419218) Loss_G: 0.41913730 Loss_Enh_Dec: -1.15417540
| epoch  45 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.12 

[45/200][3299/4361] Loss_D: 0.00242176 (Loss_D_real: 0.00104403 Loss_D_fake: 0.00137773) Loss_G: 0.46761948 Loss_Enh_Dec: -1.22525287
| epoch  45 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  3.44 | ppl    31.29 | acc     0.63 | train_ae_norm     1.00
[45/200][3399/4361] Loss_D: 0.00351009 (Loss_D_real: 0.00041027 Loss_D_fake: 0.00309983) Loss_G: 0.36581132 Loss_Enh_Dec: -1.44563019
| epoch  45 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  3.41 | ppl    30.25 | acc     0.62 | train_ae_norm     1.00
[45/200][3499/4361] Loss_D: 0.00420606 (Loss_D_real: 0.00107984 Loss_D_fake: 0.00312623) Loss_G: 0.36627936 Loss_Enh_Dec: -0.97053951
| epoch  45 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.33 | loss  3.36 | ppl    28.74 | acc     0.64 | train_ae_norm     1.00
[45/200][3599/4361] Loss_D: 0.00351581 (Loss_D_real: 0.00240470 Loss_D_fake: 0.00111111) Loss_G: 0.44689617 Loss_Enh_Dec: -0.77260035
| epoch  45 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402

| epoch  46 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  3.42 | ppl    30.61 | acc     0.65 | train_ae_norm     1.00
[46/200][1399/4361] Loss_D: 0.00418100 (Loss_D_real: 0.00045204 Loss_D_fake: 0.00372896) Loss_G: 0.32184201 Loss_Enh_Dec: -1.29192436
| epoch  46 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.69 | loss  3.40 | ppl    29.87 | acc     0.57 | train_ae_norm     1.00
[46/200][1499/4361] Loss_D: 0.00526015 (Loss_D_real: 0.00158424 Loss_D_fake: 0.00367591) Loss_G: 0.36807746 Loss_Enh_Dec: -1.37732697
| epoch  46 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  3.43 | ppl    30.97 | acc     0.61 | train_ae_norm     1.00
[46/200][1599/4361] Loss_D: 0.01121724 (Loss_D_real: 0.00678818 Loss_D_fake: 0.00442906) Loss_G: 0.40884933 Loss_Enh_Dec: -1.23800755
| epoch  46 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.83 | loss  3.40 | ppl    29.82 | acc     0.61 | train_ae_norm     1.00
[46/200][1699/4361] Loss_D: 0.01592374 (Loss_D_real: 0.0131382

  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220

| epoch  47 |  2600/ 4361 batches | lr 0.000000 | ms/batch 400.82 | loss  3.35 | ppl    28.42 | acc     0.62 | train_ae_norm     1.00
[47/200][2699/4361] Loss_D: 0.01076593 (Loss_D_real: 0.00735053 Loss_D_fake: 0.00341540) Loss_G: 0.39697808 Loss_Enh_Dec: -0.98631305
| epoch  47 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.55 | loss  3.37 | ppl    29.06 | acc     0.62 | train_ae_norm     1.00
[47/200][2799/4361] Loss_D: 0.00563834 (Loss_D_real: 0.00183115 Loss_D_fake: 0.00380719) Loss_G: 0.42489576 Loss_Enh_Dec: -1.33200228
| epoch  47 |  2800/ 4361 batches | lr 0.000000 | ms/batch 400.77 | loss  3.31 | ppl    27.49 | acc     0.61 | train_ae_norm     1.00
[47/200][2899/4361] Loss_D: 0.00836711 (Loss_D_real: 0.00771957 Loss_D_fake: 0.00064755) Loss_G: 0.47590119 Loss_Enh_Dec: -1.30071139
| epoch  47 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  3.36 | ppl    28.80 | acc     0.63 | train_ae_norm     1.00
[47/200][2999/4361] Loss_D: 0.00596034 (Loss_D_real: 0.0011981

| epoch  48 |   800/ 4361 batches | lr 0.000000 | ms/batch 402.13 | loss  3.33 | ppl    27.83 | acc     0.65 | train_ae_norm     1.00
[48/200][899/4361] Loss_D: 0.00638588 (Loss_D_real: 0.00083143 Loss_D_fake: 0.00555444) Loss_G: 0.40195996 Loss_Enh_Dec: -1.79579473
| epoch  48 |   900/ 4361 batches | lr 0.000000 | ms/batch 402.05 | loss  3.35 | ppl    28.46 | acc     0.64 | train_ae_norm     1.00
[48/200][999/4361] Loss_D: 0.00788849 (Loss_D_real: 0.00083251 Loss_D_fake: 0.00705598) Loss_G: 0.34060523 Loss_Enh_Dec: -1.56326854
| epoch  48 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.55 | loss  3.32 | ppl    27.73 | acc     0.64 | train_ae_norm     1.00
[48/200][1099/4361] Loss_D: 0.00917333 (Loss_D_real: 0.00694149 Loss_D_fake: 0.00223184) Loss_G: 0.45135686 Loss_Enh_Dec: -1.56686735
| epoch  48 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.80 | loss  3.32 | ppl    27.78 | acc     0.61 | train_ae_norm     1.00
[48/200][1199/4361] Loss_D: 0.05595355 (Loss_D_real: 0.05251697 

| epoch  48 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  3.31 | ppl    27.52 | acc     0.62 | train_ae_norm     1.00
[48/200][3999/4361] Loss_D: 0.00859984 (Loss_D_real: 0.00066044 Loss_D_fake: 0.00793940) Loss_G: 0.40607759 Loss_Enh_Dec: -0.99760765
| epoch  48 |  4000/ 4361 batches | lr 0.000000 | ms/batch 402.05 | loss  3.33 | ppl    27.88 | acc     0.65 | train_ae_norm     1.00
[48/200][4099/4361] Loss_D: 0.00406724 (Loss_D_real: 0.00050194 Loss_D_fake: 0.00356530) Loss_G: 0.38799569 Loss_Enh_Dec: -0.86514515
| epoch  48 |  4100/ 4361 batches | lr 0.000000 | ms/batch 400.68 | loss  3.28 | ppl    26.56 | acc     0.63 | train_ae_norm     1.00
[48/200][4199/4361] Loss_D: 0.00639723 (Loss_D_real: 0.00076673 Loss_D_fake: 0.00563050) Loss_G: 0.36780375 Loss_Enh_Dec: -1.04585016
| epoch  48 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.54 | loss  3.33 | ppl    27.94 | acc     0.68 | train_ae_norm     1.00
[48/200][4299/4361] Loss_D: 0.00410527 (Loss_D_real: 0.0019858

| epoch  49 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.06 | loss  3.30 | ppl    27.00 | acc     0.61 | train_ae_norm     1.00
[49/200][2099/4361] Loss_D: 0.00389405 (Loss_D_real: 0.00107445 Loss_D_fake: 0.00281960) Loss_G: 0.42133495 Loss_Enh_Dec: -0.35821959
| epoch  49 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.31 | loss  3.32 | ppl    27.76 | acc     0.64 | train_ae_norm     1.00
[49/200][2199/4361] Loss_D: 0.00260316 (Loss_D_real: 0.00127760 Loss_D_fake: 0.00132556) Loss_G: 0.45389181 Loss_Enh_Dec: -0.83464509
| epoch  49 |  2200/ 4361 batches | lr 0.000000 | ms/batch 402.16 | loss  3.32 | ppl    27.56 | acc     0.64 | train_ae_norm     1.00
[49/200][2299/4361] Loss_D: 0.00476018 (Loss_D_real: 0.00268089 Loss_D_fake: 0.00207929) Loss_G: 0.44418979 Loss_Enh_Dec: -0.96355057
| epoch  49 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  3.30 | ppl    27.00 | acc     0.66 | train_ae_norm     1.00
[49/200][2399/4361] Loss_D: 0.00373625 (Loss_D_real: 0.0007706

| epoch  50 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  3.29 | ppl    26.72 | acc     0.65 | train_ae_norm     1.00
[50/200][299/4361] Loss_D: 0.00194309 (Loss_D_real: 0.00049453 Loss_D_fake: 0.00144856) Loss_G: 0.40789166 Loss_Enh_Dec: -1.27744663
| epoch  50 |   300/ 4361 batches | lr 0.000000 | ms/batch 402.12 | loss  3.30 | ppl    27.10 | acc     0.59 | train_ae_norm     1.00
[50/200][399/4361] Loss_D: 0.01510574 (Loss_D_real: 0.01324595 Loss_D_fake: 0.00185979) Loss_G: 0.42447239 Loss_Enh_Dec: -1.28898621
| epoch  50 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.04 | loss  3.20 | ppl    24.61 | acc     0.65 | train_ae_norm     1.00
[50/200][499/4361] Loss_D: 0.00176614 (Loss_D_real: 0.00032063 Loss_D_fake: 0.00144551) Loss_G: 0.39614922 Loss_Enh_Dec: -1.23617935
| epoch  50 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  3.28 | ppl    26.49 | acc     0.67 | train_ae_norm     1.00
[50/200][599/4361] Loss_D: 0.00349467 (Loss_D_real: 0.00093409 Lo

| epoch  50 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.70 | loss  3.29 | ppl    26.72 | acc     0.64 | train_ae_norm     1.00
[50/200][3399/4361] Loss_D: 0.01936329 (Loss_D_real: 0.01786020 Loss_D_fake: 0.00150309) Loss_G: 0.39855775 Loss_Enh_Dec: -0.84263676
| epoch  50 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.98 | loss  3.28 | ppl    26.70 | acc     0.63 | train_ae_norm     1.00
[50/200][3499/4361] Loss_D: 0.00234851 (Loss_D_real: 0.00106535 Loss_D_fake: 0.00128316) Loss_G: 0.41436529 Loss_Enh_Dec: -0.83222675
| epoch  50 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.92 | loss  3.24 | ppl    25.58 | acc     0.63 | train_ae_norm     1.00
[50/200][3599/4361] Loss_D: 0.00133702 (Loss_D_real: 0.00069754 Loss_D_fake: 0.00063948) Loss_G: 0.41236255 Loss_Enh_Dec: -0.88187999
| epoch  50 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.37 | loss  3.23 | ppl    25.30 | acc     0.64 | train_ae_norm     1.00
[50/200][3699/4361] Loss_D: 0.00742541 (Loss_D_real: 0.0018146

| epoch  51 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.48 | loss  3.24 | ppl    25.65 | acc     0.60 | train_ae_norm     1.00
[51/200][1499/4361] Loss_D: 0.00643648 (Loss_D_real: 0.00403745 Loss_D_fake: 0.00239903) Loss_G: 0.53821486 Loss_Enh_Dec: 0.01308637
| epoch  51 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.17 | loss  3.29 | ppl    26.87 | acc     0.64 | train_ae_norm     1.00
[51/200][1599/4361] Loss_D: 0.01872446 (Loss_D_real: 0.01658557 Loss_D_fake: 0.00213889) Loss_G: 0.39306545 Loss_Enh_Dec: 0.12151837
| epoch  51 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  3.26 | ppl    25.96 | acc     0.65 | train_ae_norm     1.00
[51/200][1699/4361] Loss_D: 0.00742829 (Loss_D_real: 0.00181661 Loss_D_fake: 0.00561169) Loss_G: 0.53678274 Loss_Enh_Dec: 0.11990204
| epoch  51 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.91 | loss  3.23 | ppl    25.38 | acc     0.63 | train_ae_norm     1.00
[51/200][1799/4361] Loss_D: 0.01752323 (Loss_D_real: 0.01571866 L

  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.701
  Average training loss discriminator: 0.705
  Training epcoh too

[52/200][2699/4361] Loss_D: 0.00429599 (Loss_D_real: 0.00182323 Loss_D_fake: 0.00247276) Loss_G: 0.39282814 Loss_Enh_Dec: -1.72062337
| epoch  52 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  3.27 | ppl    26.27 | acc     0.63 | train_ae_norm     1.00
[52/200][2799/4361] Loss_D: 0.00739164 (Loss_D_real: 0.00495200 Loss_D_fake: 0.00243964) Loss_G: 0.37300774 Loss_Enh_Dec: -0.91606158
| epoch  52 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.11 | loss  3.24 | ppl    25.49 | acc     0.64 | train_ae_norm     1.00
[52/200][2899/4361] Loss_D: 0.01272603 (Loss_D_real: 0.01097981 Loss_D_fake: 0.00174622) Loss_G: 0.39592788 Loss_Enh_Dec: -1.32500494
| epoch  52 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.74 | loss  3.26 | ppl    26.03 | acc     0.64 | train_ae_norm     1.00
[52/200][2999/4361] Loss_D: 0.00700819 (Loss_D_real: 0.00494319 Loss_D_fake: 0.00206500) Loss_G: 0.41224805 Loss_Enh_Dec: -1.01802158
| epoch  52 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401

[53/200][799/4361] Loss_D: 0.01524338 (Loss_D_real: 0.01483469 Loss_D_fake: 0.00040868) Loss_G: 0.57671136 Loss_Enh_Dec: -1.01983643
| epoch  53 |   800/ 4361 batches | lr 0.000000 | ms/batch 400.96 | loss  3.24 | ppl    25.65 | acc     0.65 | train_ae_norm     1.00
[53/200][899/4361] Loss_D: 0.00475585 (Loss_D_real: 0.00266510 Loss_D_fake: 0.00209075) Loss_G: 0.39431027 Loss_Enh_Dec: -1.37637961
| epoch  53 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.46 | loss  3.26 | ppl    25.96 | acc     0.66 | train_ae_norm     1.00
[53/200][999/4361] Loss_D: 0.00206047 (Loss_D_real: 0.00111900 Loss_D_fake: 0.00094148) Loss_G: 0.38776454 Loss_Enh_Dec: -1.33906817
| epoch  53 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.80 | loss  3.22 | ppl    25.09 | acc     0.66 | train_ae_norm     1.00
[53/200][1099/4361] Loss_D: 0.00793754 (Loss_D_real: 0.00633879 Loss_D_fake: 0.00159875) Loss_G: 0.63079357 Loss_Enh_Dec: -1.31738007
| epoch  53 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.23

[53/200][3899/4361] Loss_D: 0.01735785 (Loss_D_real: 0.00644887 Loss_D_fake: 0.01090898) Loss_G: 0.22853903 Loss_Enh_Dec: -1.03782594
| epoch  53 |  3900/ 4361 batches | lr 0.000000 | ms/batch 402.16 | loss  3.30 | ppl    27.02 | acc     0.61 | train_ae_norm     1.00
[53/200][3999/4361] Loss_D: 0.01377275 (Loss_D_real: 0.00626285 Loss_D_fake: 0.00750990) Loss_G: 0.24679565 Loss_Enh_Dec: -0.90859145
| epoch  53 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.58 | loss  3.36 | ppl    28.67 | acc     0.63 | train_ae_norm     1.00
[53/200][4099/4361] Loss_D: 0.01399714 (Loss_D_real: 0.00527855 Loss_D_fake: 0.00871859) Loss_G: 0.24372044 Loss_Enh_Dec: -0.84208411
| epoch  53 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.90 | loss  3.24 | ppl    25.51 | acc     0.63 | train_ae_norm     1.00
[53/200][4199/4361] Loss_D: 0.01152775 (Loss_D_real: 0.00384109 Loss_D_fake: 0.00768667) Loss_G: 0.24554686 Loss_Enh_Dec: -1.02188480
| epoch  53 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401

[54/200][2099/4361] Loss_D: 0.00695276 (Loss_D_real: 0.00433384 Loss_D_fake: 0.00261892) Loss_G: 0.31013256 Loss_Enh_Dec: -1.23764932
| epoch  54 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  3.24 | ppl    25.50 | acc     0.64 | train_ae_norm     1.00
[54/200][2199/4361] Loss_D: 0.00627827 (Loss_D_real: 0.00393956 Loss_D_fake: 0.00233870) Loss_G: 0.32326332 Loss_Enh_Dec: -1.32910037
| epoch  54 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.39 | loss  3.22 | ppl    25.01 | acc     0.65 | train_ae_norm     1.00
[54/200][2299/4361] Loss_D: 0.00579194 (Loss_D_real: 0.00291272 Loss_D_fake: 0.00287922) Loss_G: 0.31503335 Loss_Enh_Dec: -1.41293013
| epoch  54 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  3.20 | ppl    24.56 | acc     0.66 | train_ae_norm     1.00
[54/200][2399/4361] Loss_D: 0.00801863 (Loss_D_real: 0.00450868 Loss_D_fake: 0.00350995) Loss_G: 0.29598260 Loss_Enh_Dec: -1.71651733
| epoch  54 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401

[55/200][299/4361] Loss_D: 0.00830219 (Loss_D_real: 0.00328940 Loss_D_fake: 0.00501279) Loss_G: 0.29296330 Loss_Enh_Dec: -2.06446123
| epoch  55 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.32 | ppl    27.77 | acc     0.60 | train_ae_norm     1.00
[55/200][399/4361] Loss_D: 0.01665466 (Loss_D_real: 0.01240304 Loss_D_fake: 0.00425162) Loss_G: 0.29650480 Loss_Enh_Dec: -2.05287457
| epoch  55 |   400/ 4361 batches | lr 0.000000 | ms/batch 402.10 | loss  3.23 | ppl    25.16 | acc     0.67 | train_ae_norm     1.00
[55/200][499/4361] Loss_D: 0.00407955 (Loss_D_real: 0.00178681 Loss_D_fake: 0.00229274) Loss_G: 0.38055396 Loss_Enh_Dec: -1.78203094
| epoch  55 |   500/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  3.32 | ppl    27.71 | acc     0.66 | train_ae_norm     1.00
[55/200][599/4361] Loss_D: 0.00910487 (Loss_D_real: 0.00150962 Loss_D_fake: 0.00759525) Loss_G: 0.42771870 Loss_Enh_Dec: -1.92111003
| epoch  55 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.70 

[55/200][3399/4361] Loss_D: 0.00134505 (Loss_D_real: 0.00059887 Loss_D_fake: 0.00074618) Loss_G: 0.44123307 Loss_Enh_Dec: -1.25390053
| epoch  55 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.05 | loss  3.25 | ppl    25.68 | acc     0.64 | train_ae_norm     1.00
[55/200][3499/4361] Loss_D: 0.00898224 (Loss_D_real: 0.00380854 Loss_D_fake: 0.00517371) Loss_G: 0.36357614 Loss_Enh_Dec: -1.04535854
| epoch  55 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.69 | loss  3.20 | ppl    24.50 | acc     0.64 | train_ae_norm     1.00
[55/200][3599/4361] Loss_D: 0.00246490 (Loss_D_real: 0.00069469 Loss_D_fake: 0.00177021) Loss_G: 0.36364490 Loss_Enh_Dec: -1.35787892
| epoch  55 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.16 | loss  3.28 | ppl    26.52 | acc     0.62 | train_ae_norm     1.00
[55/200][3699/4361] Loss_D: 0.00213578 (Loss_D_real: 0.00103304 Loss_D_fake: 0.00110274) Loss_G: 0.51039541 Loss_Enh_Dec: -1.27340019
| epoch  55 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401

[56/200][1499/4361] Loss_D: 0.05252388 (Loss_D_real: 0.04950312 Loss_D_fake: 0.00302076) Loss_G: 0.37229416 Loss_Enh_Dec: -1.17072058
| epoch  56 |  1500/ 4361 batches | lr 0.000000 | ms/batch 402.03 | loss  3.29 | ppl    26.88 | acc     0.62 | train_ae_norm     1.00
[56/200][1599/4361] Loss_D: 0.00384848 (Loss_D_real: 0.00222045 Loss_D_fake: 0.00162803) Loss_G: 0.37203348 Loss_Enh_Dec: -1.44894028
| epoch  56 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.83 | loss  3.26 | ppl    26.03 | acc     0.63 | train_ae_norm     1.00
[56/200][1699/4361] Loss_D: 0.01956739 (Loss_D_real: 0.01824028 Loss_D_fake: 0.00132711) Loss_G: 0.38207570 Loss_Enh_Dec: -1.93027937
| epoch  56 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.88 | loss  3.26 | ppl    25.96 | acc     0.62 | train_ae_norm     1.00
[56/200][1799/4361] Loss_D: 0.00290493 (Loss_D_real: 0.00097826 Loss_D_fake: 0.00192667) Loss_G: 0.36060750 Loss_Enh_Dec: -1.75982726
| epoch  56 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401

  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220

| epoch  57 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  3.26 | ppl    26.03 | acc     0.64 | train_ae_norm     1.00
[57/200][2699/4361] Loss_D: 0.00649997 (Loss_D_real: 0.00056922 Loss_D_fake: 0.00593075) Loss_G: 0.32245958 Loss_Enh_Dec: -1.31383669
| epoch  57 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  3.28 | ppl    26.68 | acc     0.62 | train_ae_norm     1.00
[57/200][2799/4361] Loss_D: 0.00371596 (Loss_D_real: 0.00126209 Loss_D_fake: 0.00245387) Loss_G: 0.35437080 Loss_Enh_Dec: -1.43750656
| epoch  57 |  2800/ 4361 batches | lr 0.000000 | ms/batch 402.00 | loss  3.24 | ppl    25.59 | acc     0.62 | train_ae_norm     1.00
[57/200][2899/4361] Loss_D: 0.00319566 (Loss_D_real: 0.00090829 Loss_D_fake: 0.00228737) Loss_G: 0.33469963 Loss_Enh_Dec: -1.66477811
| epoch  57 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.78 | loss  3.25 | ppl    25.92 | acc     0.64 | train_ae_norm     1.00
[57/200][2999/4361] Loss_D: 0.00522018 (Loss_D_real: 0.0004979

| epoch  58 |   800/ 4361 batches | lr 0.000000 | ms/batch 400.72 | loss  3.22 | ppl    24.94 | acc     0.66 | train_ae_norm     1.00
[58/200][899/4361] Loss_D: 0.00677973 (Loss_D_real: 0.00413477 Loss_D_fake: 0.00264496) Loss_G: 0.38011989 Loss_Enh_Dec: -1.23437953
| epoch  58 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.85 | loss  3.23 | ppl    25.39 | acc     0.67 | train_ae_norm     1.00
[58/200][999/4361] Loss_D: 0.05782839 (Loss_D_real: 0.05464501 Loss_D_fake: 0.00318338) Loss_G: 0.48219118 Loss_Enh_Dec: -1.15826368
| epoch  58 |  1000/ 4361 batches | lr 0.000000 | ms/batch 400.70 | loss  3.23 | ppl    25.40 | acc     0.65 | train_ae_norm     1.00
[58/200][1099/4361] Loss_D: 0.00250953 (Loss_D_real: 0.00017665 Loss_D_fake: 0.00233288) Loss_G: 0.42102310 Loss_Enh_Dec: -1.27063203
| epoch  58 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  3.22 | ppl    25.11 | acc     0.61 | train_ae_norm     1.00
[58/200][1199/4361] Loss_D: 0.00399340 (Loss_D_real: 0.00129055 

| epoch  58 |  3900/ 4361 batches | lr 0.000000 | ms/batch 400.56 | loss  3.24 | ppl    25.43 | acc     0.61 | train_ae_norm     1.00
[58/200][3999/4361] Loss_D: 0.03197069 (Loss_D_real: 0.01868466 Loss_D_fake: 0.01328603) Loss_G: 0.42469129 Loss_Enh_Dec: -1.15960491
| epoch  58 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.47 | loss  3.23 | ppl    25.28 | acc     0.65 | train_ae_norm     1.00
[58/200][4099/4361] Loss_D: 0.00349918 (Loss_D_real: 0.00125786 Loss_D_fake: 0.00224132) Loss_G: 0.38894916 Loss_Enh_Dec: -1.42492771
| epoch  58 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.35 | loss  3.19 | ppl    24.33 | acc     0.64 | train_ae_norm     1.00
[58/200][4199/4361] Loss_D: 0.01557316 (Loss_D_real: 0.01076570 Loss_D_fake: 0.00480746) Loss_G: 0.39713582 Loss_Enh_Dec: -1.47531605
| epoch  58 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.54 | loss  3.23 | ppl    25.36 | acc     0.65 | train_ae_norm     1.00
[58/200][4299/4361] Loss_D: 0.00717033 (Loss_D_real: 0.0003177

| epoch  59 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  3.17 | ppl    23.90 | acc     0.64 | train_ae_norm     1.00
[59/200][2399/4361] Loss_D: 0.00406872 (Loss_D_real: 0.00371902 Loss_D_fake: 0.00034970) Loss_G: 0.58022797 Loss_Enh_Dec: -1.33958173
| epoch  59 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  3.19 | ppl    24.34 | acc     0.62 | train_ae_norm     1.00
[59/200][2499/4361] Loss_D: 0.00479215 (Loss_D_real: 0.00197181 Loss_D_fake: 0.00282033) Loss_G: 0.35935766 Loss_Enh_Dec: -1.68697953
| epoch  59 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.46 | loss  3.23 | ppl    25.32 | acc     0.64 | train_ae_norm     1.00
[59/200][2599/4361] Loss_D: 0.00481003 (Loss_D_real: 0.00303352 Loss_D_fake: 0.00177651) Loss_G: 0.38221818 Loss_Enh_Dec: -1.58689344
| epoch  59 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  3.20 | ppl    24.47 | acc     0.63 | train_ae_norm     1.00
[59/200][2699/4361] Loss_D: 0.00240931 (Loss_D_real: 0.0017031

| epoch  60 |   500/ 4361 batches | lr 0.000000 | ms/batch 400.69 | loss  3.15 | ppl    23.35 | acc     0.68 | train_ae_norm     1.00
[60/200][599/4361] Loss_D: 0.00313499 (Loss_D_real: 0.00205455 Loss_D_fake: 0.00108044) Loss_G: 0.40966174 Loss_Enh_Dec: -0.93832606
| epoch  60 |   600/ 4361 batches | lr 0.000000 | ms/batch 400.95 | loss  3.11 | ppl    22.53 | acc     0.64 | train_ae_norm     1.00
[60/200][699/4361] Loss_D: 0.00462023 (Loss_D_real: 0.00242666 Loss_D_fake: 0.00219357) Loss_G: 0.42349535 Loss_Enh_Dec: -1.09378040
| epoch  60 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  3.16 | ppl    23.62 | acc     0.66 | train_ae_norm     1.00
[60/200][799/4361] Loss_D: 0.00377317 (Loss_D_real: 0.00150994 Loss_D_fake: 0.00226323) Loss_G: 0.41914639 Loss_Enh_Dec: -0.95570594
| epoch  60 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.58 | loss  3.15 | ppl    23.36 | acc     0.64 | train_ae_norm     1.00
[60/200][899/4361] Loss_D: 0.00924767 (Loss_D_real: 0.00850443 Lo

| epoch  60 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.11 | loss  3.13 | ppl    22.87 | acc     0.67 | train_ae_norm     1.00
[60/200][3699/4361] Loss_D: 0.00134069 (Loss_D_real: 0.00050384 Loss_D_fake: 0.00083685) Loss_G: 0.41261521 Loss_Enh_Dec: -1.74431419
| epoch  60 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  3.16 | ppl    23.48 | acc     0.61 | train_ae_norm     1.00
[60/200][3799/4361] Loss_D: 0.00822140 (Loss_D_real: 0.00392141 Loss_D_fake: 0.00429999) Loss_G: 0.42806378 Loss_Enh_Dec: -1.20488513
| epoch  60 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.76 | loss  3.18 | ppl    23.97 | acc     0.69 | train_ae_norm     1.00
[60/200][3899/4361] Loss_D: 0.00355101 (Loss_D_real: 0.00037698 Loss_D_fake: 0.00317403) Loss_G: 0.41240293 Loss_Enh_Dec: -1.87762916
| epoch  60 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.38 | loss  3.19 | ppl    24.34 | acc     0.61 | train_ae_norm     1.00
[60/200][3999/4361] Loss_D: 0.00319482 (Loss_D_real: 0.0009491

[61/200][1699/4361] Loss_D: 0.00745903 (Loss_D_real: 0.00378644 Loss_D_fake: 0.00367259) Loss_G: 0.37160277 Loss_Enh_Dec: -1.40818405
| epoch  61 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.63 | loss  3.16 | ppl    23.50 | acc     0.64 | train_ae_norm     1.00
[61/200][1799/4361] Loss_D: 0.05193255 (Loss_D_real: 0.04939127 Loss_D_fake: 0.00254129) Loss_G: 0.55609733 Loss_Enh_Dec: -1.54685366
| epoch  61 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  3.14 | ppl    23.16 | acc     0.66 | train_ae_norm     1.00
[61/200][1899/4361] Loss_D: 0.00151252 (Loss_D_real: 0.00064394 Loss_D_fake: 0.00086858) Loss_G: 0.52325487 Loss_Enh_Dec: -1.08309042
| epoch  61 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  3.19 | ppl    24.29 | acc     0.67 | train_ae_norm     1.00
[61/200][1999/4361] Loss_D: 0.00554042 (Loss_D_real: 0.00469482 Loss_D_fake: 0.00084560) Loss_G: 0.40608701 Loss_Enh_Dec: -1.63648117
| epoch  61 |  2000/ 4361 batches | lr 0.000000 | ms/batch 402

  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:12.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.699
  Average training loss discriminator: 0.706
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.490
  Test Loss: 4.021
  Test took: 0:00:00
Train other shit
| epoch  62 |     0/ 4361 batches | lr 0.000000 | ms/batch 867.13 | loss  0.03 | ppl     1.03 | acc     0.67 | train_ae_norm     1.00
[62/200][99/4361] Loss_D: 0.00450474 (Loss_D_real: 0.00087855 Loss_D_fake: 0.00362619) Loss_G: 0.43284950 Loss_Enh_Dec: -1.75719869
| epoch  62 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.99 | loss  3.15 | ppl    23.41 | acc     0.62 | train_ae_norm     1.00
[62/200][199/4361] Loss_D: 0.00509033 (Loss_D_real: 0.00303605 Loss_D_fake: 0.00205428) Loss_G: 0.43867618 Loss_Enh_Dec: -1.49184692
| epoch  62 |   200/ 4

| epoch  62 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.70 | loss  3.13 | ppl    22.78 | acc     0.64 | train_ae_norm     1.00
[62/200][2999/4361] Loss_D: 0.02797361 (Loss_D_real: 0.02655159 Loss_D_fake: 0.00142202) Loss_G: 0.43915382 Loss_Enh_Dec: -1.85785675
| epoch  62 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  3.14 | ppl    22.99 | acc     0.65 | train_ae_norm     1.00
[62/200][3099/4361] Loss_D: 0.00213063 (Loss_D_real: 0.00121384 Loss_D_fake: 0.00091680) Loss_G: 0.43429556 Loss_Enh_Dec: -1.96607339
| epoch  62 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.44 | loss  3.15 | ppl    23.28 | acc     0.62 | train_ae_norm     1.00
[62/200][3199/4361] Loss_D: 0.00291585 (Loss_D_real: 0.00122044 Loss_D_fake: 0.00169541) Loss_G: 0.41621199 Loss_Enh_Dec: -2.04264522
| epoch  62 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  3.17 | ppl    23.74 | acc     0.65 | train_ae_norm     1.00
[62/200][3299/4361] Loss_D: 0.00202226 (Loss_D_real: 0.0002336

| epoch  63 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.35 | loss  3.11 | ppl    22.40 | acc     0.65 | train_ae_norm     1.00
[63/200][1199/4361] Loss_D: 0.00424427 (Loss_D_real: 0.00264333 Loss_D_fake: 0.00160094) Loss_G: 0.51231968 Loss_Enh_Dec: -1.45831645
| epoch  63 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.07 | loss  3.11 | ppl    22.38 | acc     0.68 | train_ae_norm     1.00
[63/200][1299/4361] Loss_D: 0.00255373 (Loss_D_real: 0.00123899 Loss_D_fake: 0.00131475) Loss_G: 0.43357155 Loss_Enh_Dec: -1.18479538
| epoch  63 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.86 | loss  3.12 | ppl    22.63 | acc     0.66 | train_ae_norm     1.00
[63/200][1399/4361] Loss_D: 0.00272050 (Loss_D_real: 0.00153365 Loss_D_fake: 0.00118685) Loss_G: 0.48295030 Loss_Enh_Dec: -1.35541260
| epoch  63 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  3.12 | ppl    22.69 | acc     0.61 | train_ae_norm     1.00
[63/200][1499/4361] Loss_D: 0.00186543 (Loss_D_real: 0.0008218

| epoch  63 |  4200/ 4361 batches | lr 0.000000 | ms/batch 400.94 | loss  3.17 | ppl    23.78 | acc     0.69 | train_ae_norm     1.00
[63/200][4299/4361] Loss_D: 0.00358768 (Loss_D_real: 0.00018927 Loss_D_fake: 0.00339841) Loss_G: 0.35813969 Loss_Enh_Dec: -1.38101602
| epoch  63 |  4300/ 4361 batches | lr 0.000000 | ms/batch 401.65 | loss  3.13 | ppl    22.78 | acc     0.63 | train_ae_norm     1.00
| end of epoch  63 | time: 1853.19s | test loss  3.08 | test ppl 21.77 | acc 0.684
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:3

| epoch  64 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.59 | loss  3.12 | ppl    22.66 | acc     0.62 | train_ae_norm     1.00
[64/200][2499/4361] Loss_D: 0.00318660 (Loss_D_real: 0.00227620 Loss_D_fake: 0.00091040) Loss_G: 0.41084358 Loss_Enh_Dec: -1.33233142
| epoch  64 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.81 | loss  3.15 | ppl    23.32 | acc     0.66 | train_ae_norm     1.00
[64/200][2599/4361] Loss_D: 0.00237542 (Loss_D_real: 0.00112817 Loss_D_fake: 0.00124725) Loss_G: 0.40838391 Loss_Enh_Dec: -1.80317461
| epoch  64 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.60 | loss  3.12 | ppl    22.70 | acc     0.64 | train_ae_norm     1.00
[64/200][2699/4361] Loss_D: 0.00420977 (Loss_D_real: 0.00076981 Loss_D_fake: 0.00343997) Loss_G: 0.59580207 Loss_Enh_Dec: -1.42228401
| epoch  64 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  3.12 | ppl    22.61 | acc     0.65 | train_ae_norm     1.00
[64/200][2799/4361] Loss_D: 0.00434172 (Loss_D_real: 0.0027551

| epoch  65 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.52 | loss  3.25 | ppl    25.68 | acc     0.65 | train_ae_norm     1.00
[65/200][599/4361] Loss_D: 0.00345132 (Loss_D_real: 0.00110828 Loss_D_fake: 0.00234304) Loss_G: 0.38021097 Loss_Enh_Dec: -2.07487655
| epoch  65 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.85 | loss  3.16 | ppl    23.52 | acc     0.61 | train_ae_norm     1.00
[65/200][699/4361] Loss_D: 0.02242238 (Loss_D_real: 0.01286865 Loss_D_fake: 0.00955372) Loss_G: 0.36792526 Loss_Enh_Dec: -2.19119716
| epoch  65 |   700/ 4361 batches | lr 0.000000 | ms/batch 402.06 | loss  3.17 | ppl    23.81 | acc     0.68 | train_ae_norm     1.00
[65/200][799/4361] Loss_D: 0.01071901 (Loss_D_real: 0.00717138 Loss_D_fake: 0.00354763) Loss_G: 0.42741004 Loss_Enh_Dec: -1.87900722
| epoch  65 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.36 | loss  3.14 | ppl    23.11 | acc     0.65 | train_ae_norm     1.00
[65/200][899/4361] Loss_D: 0.01361866 (Loss_D_real: 0.00233723 Lo

| epoch  65 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  3.08 | ppl    21.81 | acc     0.66 | train_ae_norm     1.00
[65/200][3699/4361] Loss_D: 0.00377587 (Loss_D_real: 0.00166596 Loss_D_fake: 0.00210991) Loss_G: 0.41590300 Loss_Enh_Dec: -1.63710368
| epoch  65 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.52 | loss  3.10 | ppl    22.28 | acc     0.63 | train_ae_norm     1.00
[65/200][3799/4361] Loss_D: 0.00245968 (Loss_D_real: 0.00068558 Loss_D_fake: 0.00177409) Loss_G: 0.40087295 Loss_Enh_Dec: -1.91868520
| epoch  65 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.45 | loss  3.13 | ppl    22.88 | acc     0.69 | train_ae_norm     1.00
[65/200][3899/4361] Loss_D: 0.00261964 (Loss_D_real: 0.00130598 Loss_D_fake: 0.00131366) Loss_G: 0.44765207 Loss_Enh_Dec: -1.65050316
| epoch  65 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.89 | loss  3.12 | ppl    22.54 | acc     0.64 | train_ae_norm     1.00
[65/200][3999/4361] Loss_D: 0.00342702 (Loss_D_real: 0.0021224

| epoch  66 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.86 | loss  3.05 | ppl    21.09 | acc     0.66 | train_ae_norm     1.00
[66/200][1799/4361] Loss_D: 0.03192895 (Loss_D_real: 0.02850271 Loss_D_fake: 0.00342623) Loss_G: 0.41869593 Loss_Enh_Dec: -1.79958689
| epoch  66 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  3.02 | ppl    20.51 | acc     0.67 | train_ae_norm     1.00
[66/200][1899/4361] Loss_D: 0.01237552 (Loss_D_real: 0.00960159 Loss_D_fake: 0.00277393) Loss_G: 0.45598149 Loss_Enh_Dec: -1.57368803
| epoch  66 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.08 | ppl    21.73 | acc     0.68 | train_ae_norm     1.00
[66/200][1999/4361] Loss_D: 0.00635887 (Loss_D_real: 0.00090907 Loss_D_fake: 0.00544980) Loss_G: 0.39827964 Loss_Enh_Dec: -1.53291702
| epoch  66 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.03 | ppl    20.60 | acc     0.68 | train_ae_norm     1.00
[66/200][2099/4361] Loss_D: 0.00438082 (Loss_D_real: 0.0029580

  Batch   210  of    230.    Elapsed: 0:01:18.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.697
  Average training loss discriminator: 0.710
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.492
  Test Loss: 4.111
  Test took: 0:00:00
Train other shit
| epoch  67 |     0/ 4361 batches | lr 0.000000 | ms/batch 860.38 | loss  0.03 | ppl     1.03 | acc     0.67 | train_ae_norm     1.00
[67/200][99/4361] Loss_D: 0.00154975 (Loss_D_real: 0.00078612 Loss_D_fake: 0.00076363) Loss_G: 0.41760126 Loss_Enh_Dec: -1.46405065
| epoch  67 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.90 | loss  3.06 | ppl    21.22 | acc     0.64 | train_ae_norm     1.00
[67/200][199/4361] Loss_D: 0.00306939 (Loss_D_real: 0.00021425 Loss_D_fake: 0.00285514) Loss_G: 0.48321459 Loss_Enh_Dec: -1.08132839
| epoch  67 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.86 | loss  3.08 | ppl    21.82 | acc     0.66 | train_ae_norm     1.00
[67/200][299/4361] Loss_D: 0.

[67/200][2999/4361] Loss_D: 0.00942093 (Loss_D_real: 0.00806114 Loss_D_fake: 0.00135978) Loss_G: 0.46330053 Loss_Enh_Dec: -1.66229630
| epoch  67 |  3000/ 4361 batches | lr 0.000000 | ms/batch 402.57 | loss  3.11 | ppl    22.40 | acc     0.67 | train_ae_norm     1.00
[67/200][3099/4361] Loss_D: 0.00816463 (Loss_D_real: 0.00674332 Loss_D_fake: 0.00142131) Loss_G: 0.61211246 Loss_Enh_Dec: -1.41926193
| epoch  67 |  3100/ 4361 batches | lr 0.000000 | ms/batch 402.38 | loss  3.13 | ppl    22.84 | acc     0.62 | train_ae_norm     1.00
[67/200][3199/4361] Loss_D: 0.00436477 (Loss_D_real: 0.00137439 Loss_D_fake: 0.00299038) Loss_G: 0.44474077 Loss_Enh_Dec: -1.66708171
| epoch  67 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.95 | loss  3.16 | ppl    23.59 | acc     0.67 | train_ae_norm     1.00
[67/200][3299/4361] Loss_D: 0.01999854 (Loss_D_real: 0.01907327 Loss_D_fake: 0.00092528) Loss_G: 0.42609587 Loss_Enh_Dec: -1.55964279
| epoch  67 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401

[68/200][1199/4361] Loss_D: 0.00392595 (Loss_D_real: 0.00140779 Loss_D_fake: 0.00251816) Loss_G: 0.36195096 Loss_Enh_Dec: -1.16189706
| epoch  68 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.83 | loss  3.00 | ppl    20.12 | acc     0.67 | train_ae_norm     1.00
[68/200][1299/4361] Loss_D: 0.03213280 (Loss_D_real: 0.02973115 Loss_D_fake: 0.00240164) Loss_G: 0.40557224 Loss_Enh_Dec: -1.46524811
| epoch  68 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  3.02 | ppl    20.53 | acc     0.67 | train_ae_norm     1.00
[68/200][1399/4361] Loss_D: 0.00335701 (Loss_D_real: 0.00210657 Loss_D_fake: 0.00125044) Loss_G: 0.39665872 Loss_Enh_Dec: -1.15419948
| epoch  68 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.00 | loss  3.01 | ppl    20.34 | acc     0.63 | train_ae_norm     1.00
[68/200][1499/4361] Loss_D: 0.02446864 (Loss_D_real: 0.02227311 Loss_D_fake: 0.00219553) Loss_G: 0.38032284 Loss_Enh_Dec: -1.36963654
| epoch  68 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401

[68/200][4299/4361] Loss_D: 0.00280462 (Loss_D_real: 0.00159903 Loss_D_fake: 0.00120558) Loss_G: 0.40960893 Loss_Enh_Dec: -1.48770368
| epoch  68 |  4300/ 4361 batches | lr 0.000000 | ms/batch 401.91 | loss  3.00 | ppl    20.09 | acc     0.68 | train_ae_norm     1.00
| end of epoch  68 | time: 1853.92s | test loss  2.98 | test ppl 19.62 | acc 0.694
bleu_self:  [2.43055556e-02 8.09898782e-10 2.71036086e-12 1.67455727e-13
 1.58925487e-13]
bleu_test:  [5.85813492e-01 4.16666751e-02 3.14169992e-07 9.77347997e-10
 3.75592399e-10]
bleu_self: [0.02430556,0.00000000,0.00000000,0.00000000,0.00000000]
bleu_test: [0.58581349,0.04166668,0.00000031,0.00000000,0.00000000]
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:08.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  B

[69/200][2599/4361] Loss_D: 0.00400827 (Loss_D_real: 0.00207679 Loss_D_fake: 0.00193148) Loss_G: 0.46092430 Loss_Enh_Dec: -0.72687000
| epoch  69 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.38 | loss  3.03 | ppl    20.64 | acc     0.63 | train_ae_norm     1.00
[69/200][2699/4361] Loss_D: 0.00371645 (Loss_D_real: 0.00199772 Loss_D_fake: 0.00171873) Loss_G: 0.40244213 Loss_Enh_Dec: -1.41057658
| epoch  69 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.43 | loss  3.02 | ppl    20.39 | acc     0.65 | train_ae_norm     1.00
[69/200][2799/4361] Loss_D: 0.00737751 (Loss_D_real: 0.00435354 Loss_D_fake: 0.00302397) Loss_G: 0.44778839 Loss_Enh_Dec: -1.69946599
| epoch  69 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.91 | loss  2.97 | ppl    19.49 | acc     0.64 | train_ae_norm     1.00
[69/200][2899/4361] Loss_D: 0.00118132 (Loss_D_real: 0.00025257 Loss_D_fake: 0.00092875) Loss_G: 0.46875104 Loss_Enh_Dec: -1.45107484
| epoch  69 |  2900/ 4361 batches | lr 0.000000 | ms/batch 402

[70/200][799/4361] Loss_D: 0.00605763 (Loss_D_real: 0.00192653 Loss_D_fake: 0.00413110) Loss_G: 0.44769469 Loss_Enh_Dec: -1.92228210
| epoch  70 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  3.03 | ppl    20.75 | acc     0.66 | train_ae_norm     1.00
[70/200][899/4361] Loss_D: 0.03891055 (Loss_D_real: 0.03730682 Loss_D_fake: 0.00160373) Loss_G: 0.79217368 Loss_Enh_Dec: -1.53396440
| epoch  70 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.71 | loss  3.03 | ppl    20.68 | acc     0.68 | train_ae_norm     1.00
[70/200][1199/4361] Loss_D: 0.01059160 (Loss_D_real: 0.00937404 Loss_D_fake: 0.00121756) Loss_G: 0.44760299 Loss_Enh_Dec: -1.61026502
| epoch  70 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.93 | loss  3.00 | ppl    20.00 | acc     0.68 | train_ae_norm     1.00
[70/200][1299/4361] Loss_D: 0.01373950 (Loss_D_real: 0.00873803 Loss_D_fake: 0.00500148) Loss_G: 0.43894741 Loss_Enh_Dec: -1.28898084
| epoch  70 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.8

[70/200][4099/4361] Loss_D: 0.05997207 (Loss_D_real: 0.05698431 Loss_D_fake: 0.00298776) Loss_G: 0.36554739 Loss_Enh_Dec: -1.22628665
| epoch  70 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  2.95 | ppl    19.05 | acc     0.68 | train_ae_norm     1.00
[70/200][4199/4361] Loss_D: 0.00742769 (Loss_D_real: 0.00270155 Loss_D_fake: 0.00472614) Loss_G: 0.36275330 Loss_Enh_Dec: -1.54879630
| epoch  70 |  4200/ 4361 batches | lr 0.000000 | ms/batch 402.21 | loss  2.97 | ppl    19.59 | acc     0.70 | train_ae_norm     1.00
[70/200][4299/4361] Loss_D: 0.01462752 (Loss_D_real: 0.00301025 Loss_D_fake: 0.01161727) Loss_G: 0.62353057 Loss_Enh_Dec: -1.30982745
| epoch  70 |  4300/ 4361 batches | lr 0.000000 | ms/batch 401.81 | loss  2.94 | ppl    18.96 | acc     0.69 | train_ae_norm     1.00
| end of epoch  70 | time: 1854.21s | test loss  2.94 | test ppl 18.93 | acc 0.698
bleu_self:  [3.12500000e-02 1.05644315e-09 3.79468392e-12 6.89594141e-12
 5.54107232e-11]
bleu_test:  [5.62500000

[71/200][2199/4361] Loss_D: 0.00306458 (Loss_D_real: 0.00092140 Loss_D_fake: 0.00214317) Loss_G: 0.37225652 Loss_Enh_Dec: -2.09676147
| epoch  71 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.18 | loss  2.96 | ppl    19.21 | acc     0.68 | train_ae_norm     1.00
[71/200][2299/4361] Loss_D: 0.00585611 (Loss_D_real: 0.00182009 Loss_D_fake: 0.00403601) Loss_G: 0.41841349 Loss_Enh_Dec: -2.01599860
| epoch  71 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.89 | loss  2.95 | ppl    19.15 | acc     0.69 | train_ae_norm     1.00
[71/200][2399/4361] Loss_D: 0.00319700 (Loss_D_real: 0.00071657 Loss_D_fake: 0.00248043) Loss_G: 0.40999356 Loss_Enh_Dec: -1.79946768
| epoch  71 |  2400/ 4361 batches | lr 0.000000 | ms/batch 402.26 | loss  2.94 | ppl    18.99 | acc     0.66 | train_ae_norm     1.00
[71/200][2499/4361] Loss_D: 0.00645628 (Loss_D_real: 0.00222028 Loss_D_fake: 0.00423600) Loss_G: 0.55121750 Loss_Enh_Dec: -1.77900970
| epoch  71 |  2500/ 4361 batches | lr 0.000000 | ms/batch 402

| epoch  72 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.43 | loss  2.95 | ppl    19.07 | acc     0.68 | train_ae_norm     1.00
[72/200][899/4361] Loss_D: 0.00586119 (Loss_D_real: 0.00455052 Loss_D_fake: 0.00131067) Loss_G: 0.51478916 Loss_Enh_Dec: -1.50457036
| epoch  72 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  2.96 | ppl    19.35 | acc     0.71 | train_ae_norm     1.00
[72/200][999/4361] Loss_D: 0.00330692 (Loss_D_real: 0.00218462 Loss_D_fake: 0.00112230) Loss_G: 0.37688860 Loss_Enh_Dec: -1.20780015
| epoch  72 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.65 | loss  2.95 | ppl    19.11 | acc     0.68 | train_ae_norm     1.00
[72/200][1099/4361] Loss_D: 0.00573304 (Loss_D_real: 0.00022653 Loss_D_fake: 0.00550652) Loss_G: 0.43891969 Loss_Enh_Dec: -1.43785632
| epoch  72 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.78 | loss  2.95 | ppl    19.17 | acc     0.66 | train_ae_norm     1.00
[72/200][1199/4361] Loss_D: 0.01223872 (Loss_D_real: 0.00110519 

| epoch  72 |  4100/ 4361 batches | lr 0.000000 | ms/batch 402.04 | loss  2.90 | ppl    18.08 | acc     0.67 | train_ae_norm     1.00
[72/200][4199/4361] Loss_D: 0.00722845 (Loss_D_real: 0.00538846 Loss_D_fake: 0.00183999) Loss_G: 0.51696527 Loss_Enh_Dec: -1.96409726
| epoch  72 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  2.95 | ppl    19.15 | acc     0.70 | train_ae_norm     1.00
[72/200][4299/4361] Loss_D: 0.00292463 (Loss_D_real: 0.00043755 Loss_D_fake: 0.00248708) Loss_G: 0.41609287 Loss_Enh_Dec: -1.85144842
| epoch  72 |  4300/ 4361 batches | lr 0.000000 | ms/batch 401.58 | loss  2.92 | ppl    18.53 | acc     0.69 | train_ae_norm     1.00
| end of epoch  72 | time: 1854.05s | test loss  2.92 | test ppl 18.62 | acc 0.701
bleu_self:  [8.93763758e-02 2.55115291e-09 8.13927989e-12 5.03119840e-13
 7.84325627e-13]
bleu_test:  [6.89285714e-01 1.35305266e-01 8.99462693e-07 2.57435860e-09
 8.50919362e-10]
bleu_self: [0.08937638,0.00000000,0.00000000,0.00000000,0.00000000]

| epoch  73 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  2.90 | ppl    18.20 | acc     0.65 | train_ae_norm     1.00
[73/200][2499/4361] Loss_D: 0.00195337 (Loss_D_real: 0.00023371 Loss_D_fake: 0.00171965) Loss_G: 0.44146916 Loss_Enh_Dec: -1.77708995
| epoch  73 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.30 | loss  2.95 | ppl    19.04 | acc     0.68 | train_ae_norm     1.00
[73/200][2599/4361] Loss_D: 0.00210754 (Loss_D_real: 0.00149494 Loss_D_fake: 0.00061260) Loss_G: 0.54270428 Loss_Enh_Dec: -1.47203624
| epoch  73 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.76 | loss  2.90 | ppl    18.10 | acc     0.65 | train_ae_norm     1.00
[73/200][2699/4361] Loss_D: 0.03120418 (Loss_D_real: 0.03028314 Loss_D_fake: 0.00092104) Loss_G: 0.48563910 Loss_Enh_Dec: -1.50792813
| epoch  73 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.02 | loss  2.92 | ppl    18.45 | acc     0.67 | train_ae_norm     1.00
[73/200][2799/4361] Loss_D: 0.00996684 (Loss_D_real: 0.0073269

| epoch  74 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.34 | loss  2.88 | ppl    17.86 | acc     0.63 | train_ae_norm     1.00
[74/200][699/4361] Loss_D: 0.00922135 (Loss_D_real: 0.00113704 Loss_D_fake: 0.00808431) Loss_G: 0.39568517 Loss_Enh_Dec: -1.79501081
| epoch  74 |   700/ 4361 batches | lr 0.000000 | ms/batch 402.10 | loss  2.92 | ppl    18.55 | acc     0.67 | train_ae_norm     1.00
[74/200][799/4361] Loss_D: 0.00295650 (Loss_D_real: 0.00221729 Loss_D_fake: 0.00073921) Loss_G: 0.55489558 Loss_Enh_Dec: -1.54412079
| epoch  74 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.89 | ppl    18.07 | acc     0.67 | train_ae_norm     1.00
[74/200][899/4361] Loss_D: 0.00256650 (Loss_D_real: 0.00136675 Loss_D_fake: 0.00119975) Loss_G: 0.53718579 Loss_Enh_Dec: -1.43318307
| epoch  74 |   900/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.92 | ppl    18.47 | acc     0.70 | train_ae_norm     1.00
[74/200][999/4361] Loss_D: 0.00721384 (Loss_D_real: 0.00427816 Lo

| epoch  74 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  2.92 | ppl    18.53 | acc     0.63 | train_ae_norm     1.00
[74/200][3799/4361] Loss_D: 0.00364391 (Loss_D_real: 0.00117436 Loss_D_fake: 0.00246955) Loss_G: 0.38703004 Loss_Enh_Dec: -1.69600189
| epoch  74 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.95 | loss  2.92 | ppl    18.57 | acc     0.69 | train_ae_norm     1.00
[74/200][3899/4361] Loss_D: 0.00788140 (Loss_D_real: 0.00738064 Loss_D_fake: 0.00050075) Loss_G: 0.53740501 Loss_Enh_Dec: -1.32387447
| epoch  74 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  2.91 | ppl    18.43 | acc     0.65 | train_ae_norm     1.00
[74/200][3999/4361] Loss_D: 0.00176830 (Loss_D_real: 0.00085970 Loss_D_fake: 0.00090860) Loss_G: 0.50053930 Loss_Enh_Dec: -1.78400958
| epoch  74 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  2.92 | ppl    18.59 | acc     0.67 | train_ae_norm     1.00
[74/200][4099/4361] Loss_D: 0.00150713 (Loss_D_real: 0.0007264

| epoch  75 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.92 | loss  2.96 | ppl    19.21 | acc     0.70 | train_ae_norm     1.00
[75/200][1999/4361] Loss_D: 0.01471943 (Loss_D_real: 0.01175992 Loss_D_fake: 0.00295951) Loss_G: 0.41226292 Loss_Enh_Dec: -1.50211525
| epoch  75 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.21 | loss  2.88 | ppl    17.78 | acc     0.67 | train_ae_norm     1.00
[75/200][2099/4361] Loss_D: 0.00638597 (Loss_D_real: 0.00399488 Loss_D_fake: 0.00239109) Loss_G: 0.46559715 Loss_Enh_Dec: -1.98990083
| epoch  75 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.46 | loss  2.93 | ppl    18.68 | acc     0.67 | train_ae_norm     1.00
[75/200][2199/4361] Loss_D: 0.01094142 (Loss_D_real: 0.00872089 Loss_D_fake: 0.00222053) Loss_G: 0.44073692 Loss_Enh_Dec: -1.41697311
| epoch  75 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.24 | loss  2.91 | ppl    18.39 | acc     0.68 | train_ae_norm     1.00
[75/200][2299/4361] Loss_D: 0.00457703 (Loss_D_real: 0.0010038

  Accuracy: 0.482
  Test Loss: 4.281
  Test took: 0:00:00
Train other shit
| epoch  76 |     0/ 4361 batches | lr 0.000000 | ms/batch 866.21 | loss  0.03 | ppl     1.03 | acc     0.69 | train_ae_norm     1.00
[76/200][99/4361] Loss_D: 0.00349368 (Loss_D_real: 0.00261977 Loss_D_fake: 0.00087391) Loss_G: 0.37847432 Loss_Enh_Dec: -1.85769582
| epoch  76 |   100/ 4361 batches | lr 0.000000 | ms/batch 402.39 | loss  2.91 | ppl    18.32 | acc     0.65 | train_ae_norm     1.00
[76/200][199/4361] Loss_D: 0.00375500 (Loss_D_real: 0.00294697 Loss_D_fake: 0.00080802) Loss_G: 0.41857585 Loss_Enh_Dec: -1.61602676
| epoch  76 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.74 | loss  2.93 | ppl    18.77 | acc     0.67 | train_ae_norm     1.00
[76/200][299/4361] Loss_D: 0.00395804 (Loss_D_real: 0.00246876 Loss_D_fake: 0.00148928) Loss_G: 0.41322193 Loss_Enh_Dec: -1.65908849
| epoch  76 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.85 | loss  2.93 | ppl    18.67 | acc     0.62 | train_ae_norm 

[76/200][3099/4361] Loss_D: 0.00385935 (Loss_D_real: 0.00023867 Loss_D_fake: 0.00362069) Loss_G: 0.42552757 Loss_Enh_Dec: -1.88443089
| epoch  76 |  3100/ 4361 batches | lr 0.000000 | ms/batch 402.24 | loss  2.89 | ppl    17.93 | acc     0.66 | train_ae_norm     1.00
[76/200][3199/4361] Loss_D: 0.00150411 (Loss_D_real: 0.00019264 Loss_D_fake: 0.00131147) Loss_G: 0.39784524 Loss_Enh_Dec: -1.64748538
| epoch  76 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  2.92 | ppl    18.46 | acc     0.67 | train_ae_norm     1.00
[76/200][3299/4361] Loss_D: 0.02395430 (Loss_D_real: 0.02312623 Loss_D_fake: 0.00082807) Loss_G: 0.54903406 Loss_Enh_Dec: -1.46361649
| epoch  76 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.13 | loss  2.91 | ppl    18.44 | acc     0.68 | train_ae_norm     1.00
[76/200][3399/4361] Loss_D: 0.05668312 (Loss_D_real: 0.05588569 Loss_D_fake: 0.00079743) Loss_G: 0.75600642 Loss_Enh_Dec: -1.50674236
| epoch  76 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401

[77/200][1199/4361] Loss_D: 0.08776291 (Loss_D_real: 0.00199201 Loss_D_fake: 0.08577091) Loss_G: 0.72572708 Loss_Enh_Dec: -1.62572765
| epoch  77 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  2.86 | ppl    17.49 | acc     0.69 | train_ae_norm     1.00
[77/200][1499/4361] Loss_D: 0.00666342 (Loss_D_real: 0.00518122 Loss_D_fake: 0.00148220) Loss_G: 0.42202851 Loss_Enh_Dec: -1.38136375
| epoch  77 |  1500/ 4361 batches | lr 0.000000 | ms/batch 400.97 | loss  2.94 | ppl    18.93 | acc     0.66 | train_ae_norm     1.00
[77/200][1599/4361] Loss_D: 0.00236333 (Loss_D_real: 0.00147088 Loss_D_fake: 0.00089245) Loss_G: 0.52209049 Loss_Enh_Dec: -0.82880020
| epoch  77 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  2.91 | ppl    18.39 | acc     0.65 | train_ae_norm     1.00
[77/200][1699/4361] Loss_D: 0.00477615 (Loss_D_real: 0.00128986 Loss_D_fake: 0.00348629) Loss_G: 0.39033675 Loss_Enh_Dec: -1.14484048
| epoch  77 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401

  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220

| epoch  78 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  2.87 | ppl    17.59 | acc     0.64 | train_ae_norm     1.00
[78/200][2699/4361] Loss_D: 0.00149030 (Loss_D_real: 0.00013583 Loss_D_fake: 0.00135447) Loss_G: 0.42890984 Loss_Enh_Dec: -2.05236793
| epoch  78 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.85 | loss  2.86 | ppl    17.52 | acc     0.69 | train_ae_norm     1.00
[78/200][2799/4361] Loss_D: 0.00105115 (Loss_D_real: 0.00040742 Loss_D_fake: 0.00064373) Loss_G: 0.42310148 Loss_Enh_Dec: -1.34745395
| epoch  78 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  2.82 | ppl    16.82 | acc     0.66 | train_ae_norm     1.00
[78/200][2899/4361] Loss_D: 0.00234774 (Loss_D_real: 0.00123844 Loss_D_fake: 0.00110931) Loss_G: 0.51279038 Loss_Enh_Dec: -1.52169216
| epoch  78 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.77 | loss  2.85 | ppl    17.24 | acc     0.68 | train_ae_norm     1.00
[78/200][2999/4361] Loss_D: 0.00063151 (Loss_D_real: 0.0001008

| epoch  79 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.32 | loss  2.80 | ppl    16.45 | acc     0.68 | train_ae_norm     1.00
[79/200][899/4361] Loss_D: 0.00327274 (Loss_D_real: 0.00073582 Loss_D_fake: 0.00253691) Loss_G: 0.48251700 Loss_Enh_Dec: -1.37457228
| epoch  79 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.83 | ppl    16.97 | acc     0.71 | train_ae_norm     1.00
[79/200][999/4361] Loss_D: 0.00137679 (Loss_D_real: 0.00087348 Loss_D_fake: 0.00050332) Loss_G: 0.51288593 Loss_Enh_Dec: -1.41704643
| epoch  79 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.83 | ppl    16.87 | acc     0.70 | train_ae_norm     1.00
[79/200][1099/4361] Loss_D: 0.03380706 (Loss_D_real: 0.03312711 Loss_D_fake: 0.00067995) Loss_G: 0.41894242 Loss_Enh_Dec: -1.58395326
| epoch  79 |  1100/ 4361 batches | lr 0.000000 | ms/batch 402.08 | loss  2.82 | ppl    16.70 | acc     0.68 | train_ae_norm     1.00
[79/200][1199/4361] Loss_D: 0.00356578 (Loss_D_real: 0.00283102 

| epoch  79 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.27 | loss  2.81 | ppl    16.64 | acc     0.65 | train_ae_norm     1.00
[79/200][3999/4361] Loss_D: 0.00357600 (Loss_D_real: 0.00236772 Loss_D_fake: 0.00120829) Loss_G: 0.56404918 Loss_Enh_Dec: -0.76773721
| epoch  79 |  4000/ 4361 batches | lr 0.000000 | ms/batch 400.65 | loss  2.83 | ppl    16.90 | acc     0.67 | train_ae_norm     1.00
[79/200][4099/4361] Loss_D: 0.00148150 (Loss_D_real: 0.00051925 Loss_D_fake: 0.00096225) Loss_G: 0.45135275 Loss_Enh_Dec: -1.03425276
| epoch  79 |  4100/ 4361 batches | lr 0.000000 | ms/batch 400.39 | loss  2.77 | ppl    15.94 | acc     0.68 | train_ae_norm     1.00
[79/200][4199/4361] Loss_D: 0.00115652 (Loss_D_real: 0.00033711 Loss_D_fake: 0.00081941) Loss_G: 0.59767687 Loss_Enh_Dec: -0.54081804
| epoch  79 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.83 | ppl    16.88 | acc     0.73 | train_ae_norm     1.00
[79/200][4299/4361] Loss_D: 0.00221041 (Loss_D_real: 0.0008297

| epoch  80 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.09 | loss  2.81 | ppl    16.61 | acc     0.68 | train_ae_norm     1.00
[80/200][2199/4361] Loss_D: 0.00808076 (Loss_D_real: 0.00572428 Loss_D_fake: 0.00235648) Loss_G: 0.44217211 Loss_Enh_Dec: -0.97251123
| epoch  80 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.49 | loss  2.82 | ppl    16.86 | acc     0.68 | train_ae_norm     1.00
[80/200][2299/4361] Loss_D: 0.00116016 (Loss_D_real: 0.00050027 Loss_D_fake: 0.00065989) Loss_G: 0.51846522 Loss_Enh_Dec: -1.73203182
| epoch  80 |  2300/ 4361 batches | lr 0.000000 | ms/batch 402.09 | loss  2.81 | ppl    16.56 | acc     0.70 | train_ae_norm     1.00
[80/200][2399/4361] Loss_D: 0.00243917 (Loss_D_real: 0.00156781 Loss_D_fake: 0.00087136) Loss_G: 0.57350707 Loss_Enh_Dec: -0.95251673
| epoch  80 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.76 | loss  2.81 | ppl    16.66 | acc     0.65 | train_ae_norm     1.00
[80/200][2499/4361] Loss_D: 0.00321439 (Loss_D_real: 0.0018518

| epoch  81 |   200/ 4361 batches | lr 0.000000 | ms/batch 402.34 | loss  2.83 | ppl    16.87 | acc     0.68 | train_ae_norm     1.00
[81/200][299/4361] Loss_D: 0.00040188 (Loss_D_real: 0.00017657 Loss_D_fake: 0.00022532) Loss_G: 0.45503256 Loss_Enh_Dec: -1.09148717
| epoch  81 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.97 | loss  2.82 | ppl    16.85 | acc     0.64 | train_ae_norm     1.00
[81/200][399/4361] Loss_D: 0.00332044 (Loss_D_real: 0.00284322 Loss_D_fake: 0.00047722) Loss_G: 0.54402232 Loss_Enh_Dec: -1.07462847
| epoch  81 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  2.76 | ppl    15.86 | acc     0.68 | train_ae_norm     1.00
[81/200][499/4361] Loss_D: 0.00232546 (Loss_D_real: 0.00082054 Loss_D_fake: 0.00150492) Loss_G: 0.42109686 Loss_Enh_Dec: -1.22973776
| epoch  81 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.90 | loss  2.83 | ppl    16.99 | acc     0.71 | train_ae_norm     1.00
[81/200][599/4361] Loss_D: 0.00501066 (Loss_D_real: 0.00307949 Lo

| epoch  81 |  3300/ 4361 batches | lr 0.000000 | ms/batch 402.05 | loss  2.82 | ppl    16.78 | acc     0.69 | train_ae_norm     1.00
[81/200][3399/4361] Loss_D: 0.00449497 (Loss_D_real: 0.00384613 Loss_D_fake: 0.00064884) Loss_G: 0.54467386 Loss_Enh_Dec: -0.71299505
| epoch  81 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.64 | loss  2.82 | ppl    16.80 | acc     0.67 | train_ae_norm     1.00
[81/200][3499/4361] Loss_D: 0.00033057 (Loss_D_real: 0.00024746 Loss_D_fake: 0.00008310) Loss_G: 0.60045761 Loss_Enh_Dec: -0.66086078
| epoch  81 |  3500/ 4361 batches | lr 0.000000 | ms/batch 402.32 | loss  2.75 | ppl    15.66 | acc     0.68 | train_ae_norm     1.00
[81/200][3599/4361] Loss_D: 0.00302905 (Loss_D_real: 0.00103060 Loss_D_fake: 0.00199845) Loss_G: 0.46420947 Loss_Enh_Dec: -1.35552311
| epoch  81 |  3600/ 4361 batches | lr 0.000000 | ms/batch 402.14 | loss  2.77 | ppl    15.90 | acc     0.71 | train_ae_norm     1.00
[81/200][3699/4361] Loss_D: 0.00081945 (Loss_D_real: 0.0001842

| epoch  82 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.49 | loss  2.84 | ppl    17.07 | acc     0.67 | train_ae_norm     1.00
[82/200][1599/4361] Loss_D: 0.00104570 (Loss_D_real: 0.00038947 Loss_D_fake: 0.00065623) Loss_G: 0.41726303 Loss_Enh_Dec: -1.42236233
| epoch  82 |  1600/ 4361 batches | lr 0.000000 | ms/batch 400.98 | loss  2.80 | ppl    16.52 | acc     0.67 | train_ae_norm     1.00
[82/200][1699/4361] Loss_D: 0.00051029 (Loss_D_real: 0.00018614 Loss_D_fake: 0.00032415) Loss_G: 0.42595974 Loss_Enh_Dec: -1.72512472
| epoch  82 |  1700/ 4361 batches | lr 0.000000 | ms/batch 402.03 | loss  2.78 | ppl    16.05 | acc     0.66 | train_ae_norm     1.00
[82/200][1799/4361] Loss_D: 0.00298264 (Loss_D_real: 0.00182970 Loss_D_fake: 0.00115294) Loss_G: 0.89865649 Loss_Enh_Dec: -1.14954507
| epoch  82 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.77 | ppl    15.94 | acc     0.68 | train_ae_norm     1.00
[82/200][1899/4361] Loss_D: 0.00118801 (Loss_D_real: 0.0004250

  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.698
  Average training loss discriminator: 0.703
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.485
  Test Loss: 4.263
  Test took: 0:00:00
Train other shit
| epoch  83 |     0/ 4361 batches | lr 0.000000 | ms/batch 863.13 | loss  0.02 | ppl     1.02 | acc     0.73 | train_ae_norm     1.00
[83/200][99/4361] Loss_D: 0.00597751 (Loss_D_

| epoch  83 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  2.69 | ppl    14.68 | acc     0.65 | train_ae_norm     1.00
[83/200][2899/4361] Loss_D: 0.00168858 (Loss_D_real: 0.00102310 Loss_D_fake: 0.00066548) Loss_G: 0.45433989 Loss_Enh_Dec: -1.23811400
| epoch  83 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  2.71 | ppl    14.97 | acc     0.69 | train_ae_norm     1.00
[83/200][2999/4361] Loss_D: 0.00088936 (Loss_D_real: 0.00063823 Loss_D_fake: 0.00025113) Loss_G: 0.45877352 Loss_Enh_Dec: -1.08275187
| epoch  83 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  2.73 | ppl    15.37 | acc     0.69 | train_ae_norm     1.00
[83/200][3099/4361] Loss_D: 0.00121409 (Loss_D_real: 0.00042761 Loss_D_fake: 0.00078648) Loss_G: 0.47036019 Loss_Enh_Dec: -1.30821109
| epoch  83 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.58 | loss  2.72 | ppl    15.23 | acc     0.67 | train_ae_norm     1.00
[83/200][3199/4361] Loss_D: 0.00132782 (Loss_D_real: 0.0000560

| epoch  84 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.72 | ppl    15.20 | acc     0.70 | train_ae_norm     1.00
[84/200][1099/4361] Loss_D: 0.00088619 (Loss_D_real: 0.00019896 Loss_D_fake: 0.00068723) Loss_G: 0.46425802 Loss_Enh_Dec: -1.97531021
| epoch  84 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.18 | loss  2.73 | ppl    15.28 | acc     0.68 | train_ae_norm     1.00
[84/200][1199/4361] Loss_D: 0.00521399 (Loss_D_real: 0.00247030 Loss_D_fake: 0.00274369) Loss_G: 0.97994119 Loss_Enh_Dec: -1.28040612
| epoch  84 |  1200/ 4361 batches | lr 0.000000 | ms/batch 400.31 | loss  2.75 | ppl    15.62 | acc     0.70 | train_ae_norm     1.00
[84/200][1299/4361] Loss_D: 0.00173025 (Loss_D_real: 0.00024229 Loss_D_fake: 0.00148797) Loss_G: 0.56929755 Loss_Enh_Dec: -1.20617104
| epoch  84 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.49 | loss  2.81 | ppl    16.63 | acc     0.68 | train_ae_norm     1.00
[84/200][1399/4361] Loss_D: 0.00157380 (Loss_D_real: 0.0002517

| epoch  84 |  4100/ 4361 batches | lr 0.000000 | ms/batch 400.60 | loss  2.70 | ppl    14.83 | acc     0.69 | train_ae_norm     1.00
[84/200][4199/4361] Loss_D: 0.00204874 (Loss_D_real: 0.00016231 Loss_D_fake: 0.00188643) Loss_G: 0.41643792 Loss_Enh_Dec: -1.83284950
| epoch  84 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.12 | loss  2.75 | ppl    15.67 | acc     0.73 | train_ae_norm     1.00
[84/200][4299/4361] Loss_D: 0.00090392 (Loss_D_real: 0.00042528 Loss_D_fake: 0.00047864) Loss_G: 0.49696198 Loss_Enh_Dec: -1.77017653
| epoch  84 |  4300/ 4361 batches | lr 0.000000 | ms/batch 400.72 | loss  2.72 | ppl    15.25 | acc     0.70 | train_ae_norm     1.00
| end of epoch  84 | time: 1850.91s | test loss  2.73 | test ppl 15.29 | acc 0.721
bleu_self:  [3.88888889e-02 1.29918098e-09 4.36022037e-12 2.90108687e-13
 7.45582650e-13]
bleu_test:  [6.30555555e-01 1.11886667e-08 3.43280454e-11 1.90860284e-10
 7.99105085e-10]
bleu_self: [0.03888889,0.00000000,0.00000000,0.00000000,0.00000000]

| epoch  85 |  2200/ 4361 batches | lr 0.000000 | ms/batch 400.66 | loss  2.72 | ppl    15.25 | acc     0.69 | train_ae_norm     1.00
[85/200][2299/4361] Loss_D: 0.00217584 (Loss_D_real: 0.00154866 Loss_D_fake: 0.00062718) Loss_G: 0.50608057 Loss_Enh_Dec: -1.42412603
| epoch  85 |  2300/ 4361 batches | lr 0.000000 | ms/batch 400.93 | loss  2.71 | ppl    15.05 | acc     0.71 | train_ae_norm     1.00
[85/200][2399/4361] Loss_D: 0.00189619 (Loss_D_real: 0.00111775 Loss_D_fake: 0.00077845) Loss_G: 0.43599817 Loss_Enh_Dec: -1.78932703
| epoch  85 |  2400/ 4361 batches | lr 0.000000 | ms/batch 400.31 | loss  2.73 | ppl    15.27 | acc     0.65 | train_ae_norm     1.00
[85/200][2499/4361] Loss_D: 0.00096879 (Loss_D_real: 0.00060581 Loss_D_fake: 0.00036297) Loss_G: 0.76979882 Loss_Enh_Dec: -1.66387784
| epoch  85 |  2500/ 4361 batches | lr 0.000000 | ms/batch 400.93 | loss  2.73 | ppl    15.39 | acc     0.70 | train_ae_norm     1.00
[85/200][2599/4361] Loss_D: 0.00997998 (Loss_D_real: 0.0005307

| epoch  86 |   300/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  2.73 | ppl    15.33 | acc     0.64 | train_ae_norm     1.00
[86/200][399/4361] Loss_D: 0.00114571 (Loss_D_real: 0.00092580 Loss_D_fake: 0.00021991) Loss_G: 0.62474984 Loss_Enh_Dec: -1.38110065
| epoch  86 |   400/ 4361 batches | lr 0.000000 | ms/batch 400.84 | loss  2.64 | ppl    14.02 | acc     0.67 | train_ae_norm     1.00
[86/200][499/4361] Loss_D: 0.00621464 (Loss_D_real: 0.00059977 Loss_D_fake: 0.00561488) Loss_G: 0.45659038 Loss_Enh_Dec: -1.55303335
| epoch  86 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.90 | loss  2.71 | ppl    14.98 | acc     0.71 | train_ae_norm     1.00
[86/200][599/4361] Loss_D: 0.00264433 (Loss_D_real: 0.00054060 Loss_D_fake: 0.00210374) Loss_G: 0.48390028 Loss_Enh_Dec: -1.75576866
| epoch  86 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.96 | loss  2.66 | ppl    14.28 | acc     0.65 | train_ae_norm     1.00
[86/200][899/4361] Loss_D: 0.00152507 (Loss_D_real: 0.00049725 Lo

| epoch  86 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.67 | ppl    14.43 | acc     0.71 | train_ae_norm     1.00
[86/200][3699/4361] Loss_D: 0.00272644 (Loss_D_real: 0.00047449 Loss_D_fake: 0.00225195) Loss_G: 0.30868530 Loss_Enh_Dec: -1.50929344
| epoch  86 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  2.68 | ppl    14.54 | acc     0.68 | train_ae_norm     1.00
[86/200][3799/4361] Loss_D: 0.00311611 (Loss_D_real: 0.00019223 Loss_D_fake: 0.00292388) Loss_G: 0.29987928 Loss_Enh_Dec: -1.69385076
| epoch  86 |  3800/ 4361 batches | lr 0.000000 | ms/batch 402.09 | loss  2.70 | ppl    14.89 | acc     0.74 | train_ae_norm     1.00
[86/200][3899/4361] Loss_D: 0.00400157 (Loss_D_real: 0.00026194 Loss_D_fake: 0.00373963) Loss_G: 0.28819415 Loss_Enh_Dec: -1.84820461
| epoch  86 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  2.69 | ppl    14.79 | acc     0.68 | train_ae_norm     1.00
[86/200][3999/4361] Loss_D: 0.00238471 (Loss_D_real: 0.0001863

| epoch  87 |  1900/ 4361 batches | lr 0.000000 | ms/batch 400.87 | loss  2.77 | ppl    16.04 | acc     0.69 | train_ae_norm     1.00
[87/200][1999/4361] Loss_D: 0.00228405 (Loss_D_real: 0.00090382 Loss_D_fake: 0.00138024) Loss_G: 0.33761624 Loss_Enh_Dec: -2.28333187
| epoch  87 |  2000/ 4361 batches | lr 0.000000 | ms/batch 402.31 | loss  2.72 | ppl    15.15 | acc     0.69 | train_ae_norm     1.00
[87/200][2099/4361] Loss_D: 0.00401868 (Loss_D_real: 0.00007443 Loss_D_fake: 0.00394425) Loss_G: 0.32924125 Loss_Enh_Dec: -2.37027812
| epoch  87 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.45 | loss  2.76 | ppl    15.80 | acc     0.70 | train_ae_norm     1.00
[87/200][2199/4361] Loss_D: 0.00224448 (Loss_D_real: 0.00080908 Loss_D_fake: 0.00143541) Loss_G: 0.33096480 Loss_Enh_Dec: -1.34551942
| epoch  87 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.75 | loss  2.73 | ppl    15.40 | acc     0.70 | train_ae_norm     1.00
[87/200][2299/4361] Loss_D: 0.00172675 (Loss_D_real: 0.0000723

| epoch  88 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.61 | loss  2.75 | ppl    15.58 | acc     0.65 | train_ae_norm     1.00
[88/200][199/4361] Loss_D: 0.00140763 (Loss_D_real: 0.00014114 Loss_D_fake: 0.00126648) Loss_G: 0.34010673 Loss_Enh_Dec: -2.10969853
| epoch  88 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  2.77 | ppl    16.01 | acc     0.70 | train_ae_norm     1.00
[88/200][299/4361] Loss_D: 0.00251467 (Loss_D_real: 0.00223173 Loss_D_fake: 0.00028293) Loss_G: 0.49105516 Loss_Enh_Dec: -1.63195789
| epoch  88 |   300/ 4361 batches | lr 0.000000 | ms/batch 400.84 | loss  2.77 | ppl    15.99 | acc     0.64 | train_ae_norm     1.00
[88/200][399/4361] Loss_D: 0.00210737 (Loss_D_real: 0.00039619 Loss_D_fake: 0.00171118) Loss_G: 0.32756168 Loss_Enh_Dec: -2.15370154
| epoch  88 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.73 | loss  2.71 | ppl    15.05 | acc     0.69 | train_ae_norm     1.00
[88/200][499/4361] Loss_D: 0.00140027 (Loss_D_real: 0.00034349 Lo

| epoch  88 |  3200/ 4361 batches | lr 0.000000 | ms/batch 400.57 | loss  2.74 | ppl    15.41 | acc     0.69 | train_ae_norm     1.00
[88/200][3299/4361] Loss_D: 0.00168630 (Loss_D_real: 0.00078958 Loss_D_fake: 0.00089672) Loss_G: 0.39045131 Loss_Enh_Dec: -1.44241047
| epoch  88 |  3300/ 4361 batches | lr 0.000000 | ms/batch 400.53 | loss  2.72 | ppl    15.18 | acc     0.70 | train_ae_norm     1.00
[88/200][3399/4361] Loss_D: 0.00494011 (Loss_D_real: 0.00401631 Loss_D_fake: 0.00092380) Loss_G: 0.44585228 Loss_Enh_Dec: -0.74332750
| epoch  88 |  3400/ 4361 batches | lr 0.000000 | ms/batch 400.44 | loss  2.70 | ppl    14.92 | acc     0.66 | train_ae_norm     1.00
[88/200][3499/4361] Loss_D: 0.00289140 (Loss_D_real: 0.00171940 Loss_D_fake: 0.00117201) Loss_G: 0.53996032 Loss_Enh_Dec: -1.08110809
| epoch  88 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  2.64 | ppl    13.96 | acc     0.69 | train_ae_norm     1.00
[88/200][3599/4361] Loss_D: 0.00083251 (Loss_D_real: 0.0001877

| epoch  89 |  1300/ 4361 batches | lr 0.000000 | ms/batch 400.71 | loss  2.71 | ppl    15.05 | acc     0.68 | train_ae_norm     1.00
[89/200][1399/4361] Loss_D: 0.00183113 (Loss_D_real: 0.00067535 Loss_D_fake: 0.00115578) Loss_G: 0.49638534 Loss_Enh_Dec: -2.02757740
| epoch  89 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.70 | ppl    14.94 | acc     0.65 | train_ae_norm     1.00
[89/200][1499/4361] Loss_D: 0.00134998 (Loss_D_real: 0.00088608 Loss_D_fake: 0.00046390) Loss_G: 0.64774132 Loss_Enh_Dec: -1.84383893
| epoch  89 |  1500/ 4361 batches | lr 0.000000 | ms/batch 406.18 | loss  2.77 | ppl    15.96 | acc     0.67 | train_ae_norm     1.00
[89/200][1599/4361] Loss_D: 0.00234585 (Loss_D_real: 0.00185219 Loss_D_fake: 0.00049365) Loss_G: 0.50479472 Loss_Enh_Dec: -1.55673921
| epoch  89 |  1600/ 4361 batches | lr 0.000000 | ms/batch 400.86 | loss  2.73 | ppl    15.26 | acc     0.67 | train_ae_norm     1.00
[89/200][1699/4361] Loss_D: 0.00042908 (Loss_D_real: 0.0000609

  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:03.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:18.
  Batch   220

| epoch  90 |  2600/ 4361 batches | lr 0.000000 | ms/batch 400.59 | loss  2.67 | ppl    14.47 | acc     0.67 | train_ae_norm     1.00
[90/200][2699/4361] Loss_D: 0.00085378 (Loss_D_real: 0.00010551 Loss_D_fake: 0.00074827) Loss_G: 0.42767248 Loss_Enh_Dec: -1.60355461
| epoch  90 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.22 | loss  2.69 | ppl    14.72 | acc     0.67 | train_ae_norm     1.00
[90/200][2799/4361] Loss_D: 0.00114945 (Loss_D_real: 0.00047611 Loss_D_fake: 0.00067334) Loss_G: 0.41524535 Loss_Enh_Dec: -1.42181170
| epoch  90 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.64 | ppl    14.04 | acc     0.68 | train_ae_norm     1.00
[90/200][2899/4361] Loss_D: 0.00067672 (Loss_D_real: 0.00021374 Loss_D_fake: 0.00046299) Loss_G: 0.41763464 Loss_Enh_Dec: -1.51839089
| epoch  90 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.64 | loss  2.67 | ppl    14.37 | acc     0.68 | train_ae_norm     1.00
[90/200][2999/4361] Loss_D: 0.00037883 (Loss_D_real: 0.0002735

[91/200][699/4361] Loss_D: 0.00025710 (Loss_D_real: 0.00004327 Loss_D_fake: 0.00021383) Loss_G: 0.40289411 Loss_Enh_Dec: -2.01493335
| epoch  91 |   700/ 4361 batches | lr 0.000000 | ms/batch 400.39 | loss  2.63 | ppl    13.94 | acc     0.69 | train_ae_norm     1.00
[91/200][799/4361] Loss_D: 0.00132985 (Loss_D_real: 0.00087456 Loss_D_fake: 0.00045528) Loss_G: 0.42928830 Loss_Enh_Dec: -2.11668181
| epoch  91 |   800/ 4361 batches | lr 0.000000 | ms/batch 400.92 | loss  2.62 | ppl    13.68 | acc     0.69 | train_ae_norm     1.00
[91/200][899/4361] Loss_D: 0.00259481 (Loss_D_real: 0.00206058 Loss_D_fake: 0.00053423) Loss_G: 0.39943409 Loss_Enh_Dec: -2.08570004
| epoch  91 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  2.63 | ppl    13.93 | acc     0.71 | train_ae_norm     1.00
[91/200][999/4361] Loss_D: 0.03531375 (Loss_D_real: 0.03477272 Loss_D_fake: 0.00054104) Loss_G: 0.74561441 Loss_Enh_Dec: -2.10493970
| epoch  91 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.30 

[91/200][3799/4361] Loss_D: 0.00090263 (Loss_D_real: 0.00029595 Loss_D_fake: 0.00060668) Loss_G: 0.37556493 Loss_Enh_Dec: -1.85284042
| epoch  91 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.22 | loss  2.63 | ppl    13.85 | acc     0.76 | train_ae_norm     1.00
[91/200][3899/4361] Loss_D: 0.00058407 (Loss_D_real: 0.00013624 Loss_D_fake: 0.00044783) Loss_G: 0.37252975 Loss_Enh_Dec: -1.47857130
| epoch  91 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  2.61 | ppl    13.61 | acc     0.67 | train_ae_norm     1.00
[91/200][3999/4361] Loss_D: 0.00309210 (Loss_D_real: 0.00246921 Loss_D_fake: 0.00062289) Loss_G: 0.41099721 Loss_Enh_Dec: -1.70518363
| epoch  91 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.97 | loss  2.61 | ppl    13.67 | acc     0.70 | train_ae_norm     1.00
[91/200][4099/4361] Loss_D: 0.00122437 (Loss_D_real: 0.00067132 Loss_D_fake: 0.00055305) Loss_G: 0.43291458 Loss_Enh_Dec: -1.69517362
| epoch  91 |  4100/ 4361 batches | lr 0.000000 | ms/batch 400

[92/200][1999/4361] Loss_D: 0.00721913 (Loss_D_real: 0.00616967 Loss_D_fake: 0.00104946) Loss_G: 0.44478771 Loss_Enh_Dec: -1.56524265
| epoch  92 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.38 | loss  2.59 | ppl    13.30 | acc     0.74 | train_ae_norm     1.00
[92/200][2099/4361] Loss_D: 0.00072418 (Loss_D_real: 0.00016241 Loss_D_fake: 0.00056177) Loss_G: 0.38803121 Loss_Enh_Dec: -1.90672207
| epoch  92 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.40 | loss  2.64 | ppl    14.00 | acc     0.70 | train_ae_norm     1.00
[92/200][2199/4361] Loss_D: 0.00152267 (Loss_D_real: 0.00017523 Loss_D_fake: 0.00134744) Loss_G: 0.39165398 Loss_Enh_Dec: -1.83383775
| epoch  92 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.60 | ppl    13.50 | acc     0.72 | train_ae_norm     1.00
[92/200][2299/4361] Loss_D: 0.00280501 (Loss_D_real: 0.00211830 Loss_D_fake: 0.00068670) Loss_G: 0.53209203 Loss_Enh_Dec: -1.90366614
| epoch  92 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401

[93/200][99/4361] Loss_D: 0.00100621 (Loss_D_real: 0.00031328 Loss_D_fake: 0.00069292) Loss_G: 0.44798884 Loss_Enh_Dec: -0.91273397
| epoch  93 |   100/ 4361 batches | lr 0.000000 | ms/batch 400.98 | loss  2.58 | ppl    13.18 | acc     0.67 | train_ae_norm     1.00
[93/200][199/4361] Loss_D: 0.00170971 (Loss_D_real: 0.00008891 Loss_D_fake: 0.00162080) Loss_G: 0.42756349 Loss_Enh_Dec: -0.89913809
| epoch  93 |   200/ 4361 batches | lr 0.000000 | ms/batch 400.70 | loss  2.60 | ppl    13.50 | acc     0.70 | train_ae_norm     1.00
[93/200][299/4361] Loss_D: 0.00319001 (Loss_D_real: 0.00237107 Loss_D_fake: 0.00081894) Loss_G: 0.53997195 Loss_Enh_Dec: -1.68985617
| epoch  93 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.80 | loss  2.60 | ppl    13.50 | acc     0.66 | train_ae_norm     1.00
[93/200][399/4361] Loss_D: 0.02383589 (Loss_D_real: 0.02277983 Loss_D_fake: 0.00105606) Loss_G: 0.45067191 Loss_Enh_Dec: -1.66027677
| epoch  93 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.65 |

[93/200][3199/4361] Loss_D: 0.00138449 (Loss_D_real: 0.00087226 Loss_D_fake: 0.00051223) Loss_G: 0.43682519 Loss_Enh_Dec: -2.04803443
| epoch  93 |  3200/ 4361 batches | lr 0.000000 | ms/batch 400.99 | loss  2.63 | ppl    13.82 | acc     0.69 | train_ae_norm     1.00
[93/200][3299/4361] Loss_D: 0.00234391 (Loss_D_real: 0.00199403 Loss_D_fake: 0.00034988) Loss_G: 0.42759266 Loss_Enh_Dec: -2.05707383
| epoch  93 |  3300/ 4361 batches | lr 0.000000 | ms/batch 400.17 | loss  2.62 | ppl    13.71 | acc     0.70 | train_ae_norm     1.00
[93/200][3399/4361] Loss_D: 0.00286670 (Loss_D_real: 0.00248742 Loss_D_fake: 0.00037927) Loss_G: 0.42757568 Loss_Enh_Dec: -1.58951402
| epoch  93 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.02 | loss  2.61 | ppl    13.62 | acc     0.69 | train_ae_norm     1.00
[93/200][3499/4361] Loss_D: 0.00145008 (Loss_D_real: 0.00005518 Loss_D_fake: 0.00139490) Loss_G: 0.43547311 Loss_Enh_Dec: -1.82835853
| epoch  93 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401

[94/200][1399/4361] Loss_D: 0.00143557 (Loss_D_real: 0.00018032 Loss_D_fake: 0.00125525) Loss_G: 0.45380297 Loss_Enh_Dec: -2.20290732
| epoch  94 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.61 | ppl    13.54 | acc     0.67 | train_ae_norm     1.00
[94/200][1499/4361] Loss_D: 0.01727665 (Loss_D_real: 0.01680906 Loss_D_fake: 0.00046759) Loss_G: 0.47265139 Loss_Enh_Dec: -2.26281166
| epoch  94 |  1500/ 4361 batches | lr 0.000000 | ms/batch 400.89 | loss  2.65 | ppl    14.13 | acc     0.67 | train_ae_norm     1.00
[94/200][1599/4361] Loss_D: 0.00080241 (Loss_D_real: 0.00004934 Loss_D_fake: 0.00075306) Loss_G: 0.36816320 Loss_Enh_Dec: -1.86881983
| epoch  94 |  1600/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.62 | ppl    13.68 | acc     0.70 | train_ae_norm     1.00
[94/200][1699/4361] Loss_D: 0.00440707 (Loss_D_real: 0.00147710 Loss_D_fake: 0.00292997) Loss_G: 0.38950238 Loss_Enh_Dec: -1.68640363
| epoch  94 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401

  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220

| epoch  95 |  2600/ 4361 batches | lr 0.000000 | ms/batch 399.73 | loss  2.60 | ppl    13.42 | acc     0.68 | train_ae_norm     1.00
[95/200][2699/4361] Loss_D: 0.00122314 (Loss_D_real: 0.00102283 Loss_D_fake: 0.00020031) Loss_G: 0.56636399 Loss_Enh_Dec: -1.50110006
| epoch  95 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.42 | loss  2.59 | ppl    13.28 | acc     0.69 | train_ae_norm     1.00
[95/200][2799/4361] Loss_D: 0.00257567 (Loss_D_real: 0.00109648 Loss_D_fake: 0.00147919) Loss_G: 0.39942202 Loss_Enh_Dec: -1.61154866
| epoch  95 |  2800/ 4361 batches | lr 0.000000 | ms/batch 399.99 | loss  2.56 | ppl    12.94 | acc     0.68 | train_ae_norm     1.00
[95/200][2899/4361] Loss_D: 0.00330744 (Loss_D_real: 0.00112226 Loss_D_fake: 0.00218518) Loss_G: 0.59268636 Loss_Enh_Dec: -1.91775167
| epoch  95 |  2900/ 4361 batches | lr 0.000000 | ms/batch 400.44 | loss  2.56 | ppl    12.95 | acc     0.69 | train_ae_norm     1.00
[95/200][2999/4361] Loss_D: 0.00214040 (Loss_D_real: 0.0016945

| epoch  96 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  2.56 | ppl    12.94 | acc     0.70 | train_ae_norm     1.00
[96/200][799/4361] Loss_D: 0.01364515 (Loss_D_real: 0.00039431 Loss_D_fake: 0.01325084) Loss_G: 0.39060709 Loss_Enh_Dec: -1.44902575
| epoch  96 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.76 | loss  2.54 | ppl    12.62 | acc     0.70 | train_ae_norm     1.00
[96/200][899/4361] Loss_D: 0.00360384 (Loss_D_real: 0.00014755 Loss_D_fake: 0.00345628) Loss_G: 0.54663020 Loss_Enh_Dec: -0.64996177
| epoch  96 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.75 | loss  2.54 | ppl    12.72 | acc     0.74 | train_ae_norm     1.00
[96/200][999/4361] Loss_D: 0.00081681 (Loss_D_real: 0.00035478 Loss_D_fake: 0.00046203) Loss_G: 0.49161202 Loss_Enh_Dec: -1.29678023
| epoch  96 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.49 | loss  2.52 | ppl    12.42 | acc     0.74 | train_ae_norm     1.00
[96/200][1099/4361] Loss_D: 0.00099549 (Loss_D_real: 0.00052106 L

| epoch  96 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.93 | loss  2.59 | ppl    13.32 | acc     0.75 | train_ae_norm     1.00
[96/200][3899/4361] Loss_D: 0.00112086 (Loss_D_real: 0.00008225 Loss_D_fake: 0.00103861) Loss_G: 0.36581856 Loss_Enh_Dec: -1.55840409
| epoch  96 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.39 | loss  2.60 | ppl    13.47 | acc     0.68 | train_ae_norm     1.00
[96/200][3999/4361] Loss_D: 0.01730241 (Loss_D_real: 0.01376008 Loss_D_fake: 0.00354233) Loss_G: 0.35498413 Loss_Enh_Dec: -1.82192695
| epoch  96 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.64 | loss  2.59 | ppl    13.37 | acc     0.70 | train_ae_norm     1.00
[96/200][4099/4361] Loss_D: 0.02119307 (Loss_D_real: 0.00028908 Loss_D_fake: 0.02090399) Loss_G: 0.35408983 Loss_Enh_Dec: -2.30016780
| epoch  96 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.55 | ppl    12.82 | acc     0.70 | train_ae_norm     1.00
[96/200][4199/4361] Loss_D: 0.02544983 (Loss_D_real: 0.0244932

| epoch  97 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.44 | loss  2.57 | ppl    13.07 | acc     0.73 | train_ae_norm     1.00
[97/200][1999/4361] Loss_D: 0.01860021 (Loss_D_real: 0.01448065 Loss_D_fake: 0.00411957) Loss_G: 0.29027259 Loss_Enh_Dec: -1.90706730
| epoch  97 |  2000/ 4361 batches | lr 0.000000 | ms/batch 402.28 | loss  2.54 | ppl    12.62 | acc     0.72 | train_ae_norm     1.00
[97/200][2099/4361] Loss_D: 0.02464624 (Loss_D_real: 0.02174893 Loss_D_fake: 0.00289731) Loss_G: 0.31210738 Loss_Enh_Dec: -2.23331714
| epoch  97 |  2100/ 4361 batches | lr 0.000000 | ms/batch 402.04 | loss  2.54 | ppl    12.70 | acc     0.74 | train_ae_norm     1.00
[97/200][2199/4361] Loss_D: 0.00304904 (Loss_D_real: 0.00017583 Loss_D_fake: 0.00287321) Loss_G: 0.31927365 Loss_Enh_Dec: -1.83857727
| epoch  97 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  2.54 | ppl    12.64 | acc     0.70 | train_ae_norm     1.00
[97/200][2299/4361] Loss_D: 0.00192326 (Loss_D_real: 0.0002414

| epoch  98 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.50 | ppl    12.14 | acc     0.66 | train_ae_norm     1.00
[98/200][199/4361] Loss_D: 0.00172637 (Loss_D_real: 0.00032781 Loss_D_fake: 0.00139857) Loss_G: 0.31568703 Loss_Enh_Dec: -2.28040457
| epoch  98 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.52 | loss  2.51 | ppl    12.36 | acc     0.74 | train_ae_norm     1.00
[98/200][299/4361] Loss_D: 0.05797193 (Loss_D_real: 0.05532369 Loss_D_fake: 0.00264825) Loss_G: 0.31031474 Loss_Enh_Dec: -2.26971126
| epoch  98 |   300/ 4361 batches | lr 0.000000 | ms/batch 400.69 | loss  2.54 | ppl    12.64 | acc     0.69 | train_ae_norm     1.00
[98/200][399/4361] Loss_D: 0.00205217 (Loss_D_real: 0.00053764 Loss_D_fake: 0.00151453) Loss_G: 0.32594261 Loss_Enh_Dec: -2.73519278
| epoch  98 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.09 | loss  2.45 | ppl    11.60 | acc     0.74 | train_ae_norm     1.00
[98/200][499/4361] Loss_D: 0.00245116 (Loss_D_real: 0.00024156 Lo

| epoch  98 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.60 | loss  2.57 | ppl    13.07 | acc     0.72 | train_ae_norm     1.00
[98/200][3299/4361] Loss_D: 0.00087151 (Loss_D_real: 0.00008650 Loss_D_fake: 0.00078502) Loss_G: 0.38587606 Loss_Enh_Dec: -2.27394009
| epoch  98 |  3300/ 4361 batches | lr 0.000000 | ms/batch 400.60 | loss  2.55 | ppl    12.80 | acc     0.72 | train_ae_norm     1.00
[98/200][3399/4361] Loss_D: 0.00995665 (Loss_D_real: 0.00520732 Loss_D_fake: 0.00474933) Loss_G: 0.40704814 Loss_Enh_Dec: -2.41788077
| epoch  98 |  3400/ 4361 batches | lr 0.000000 | ms/batch 402.04 | loss  2.55 | ppl    12.76 | acc     0.66 | train_ae_norm     1.00
[98/200][3499/4361] Loss_D: 0.00199017 (Loss_D_real: 0.00137349 Loss_D_fake: 0.00061668) Loss_G: 0.60444200 Loss_Enh_Dec: -1.92926061
| epoch  98 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  2.49 | ppl    12.07 | acc     0.71 | train_ae_norm     1.00
[98/200][3599/4361] Loss_D: 0.00103631 (Loss_D_real: 0.0001586

| epoch  99 |  1400/ 4361 batches | lr 0.000000 | ms/batch 402.34 | loss  2.50 | ppl    12.24 | acc     0.68 | train_ae_norm     1.00
[99/200][1499/4361] Loss_D: 0.00086851 (Loss_D_real: 0.00013472 Loss_D_fake: 0.00073379) Loss_G: 0.40206346 Loss_Enh_Dec: -2.47551632
| epoch  99 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.66 | loss  2.57 | ppl    13.10 | acc     0.70 | train_ae_norm     1.00
[99/200][1599/4361] Loss_D: 0.00210768 (Loss_D_real: 0.00012217 Loss_D_fake: 0.00198552) Loss_G: 0.44476911 Loss_Enh_Dec: -2.35220695
| epoch  99 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.21 | loss  2.55 | ppl    12.82 | acc     0.70 | train_ae_norm     1.00
[99/200][1699/4361] Loss_D: 0.00108008 (Loss_D_real: 0.00033696 Loss_D_fake: 0.00074311) Loss_G: 0.39445138 Loss_Enh_Dec: -2.24843097
| epoch  99 |  1700/ 4361 batches | lr 0.000000 | ms/batch 402.03 | loss  2.51 | ppl    12.35 | acc     0.68 | train_ae_norm     1.00
[99/200][1799/4361] Loss_D: 0.00187291 (Loss_D_real: 0.0003490

  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.698
  Average training loss discriminator: 0.702
  Training epcoh too

[100/200][2699/4361] Loss_D: 0.00867874 (Loss_D_real: 0.00147750 Loss_D_fake: 0.00720124) Loss_G: 0.52741718 Loss_Enh_Dec: -1.99378991
| epoch 100 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.11 | loss  2.49 | ppl    12.10 | acc     0.72 | train_ae_norm     1.00
[100/200][2799/4361] Loss_D: 0.00129497 (Loss_D_real: 0.00021167 Loss_D_fake: 0.00108330) Loss_G: 0.39756289 Loss_Enh_Dec: -2.00987768
| epoch 100 |  2800/ 4361 batches | lr 0.000000 | ms/batch 400.97 | loss  2.44 | ppl    11.49 | acc     0.70 | train_ae_norm     1.00
[100/200][2899/4361] Loss_D: 0.00208243 (Loss_D_real: 0.00085926 Loss_D_fake: 0.00122317) Loss_G: 0.44615650 Loss_Enh_Dec: -1.94558358
| epoch 100 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.14 | loss  2.47 | ppl    11.78 | acc     0.71 | train_ae_norm     1.00
[100/200][2999/4361] Loss_D: 0.00094984 (Loss_D_real: 0.00019024 Loss_D_fake: 0.00075959) Loss_G: 0.45296031 Loss_Enh_Dec: -1.49374413
| epoch 100 |  3000/ 4361 batches | lr 0.000000 | ms/batch

[101/200][799/4361] Loss_D: 0.00349057 (Loss_D_real: 0.00132558 Loss_D_fake: 0.00216499) Loss_G: 0.38310048 Loss_Enh_Dec: -1.23781538
| epoch 101 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.95 | loss  2.42 | ppl    11.30 | acc     0.71 | train_ae_norm     1.00
[101/200][899/4361] Loss_D: 0.00051820 (Loss_D_real: 0.00026279 Loss_D_fake: 0.00025541) Loss_G: 0.44871065 Loss_Enh_Dec: -1.63282418
| epoch 101 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  2.46 | ppl    11.74 | acc     0.74 | train_ae_norm     1.00
[101/200][999/4361] Loss_D: 0.00490474 (Loss_D_real: 0.00411815 Loss_D_fake: 0.00078658) Loss_G: 0.45831117 Loss_Enh_Dec: -1.52390003
| epoch 101 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.59 | loss  2.43 | ppl    11.38 | acc     0.75 | train_ae_norm     1.00
[101/200][1099/4361] Loss_D: 0.00231530 (Loss_D_real: 0.00181660 Loss_D_fake: 0.00049870) Loss_G: 0.46979371 Loss_Enh_Dec: -1.60872233
| epoch 101 |  1100/ 4361 batches | lr 0.000000 | ms/batch 40

| epoch 101 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.88 | loss  2.44 | ppl    11.47 | acc     0.74 | train_ae_norm     1.00
[101/200][3899/4361] Loss_D: 0.00064274 (Loss_D_real: 0.00003030 Loss_D_fake: 0.00061244) Loss_G: 0.41434059 Loss_Enh_Dec: -2.07133055
| epoch 101 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.72 | loss  2.44 | ppl    11.47 | acc     0.69 | train_ae_norm     1.00
[101/200][3999/4361] Loss_D: 0.00101235 (Loss_D_real: 0.00024287 Loss_D_fake: 0.00076948) Loss_G: 0.41088316 Loss_Enh_Dec: -2.16722941
| epoch 101 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.64 | loss  2.43 | ppl    11.31 | acc     0.71 | train_ae_norm     1.00
[101/200][4099/4361] Loss_D: 0.00303978 (Loss_D_real: 0.00112163 Loss_D_fake: 0.00191814) Loss_G: 0.39137918 Loss_Enh_Dec: -2.41504264
| epoch 101 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.75 | loss  2.40 | ppl    10.98 | acc     0.73 | train_ae_norm     1.00
[101/200][4199/4361] Loss_D: 0.00241937 (Loss_D_real: 0.001

| epoch 102 |  2000/ 4361 batches | lr 0.000000 | ms/batch 400.65 | loss  2.40 | ppl    11.03 | acc     0.73 | train_ae_norm     1.00
[102/200][2099/4361] Loss_D: 0.00127741 (Loss_D_real: 0.00056580 Loss_D_fake: 0.00071160) Loss_G: 0.38079411 Loss_Enh_Dec: -1.55986917
| epoch 102 |  2100/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.44 | ppl    11.48 | acc     0.75 | train_ae_norm     1.00
[102/200][2199/4361] Loss_D: 0.00662809 (Loss_D_real: 0.00339115 Loss_D_fake: 0.00323694) Loss_G: 0.40770203 Loss_Enh_Dec: -1.84873283
| epoch 102 |  2200/ 4361 batches | lr 0.000000 | ms/batch 400.34 | loss  2.42 | ppl    11.29 | acc     0.74 | train_ae_norm     1.00
[102/200][2299/4361] Loss_D: 0.00079867 (Loss_D_real: 0.00020056 Loss_D_fake: 0.00059812) Loss_G: 0.54776841 Loss_Enh_Dec: -2.07946134
| epoch 102 |  2300/ 4361 batches | lr 0.000000 | ms/batch 400.59 | loss  2.42 | ppl    11.21 | acc     0.72 | train_ae_norm     1.00
[102/200][2399/4361] Loss_D: 0.02176580 (Loss_D_real: 0.021

| epoch 103 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  2.36 | ppl    10.57 | acc     0.71 | train_ae_norm     1.00
[103/200][499/4361] Loss_D: 0.00979594 (Loss_D_real: 0.00004919 Loss_D_fake: 0.00974675) Loss_G: 0.36343017 Loss_Enh_Dec: -2.09943938
| epoch 103 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  2.44 | ppl    11.49 | acc     0.72 | train_ae_norm     1.00
[103/200][599/4361] Loss_D: 0.00615161 (Loss_D_real: 0.00525400 Loss_D_fake: 0.00089761) Loss_G: 0.45150396 Loss_Enh_Dec: -1.88142228
| epoch 103 |   600/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  2.40 | ppl    11.00 | acc     0.71 | train_ae_norm     1.00
[103/200][699/4361] Loss_D: 0.00301995 (Loss_D_real: 0.00144290 Loss_D_fake: 0.00157705) Loss_G: 0.41107589 Loss_Enh_Dec: -2.31865311
| epoch 103 |   700/ 4361 batches | lr 0.000000 | ms/batch 400.33 | loss  2.45 | ppl    11.58 | acc     0.71 | train_ae_norm     1.00
[103/200][799/4361] Loss_D: 0.00171465 (Loss_D_real: 0.0007035

[103/200][3499/4361] Loss_D: 0.00221945 (Loss_D_real: 0.00012018 Loss_D_fake: 0.00209927) Loss_G: 0.44205004 Loss_Enh_Dec: -1.79065025
| epoch 103 |  3500/ 4361 batches | lr 0.000000 | ms/batch 400.36 | loss  2.40 | ppl    10.97 | acc     0.68 | train_ae_norm     1.00
[103/200][3599/4361] Loss_D: 0.01706174 (Loss_D_real: 0.01599052 Loss_D_fake: 0.00107122) Loss_G: 0.39909098 Loss_Enh_Dec: -1.79095268
| epoch 103 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.11 | loss  2.39 | ppl    10.92 | acc     0.73 | train_ae_norm     1.00
[103/200][3699/4361] Loss_D: 0.00652185 (Loss_D_real: 0.00012956 Loss_D_fake: 0.00639229) Loss_G: 0.38558337 Loss_Enh_Dec: -1.71096575
| epoch 103 |  3700/ 4361 batches | lr 0.000000 | ms/batch 400.40 | loss  2.42 | ppl    11.21 | acc     0.70 | train_ae_norm     1.00
[103/200][3799/4361] Loss_D: 0.00321141 (Loss_D_real: 0.00027617 Loss_D_fake: 0.00293524) Loss_G: 0.39580646 Loss_Enh_Dec: -2.07810187
| epoch 103 |  3800/ 4361 batches | lr 0.000000 | ms/batch

[104/200][1699/4361] Loss_D: 0.00135893 (Loss_D_real: 0.00083499 Loss_D_fake: 0.00052394) Loss_G: 0.40400329 Loss_Enh_Dec: -2.12154627
| epoch 104 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.46 | ppl    11.74 | acc     0.67 | train_ae_norm     1.00
[104/200][1799/4361] Loss_D: 0.00181788 (Loss_D_real: 0.00128901 Loss_D_fake: 0.00052887) Loss_G: 0.47481933 Loss_Enh_Dec: -2.05382776
| epoch 104 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.40 | loss  2.41 | ppl    11.09 | acc     0.69 | train_ae_norm     1.00
[104/200][1899/4361] Loss_D: 0.00261856 (Loss_D_real: 0.00165356 Loss_D_fake: 0.00096500) Loss_G: 0.46198541 Loss_Enh_Dec: -2.07578254
| epoch 104 |  1900/ 4361 batches | lr 0.000000 | ms/batch 400.83 | loss  2.47 | ppl    11.86 | acc     0.72 | train_ae_norm     1.00
[104/200][1999/4361] Loss_D: 0.00859509 (Loss_D_real: 0.00772179 Loss_D_fake: 0.00087329) Loss_G: 0.52414298 Loss_Enh_Dec: -2.12763119
| epoch 104 |  2000/ 4361 batches | lr 0.000000 | ms/batch

  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.698
  Average training loss discriminator: 0.697
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.485
  Test Loss: 4.487
  Test took: 0:00:00
Train other shit
| epoch 105 |     0/ 4361 batches | lr 0.000000 | ms/batch 866.04 | loss  0.02 | ppl     1.02 | acc     0.74 | train_ae_norm     1.00
[105/200][99/4361] Loss_D: 0.01631919 (Loss_D_real: 0.01246710 Loss_D_fake: 0.00385208) Loss

| epoch 105 |  2800/ 4361 batches | lr 0.000000 | ms/batch 400.65 | loss  2.45 | ppl    11.54 | acc     0.67 | train_ae_norm     1.00
[105/200][2899/4361] Loss_D: 0.00340655 (Loss_D_real: 0.00168264 Loss_D_fake: 0.00172391) Loss_G: 0.40967304 Loss_Enh_Dec: -1.99641883
| epoch 105 |  2900/ 4361 batches | lr 0.000000 | ms/batch 400.83 | loss  2.51 | ppl    12.31 | acc     0.70 | train_ae_norm     1.00
[105/200][2999/4361] Loss_D: 0.00078262 (Loss_D_real: 0.00020202 Loss_D_fake: 0.00058060) Loss_G: 0.45226827 Loss_Enh_Dec: -1.34564650
| epoch 105 |  3000/ 4361 batches | lr 0.000000 | ms/batch 400.57 | loss  2.49 | ppl    12.04 | acc     0.68 | train_ae_norm     1.00
[105/200][3099/4361] Loss_D: 0.00109765 (Loss_D_real: 0.00006202 Loss_D_fake: 0.00103563) Loss_G: 0.45683509 Loss_Enh_Dec: -1.25658214
| epoch 105 |  3100/ 4361 batches | lr 0.000000 | ms/batch 399.62 | loss  2.50 | ppl    12.20 | acc     0.69 | train_ae_norm     1.00
[105/200][3199/4361] Loss_D: 0.01271699 (Loss_D_real: 0.011

[106/200][899/4361] Loss_D: 0.00239739 (Loss_D_real: 0.00049621 Loss_D_fake: 0.00190118) Loss_G: 0.53430533 Loss_Enh_Dec: -1.27450550
| epoch 106 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  2.43 | ppl    11.35 | acc     0.76 | train_ae_norm     1.00
[106/200][999/4361] Loss_D: 0.00136950 (Loss_D_real: 0.00061031 Loss_D_fake: 0.00075919) Loss_G: 0.41648361 Loss_Enh_Dec: -1.53239024
| epoch 106 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.17 | loss  2.44 | ppl    11.50 | acc     0.73 | train_ae_norm     1.00
[106/200][1099/4361] Loss_D: 0.00238541 (Loss_D_real: 0.00027768 Loss_D_fake: 0.00210774) Loss_G: 0.43397322 Loss_Enh_Dec: -1.35386658
| epoch 106 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.39 | loss  2.40 | ppl    11.00 | acc     0.69 | train_ae_norm     1.00
[106/200][1199/4361] Loss_D: 0.00306884 (Loss_D_real: 0.00102777 Loss_D_fake: 0.00204107) Loss_G: 0.40429497 Loss_Enh_Dec: -1.86890030
| epoch 106 |  1200/ 4361 batches | lr 0.000000 | ms/batch 4

| epoch 106 |  3900/ 4361 batches | lr 0.000000 | ms/batch 400.61 | loss  2.42 | ppl    11.21 | acc     0.68 | train_ae_norm     1.00
[106/200][3999/4361] Loss_D: 0.00198237 (Loss_D_real: 0.00036001 Loss_D_fake: 0.00162236) Loss_G: 0.44401285 Loss_Enh_Dec: -1.54953420
| epoch 106 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.42 | ppl    11.23 | acc     0.71 | train_ae_norm     1.00
[106/200][4099/4361] Loss_D: 0.00473859 (Loss_D_real: 0.00362290 Loss_D_fake: 0.00111570) Loss_G: 0.48240826 Loss_Enh_Dec: -1.56979322
| epoch 106 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.55 | loss  2.40 | ppl    10.98 | acc     0.70 | train_ae_norm     1.00
[106/200][4199/4361] Loss_D: 0.00110308 (Loss_D_real: 0.00060168 Loss_D_fake: 0.00050140) Loss_G: 0.48993322 Loss_Enh_Dec: -0.66119760
| epoch 106 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.45 | ppl    11.54 | acc     0.73 | train_ae_norm     1.00
[106/200][4299/4361] Loss_D: 0.00096368 (Loss_D_real: 0.000

| epoch 107 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  2.37 | ppl    10.72 | acc     0.76 | train_ae_norm     1.00
[107/200][2199/4361] Loss_D: 0.00327048 (Loss_D_real: 0.00008764 Loss_D_fake: 0.00318283) Loss_G: 0.28111944 Loss_Enh_Dec: -1.59662771
| epoch 107 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.38 | ppl    10.77 | acc     0.72 | train_ae_norm     1.00
[107/200][2299/4361] Loss_D: 0.00548466 (Loss_D_real: 0.00193556 Loss_D_fake: 0.00354909) Loss_G: 0.27852708 Loss_Enh_Dec: -2.18025565
| epoch 107 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.47 | loss  2.37 | ppl    10.72 | acc     0.73 | train_ae_norm     1.00
[107/200][2399/4361] Loss_D: 0.00326362 (Loss_D_real: 0.00025410 Loss_D_fake: 0.00300952) Loss_G: 0.29592511 Loss_Enh_Dec: -1.92829835
| epoch 107 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.32 | loss  2.38 | ppl    10.84 | acc     0.68 | train_ae_norm     1.00
[107/200][2499/4361] Loss_D: 0.00327922 (Loss_D_real: 0.000

| epoch 108 |   300/ 4361 batches | lr 0.000000 | ms/batch 400.95 | loss  2.40 | ppl    10.98 | acc     0.68 | train_ae_norm     1.00
[108/200][399/4361] Loss_D: 0.00385543 (Loss_D_real: 0.00074209 Loss_D_fake: 0.00311334) Loss_G: 0.35973004 Loss_Enh_Dec: -2.94302487
| epoch 108 |   400/ 4361 batches | lr 0.000000 | ms/batch 400.55 | loss  2.28 | ppl     9.81 | acc     0.75 | train_ae_norm     1.00
[108/200][499/4361] Loss_D: 0.00756113 (Loss_D_real: 0.00562077 Loss_D_fake: 0.00194037) Loss_G: 0.32542440 Loss_Enh_Dec: -2.87735653
| epoch 108 |   500/ 4361 batches | lr 0.000000 | ms/batch 400.23 | loss  2.35 | ppl    10.45 | acc     0.74 | train_ae_norm     1.00
[108/200][599/4361] Loss_D: 0.00261558 (Loss_D_real: 0.00007722 Loss_D_fake: 0.00253835) Loss_G: 0.31636262 Loss_Enh_Dec: -2.86364412
| epoch 108 |   600/ 4361 batches | lr 0.000000 | ms/batch 400.23 | loss  2.31 | ppl    10.09 | acc     0.71 | train_ae_norm     1.00
[108/200][699/4361] Loss_D: 0.00330024 (Loss_D_real: 0.0000752

[108/200][3399/4361] Loss_D: 0.04076076 (Loss_D_real: 0.03818241 Loss_D_fake: 0.00257835) Loss_G: 0.35596868 Loss_Enh_Dec: -2.70771289
| epoch 108 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  2.42 | ppl    11.26 | acc     0.70 | train_ae_norm     1.00
[108/200][3499/4361] Loss_D: 0.00310193 (Loss_D_real: 0.00033107 Loss_D_fake: 0.00277086) Loss_G: 0.38259983 Loss_Enh_Dec: -2.25973034
| epoch 108 |  3500/ 4361 batches | lr 0.000000 | ms/batch 400.34 | loss  2.35 | ppl    10.47 | acc     0.72 | train_ae_norm     1.00
[108/200][3599/4361] Loss_D: 0.00579562 (Loss_D_real: 0.00196387 Loss_D_fake: 0.00383174) Loss_G: 0.34346792 Loss_Enh_Dec: -2.22268128
| epoch 108 |  3600/ 4361 batches | lr 0.000000 | ms/batch 399.81 | loss  2.37 | ppl    10.69 | acc     0.74 | train_ae_norm     1.00
[108/200][3699/4361] Loss_D: 0.12739219 (Loss_D_real: 0.12524483 Loss_D_fake: 0.00214736) Loss_G: 0.36394596 Loss_Enh_Dec: -1.70548522
| epoch 108 |  3700/ 4361 batches | lr 0.000000 | ms/batch

[109/200][1499/4361] Loss_D: 0.01173508 (Loss_D_real: 0.01057265 Loss_D_fake: 0.00116243) Loss_G: 0.37850547 Loss_Enh_Dec: -2.01453424
| epoch 109 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.18 | loss  2.39 | ppl    10.95 | acc     0.71 | train_ae_norm     1.00
[109/200][1599/4361] Loss_D: 0.00467355 (Loss_D_real: 0.00101768 Loss_D_fake: 0.00365587) Loss_G: 0.34558126 Loss_Enh_Dec: -2.01305747
| epoch 109 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.82 | loss  2.36 | ppl    10.57 | acc     0.72 | train_ae_norm     1.00
[109/200][1699/4361] Loss_D: 0.00435103 (Loss_D_real: 0.00309984 Loss_D_fake: 0.00125119) Loss_G: 0.42998919 Loss_Enh_Dec: -2.24585581
| epoch 109 |  1700/ 4361 batches | lr 0.000000 | ms/batch 402.58 | loss  2.33 | ppl    10.27 | acc     0.70 | train_ae_norm     1.00
[109/200][1799/4361] Loss_D: 0.01755168 (Loss_D_real: 0.01501525 Loss_D_fake: 0.00253644) Loss_G: 0.34687290 Loss_Enh_Dec: -2.11283636
| epoch 109 |  1800/ 4361 batches | lr 0.000000 | ms/batch

  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.697
  Average training loss discriminator: 0.699
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.497
  Test Loss: 4.384
  Test took: 0:00:00
Train ot

| epoch 110 |  2700/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  2.35 | ppl    10.44 | acc     0.72 | train_ae_norm     1.00
[110/200][2799/4361] Loss_D: 0.00470057 (Loss_D_real: 0.00174646 Loss_D_fake: 0.00295412) Loss_G: 0.41233954 Loss_Enh_Dec: -2.22278190
| epoch 110 |  2800/ 4361 batches | lr 0.000000 | ms/batch 401.47 | loss  2.31 | ppl    10.05 | acc     0.70 | train_ae_norm     1.00
[110/200][2899/4361] Loss_D: 0.00117615 (Loss_D_real: 0.00036918 Loss_D_fake: 0.00080697) Loss_G: 0.40676576 Loss_Enh_Dec: -1.41449249
| epoch 110 |  2900/ 4361 batches | lr 0.000000 | ms/batch 400.72 | loss  2.33 | ppl    10.24 | acc     0.72 | train_ae_norm     1.00
[110/200][2999/4361] Loss_D: 0.01809627 (Loss_D_real: 0.01648848 Loss_D_fake: 0.00160780) Loss_G: 0.43827373 Loss_Enh_Dec: -2.26467872
| epoch 110 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.78 | loss  2.33 | ppl    10.30 | acc     0.72 | train_ae_norm     1.00
[110/200][3099/4361] Loss_D: 0.00233188 (Loss_D_real: 0.000

| epoch 111 |   800/ 4361 batches | lr 0.000000 | ms/batch 402.31 | loss  2.33 | ppl    10.33 | acc     0.71 | train_ae_norm     1.00
[111/200][899/4361] Loss_D: 0.00148005 (Loss_D_real: 0.00030123 Loss_D_fake: 0.00117882) Loss_G: 0.41069242 Loss_Enh_Dec: -2.47907901
| epoch 111 |   900/ 4361 batches | lr 0.000000 | ms/batch 400.95 | loss  2.33 | ppl    10.28 | acc     0.75 | train_ae_norm     1.00
[111/200][999/4361] Loss_D: 0.00167160 (Loss_D_real: 0.00010997 Loss_D_fake: 0.00156163) Loss_G: 0.41202483 Loss_Enh_Dec: -2.30365062
| epoch 111 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.12 | loss  2.32 | ppl    10.19 | acc     0.73 | train_ae_norm     1.00
[111/200][1099/4361] Loss_D: 0.00467658 (Loss_D_real: 0.00201658 Loss_D_fake: 0.00265999) Loss_G: 0.39901519 Loss_Enh_Dec: -2.14797950
| epoch 111 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.10 | loss  2.31 | ppl    10.09 | acc     0.71 | train_ae_norm     1.00
[111/200][1199/4361] Loss_D: 0.00256709 (Loss_D_real: 0.00054

[111/200][3899/4361] Loss_D: 0.00150301 (Loss_D_real: 0.00016966 Loss_D_fake: 0.00133335) Loss_G: 0.51653647 Loss_Enh_Dec: -1.95350647
| epoch 111 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.07 | loss  2.32 | ppl    10.19 | acc     0.70 | train_ae_norm     1.00
[111/200][3999/4361] Loss_D: 0.00077799 (Loss_D_real: 0.00004451 Loss_D_fake: 0.00073348) Loss_G: 0.40898535 Loss_Enh_Dec: -1.92406178
| epoch 111 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.32 | ppl    10.21 | acc     0.73 | train_ae_norm     1.00
[111/200][4099/4361] Loss_D: 0.00163398 (Loss_D_real: 0.00043036 Loss_D_fake: 0.00120363) Loss_G: 0.41659591 Loss_Enh_Dec: -2.45647287
| epoch 111 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.50 | loss  2.28 | ppl     9.79 | acc     0.72 | train_ae_norm     1.00
[111/200][4199/4361] Loss_D: 0.00354877 (Loss_D_real: 0.00245745 Loss_D_fake: 0.00109132) Loss_G: 0.44667587 Loss_Enh_Dec: -2.14439702
| epoch 111 |  4200/ 4361 batches | lr 0.000000 | ms/batch

[112/200][2099/4361] Loss_D: 0.00976783 (Loss_D_real: 0.00822287 Loss_D_fake: 0.00154496) Loss_G: 0.44004551 Loss_Enh_Dec: -1.59432828
| epoch 112 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.57 | loss  2.37 | ppl    10.71 | acc     0.75 | train_ae_norm     1.00
[112/200][2199/4361] Loss_D: 0.00157056 (Loss_D_real: 0.00041487 Loss_D_fake: 0.00115568) Loss_G: 0.48662004 Loss_Enh_Dec: -2.15679145
| epoch 112 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.17 | loss  2.34 | ppl    10.34 | acc     0.73 | train_ae_norm     1.00
[112/200][2299/4361] Loss_D: 0.02945388 (Loss_D_real: 0.02819164 Loss_D_fake: 0.00126224) Loss_G: 0.62805098 Loss_Enh_Dec: -2.01412773
| epoch 112 |  2300/ 4361 batches | lr 0.000000 | ms/batch 401.67 | loss  2.33 | ppl    10.33 | acc     0.75 | train_ae_norm     1.00
[112/200][2399/4361] Loss_D: 0.00260395 (Loss_D_real: 0.00193728 Loss_D_fake: 0.00066667) Loss_G: 0.44455311 Loss_Enh_Dec: -2.06319118
| epoch 112 |  2400/ 4361 batches | lr 0.000000 | ms/batch

[113/200][199/4361] Loss_D: 0.00269865 (Loss_D_real: 0.00049215 Loss_D_fake: 0.00220650) Loss_G: 0.53255051 Loss_Enh_Dec: -1.92767799
| epoch 113 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.09 | loss  2.35 | ppl    10.51 | acc     0.74 | train_ae_norm     1.00
[113/200][299/4361] Loss_D: 0.00682903 (Loss_D_real: 0.00639779 Loss_D_fake: 0.00043123) Loss_G: 0.59810901 Loss_Enh_Dec: -2.18943691
| epoch 113 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.67 | loss  2.37 | ppl    10.72 | acc     0.66 | train_ae_norm     1.00
[113/200][399/4361] Loss_D: 0.01075337 (Loss_D_real: 0.00952671 Loss_D_fake: 0.00122665) Loss_G: 0.37275195 Loss_Enh_Dec: -1.99749684
| epoch 113 |   400/ 4361 batches | lr 0.000000 | ms/batch 402.04 | loss  2.29 | ppl     9.85 | acc     0.72 | train_ae_norm     1.00
[113/200][499/4361] Loss_D: 0.00290534 (Loss_D_real: 0.00159725 Loss_D_fake: 0.00130808) Loss_G: 0.38240337 Loss_Enh_Dec: -2.38920951
| epoch 113 |   500/ 4361 batches | lr 0.000000 | ms/batch 401

| epoch 113 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.74 | loss  2.45 | ppl    11.60 | acc     0.74 | train_ae_norm     1.00
[113/200][3299/4361] Loss_D: 0.00422459 (Loss_D_real: 0.00051651 Loss_D_fake: 0.00370808) Loss_G: 0.42006031 Loss_Enh_Dec: -2.54760480
| epoch 113 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.61 | loss  2.45 | ppl    11.63 | acc     0.68 | train_ae_norm     1.00
[113/200][3399/4361] Loss_D: 0.00693321 (Loss_D_real: 0.00084357 Loss_D_fake: 0.00608964) Loss_G: 0.34486988 Loss_Enh_Dec: -2.72795224
| epoch 113 |  3400/ 4361 batches | lr 0.000000 | ms/batch 401.62 | loss  2.43 | ppl    11.39 | acc     0.70 | train_ae_norm     1.00
[113/200][3499/4361] Loss_D: 0.00678823 (Loss_D_real: 0.00313772 Loss_D_fake: 0.00365051) Loss_G: 0.36104706 Loss_Enh_Dec: -2.36450124
| epoch 113 |  3500/ 4361 batches | lr 0.000000 | ms/batch 402.21 | loss  2.37 | ppl    10.74 | acc     0.70 | train_ae_norm     1.00
[113/200][3599/4361] Loss_D: 0.00222693 (Loss_D_real: 0.000

| epoch 114 |  1400/ 4361 batches | lr 0.000000 | ms/batch 400.84 | loss  2.47 | ppl    11.79 | acc     0.68 | train_ae_norm     1.00
[114/200][1499/4361] Loss_D: 0.00235209 (Loss_D_real: 0.00033259 Loss_D_fake: 0.00201950) Loss_G: 0.43641922 Loss_Enh_Dec: -2.40079951
| epoch 114 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.78 | loss  2.50 | ppl    12.14 | acc     0.69 | train_ae_norm     1.00
[114/200][1599/4361] Loss_D: 0.00237015 (Loss_D_real: 0.00009686 Loss_D_fake: 0.00227329) Loss_G: 0.41005918 Loss_Enh_Dec: -2.36847162
| epoch 114 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.31 | loss  2.49 | ppl    12.10 | acc     0.68 | train_ae_norm     1.00
[114/200][1699/4361] Loss_D: 0.00246083 (Loss_D_real: 0.00113411 Loss_D_fake: 0.00132672) Loss_G: 0.44866291 Loss_Enh_Dec: -2.66747832
| epoch 114 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.81 | loss  2.47 | ppl    11.84 | acc     0.68 | train_ae_norm     1.00
[114/200][1799/4361] Loss_D: 0.00469098 (Loss_D_real: 0.002

  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.697
  Average training loss discriminator: 0.698
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.487
  Test Loss: 4.632
  Test took: 0:00:00
Train other shit
| epoch 115 |     0/ 4361 batches | lr 0.000000 | ms/batch 866.77 | loss  0.02 | ppl     1.02 | acc     0.72 | train_ae_norm     1.00
[115/200][99/4361] Loss_D: 0.00820733 (Loss_D_real: 0.00487988 Loss_D_fake: 0.00332744) Loss_G: 0.35289308 Loss_Enh_Dec: -2.30223656
| epoch 115 |   100/ 4361 batches | lr 0.000000 | ms/batch 402.24 | loss  2.52 | ppl    12.38 | acc     0.67 | train_ae_norm     1.00
[115/200][199

[115/200][3099/4361] Loss_D: 0.03899097 (Loss_D_real: 0.03639429 Loss_D_fake: 0.00259668) Loss_G: 0.30028149 Loss_Enh_Dec: -2.68060875
| epoch 115 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.78 | loss  2.49 | ppl    12.05 | acc     0.68 | train_ae_norm     1.00
[115/200][3199/4361] Loss_D: 0.00338249 (Loss_D_real: 0.00016290 Loss_D_fake: 0.00321959) Loss_G: 0.29402193 Loss_Enh_Dec: -2.06865668
| epoch 115 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  2.50 | ppl    12.23 | acc     0.72 | train_ae_norm     1.00
[115/200][3299/4361] Loss_D: 0.00304446 (Loss_D_real: 0.00094316 Loss_D_fake: 0.00210131) Loss_G: 0.33383045 Loss_Enh_Dec: -2.64394116
| epoch 115 |  3300/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.51 | ppl    12.31 | acc     0.69 | train_ae_norm     1.00
[115/200][3399/4361] Loss_D: 0.00326662 (Loss_D_real: 0.00035662 Loss_D_fake: 0.00291000) Loss_G: 0.31019560 Loss_Enh_Dec: -2.25464368
| epoch 115 |  3400/ 4361 batches | lr 0.000000 | ms/batch

[116/200][1399/4361] Loss_D: 0.02259871 (Loss_D_real: 0.01841294 Loss_D_fake: 0.00418577) Loss_G: 0.47468731 Loss_Enh_Dec: -2.58623123
| epoch 116 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.43 | ppl    11.41 | acc     0.65 | train_ae_norm     1.00
[116/200][1499/4361] Loss_D: 0.00886580 (Loss_D_real: 0.00536797 Loss_D_fake: 0.00349783) Loss_G: 0.32018051 Loss_Enh_Dec: -2.36084914
| epoch 116 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.36 | loss  2.48 | ppl    11.99 | acc     0.68 | train_ae_norm     1.00
[116/200][1599/4361] Loss_D: 0.00889422 (Loss_D_real: 0.00014974 Loss_D_fake: 0.00874448) Loss_G: 0.35569543 Loss_Enh_Dec: -2.34372878
| epoch 116 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.91 | loss  2.45 | ppl    11.61 | acc     0.70 | train_ae_norm     1.00
[116/200][1699/4361] Loss_D: 0.00479756 (Loss_D_real: 0.00119786 Loss_D_fake: 0.00359970) Loss_G: 0.33385429 Loss_Enh_Dec: -2.23283815
| epoch 116 |  1700/ 4361 batches | lr 0.000000 | ms/batch

  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.697
  Average training loss discriminator: 0.699
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.497
 

| epoch 117 |  2700/ 4361 batches | lr 0.000000 | ms/batch 400.53 | loss  2.41 | ppl    11.13 | acc     0.70 | train_ae_norm     1.00
[117/200][2799/4361] Loss_D: 0.00706445 (Loss_D_real: 0.00235079 Loss_D_fake: 0.00471366) Loss_G: 0.30727804 Loss_Enh_Dec: -2.64344597
| epoch 117 |  2800/ 4361 batches | lr 0.000000 | ms/batch 400.74 | loss  2.37 | ppl    10.66 | acc     0.69 | train_ae_norm     1.00
[117/200][2899/4361] Loss_D: 0.03635345 (Loss_D_real: 0.03270496 Loss_D_fake: 0.00364850) Loss_G: 0.34716636 Loss_Enh_Dec: -2.44827533
| epoch 117 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  2.41 | ppl    11.17 | acc     0.71 | train_ae_norm     1.00
[117/200][2999/4361] Loss_D: 0.01550237 (Loss_D_real: 0.01071817 Loss_D_fake: 0.00478419) Loss_G: 0.35868043 Loss_Enh_Dec: -2.90693879
| epoch 117 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.42 | ppl    11.22 | acc     0.71 | train_ae_norm     1.00
[117/200][3099/4361] Loss_D: 0.01219727 (Loss_D_real: 0.008

| epoch 118 |   900/ 4361 batches | lr 0.000000 | ms/batch 401.13 | loss  2.40 | ppl    11.08 | acc     0.71 | train_ae_norm     1.00
[118/200][999/4361] Loss_D: 0.00834324 (Loss_D_real: 0.00635187 Loss_D_fake: 0.00199138) Loss_G: 0.37927589 Loss_Enh_Dec: -2.47224021
| epoch 118 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.45 | loss  2.38 | ppl    10.81 | acc     0.71 | train_ae_norm     1.00
[118/200][1099/4361] Loss_D: 0.02066432 (Loss_D_real: 0.01757117 Loss_D_fake: 0.00309315) Loss_G: 0.39551783 Loss_Enh_Dec: -2.17799592
| epoch 118 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  2.37 | ppl    10.65 | acc     0.72 | train_ae_norm     1.00
[118/200][1199/4361] Loss_D: 0.23863348 (Loss_D_real: 0.00871333 Loss_D_fake: 0.22992015) Loss_G: 0.54725164 Loss_Enh_Dec: -2.45020652
| epoch 118 |  1200/ 4361 batches | lr 0.000000 | ms/batch 401.04 | loss  2.39 | ppl    10.88 | acc     0.74 | train_ae_norm     1.00
[118/200][1299/4361] Loss_D: 0.00636858 (Loss_D_real: 0.0019

[118/200][4199/4361] Loss_D: 0.00497432 (Loss_D_real: 0.00034121 Loss_D_fake: 0.00463311) Loss_G: 0.39351621 Loss_Enh_Dec: -2.51665878
| epoch 118 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  2.37 | ppl    10.71 | acc     0.73 | train_ae_norm     1.00
[118/200][4299/4361] Loss_D: 0.00533830 (Loss_D_real: 0.00230689 Loss_D_fake: 0.00303140) Loss_G: 0.32739410 Loss_Enh_Dec: -2.68937182
| epoch 118 |  4300/ 4361 batches | lr 0.000000 | ms/batch 400.39 | loss  2.34 | ppl    10.41 | acc     0.72 | train_ae_norm     1.00
| end of epoch 118 | time: 1854.57s | test loss  2.36 | test ppl 10.61 | acc 0.767
Train classification discriminator

Training...
  Batch    10  of    230.    Elapsed: 0:00:04.
  Batch    20  of    230.    Elapsed: 0:00:07.
  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  

[119/200][2399/4361] Loss_D: 0.00486411 (Loss_D_real: 0.00203953 Loss_D_fake: 0.00282458) Loss_G: 0.35355550 Loss_Enh_Dec: -2.07973957
| epoch 119 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  2.34 | ppl    10.34 | acc     0.69 | train_ae_norm     1.00
[119/200][2499/4361] Loss_D: 0.02835406 (Loss_D_real: 0.02333953 Loss_D_fake: 0.00501453) Loss_G: 0.34620282 Loss_Enh_Dec: -2.45301986
| epoch 119 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.09 | loss  2.38 | ppl    10.79 | acc     0.72 | train_ae_norm     1.00
[119/200][2599/4361] Loss_D: 0.00831577 (Loss_D_real: 0.00424967 Loss_D_fake: 0.00406610) Loss_G: 0.30766019 Loss_Enh_Dec: -2.42614245
| epoch 119 |  2600/ 4361 batches | lr 0.000000 | ms/batch 401.22 | loss  2.36 | ppl    10.61 | acc     0.68 | train_ae_norm     1.00
[119/200][2699/4361] Loss_D: 0.00380989 (Loss_D_real: 0.00004689 Loss_D_fake: 0.00376300) Loss_G: 0.35138413 Loss_Enh_Dec: -1.88013732
| epoch 119 |  2700/ 4361 batches | lr 0.000000 | ms/batch

[120/200][599/4361] Loss_D: 0.00502030 (Loss_D_real: 0.00115614 Loss_D_fake: 0.00386416) Loss_G: 0.35184336 Loss_Enh_Dec: -2.70245361
| epoch 120 |   600/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.38 | ppl    10.76 | acc     0.66 | train_ae_norm     1.00
[120/200][699/4361] Loss_D: 0.00274830 (Loss_D_real: 0.00081846 Loss_D_fake: 0.00192985) Loss_G: 0.43404523 Loss_Enh_Dec: -2.39775538
| epoch 120 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.25 | loss  2.41 | ppl    11.09 | acc     0.72 | train_ae_norm     1.00
[120/200][799/4361] Loss_D: 0.00431623 (Loss_D_real: 0.00019128 Loss_D_fake: 0.00412495) Loss_G: 0.41859689 Loss_Enh_Dec: -2.27343559
| epoch 120 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.45 | loss  2.38 | ppl    10.85 | acc     0.70 | train_ae_norm     1.00
[120/200][899/4361] Loss_D: 0.03578070 (Loss_D_real: 0.03091903 Loss_D_fake: 0.00486167) Loss_G: 0.34584960 Loss_Enh_Dec: -2.54521799
| epoch 120 |   900/ 4361 batches | lr 0.000000 | ms/batch 401

| epoch 120 |  3900/ 4361 batches | lr 0.000000 | ms/batch 401.14 | loss  2.37 | ppl    10.70 | acc     0.68 | train_ae_norm     1.00
[120/200][3999/4361] Loss_D: 0.01426957 (Loss_D_real: 0.00136090 Loss_D_fake: 0.01290867) Loss_G: 0.39129016 Loss_Enh_Dec: -1.76249468
| epoch 120 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.35 | loss  2.36 | ppl    10.57 | acc     0.73 | train_ae_norm     1.00
[120/200][4099/4361] Loss_D: 0.01350223 (Loss_D_real: 0.00967436 Loss_D_fake: 0.00382787) Loss_G: 0.41171393 Loss_Enh_Dec: -1.93458235
| epoch 120 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.41 | loss  2.30 | ppl     9.97 | acc     0.73 | train_ae_norm     1.00
[120/200][4199/4361] Loss_D: 0.00220867 (Loss_D_real: 0.00014626 Loss_D_fake: 0.00206240) Loss_G: 0.35905427 Loss_Enh_Dec: -1.58429801
| epoch 120 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.26 | loss  2.36 | ppl    10.62 | acc     0.72 | train_ae_norm     1.00
[120/200][4299/4361] Loss_D: 0.01456578 (Loss_D_real: 0.000

[121/200][1999/4361] Loss_D: 0.00660890 (Loss_D_real: 0.00547323 Loss_D_fake: 0.00113567) Loss_G: 0.37217721 Loss_Enh_Dec: -2.26199126
| epoch 121 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  2.30 | ppl     9.96 | acc     0.76 | train_ae_norm     1.00
[121/200][2099/4361] Loss_D: 0.00045021 (Loss_D_real: 0.00013989 Loss_D_fake: 0.00031031) Loss_G: 0.61161590 Loss_Enh_Dec: -1.72129822
| epoch 121 |  2100/ 4361 batches | lr 0.000000 | ms/batch 401.75 | loss  2.30 | ppl     9.93 | acc     0.78 | train_ae_norm     1.00
[121/200][2199/4361] Loss_D: 0.00183161 (Loss_D_real: 0.00026679 Loss_D_fake: 0.00156482) Loss_G: 0.44964454 Loss_Enh_Dec: -1.58667684
| epoch 121 |  2200/ 4361 batches | lr 0.000000 | ms/batch 400.98 | loss  2.27 | ppl     9.68 | acc     0.71 | train_ae_norm     1.00
[121/200][2299/4361] Loss_D: 0.00117718 (Loss_D_real: 0.00037064 Loss_D_fake: 0.00080654) Loss_G: 0.40727788 Loss_Enh_Dec: -1.76345766
| epoch 121 |  2300/ 4361 batches | lr 0.000000 | ms/batch

[122/200][199/4361] Loss_D: 0.00519025 (Loss_D_real: 0.00112408 Loss_D_fake: 0.00406617) Loss_G: 0.37780261 Loss_Enh_Dec: -2.41061091
| epoch 122 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.87 | loss  2.34 | ppl    10.43 | acc     0.74 | train_ae_norm     1.00
[122/200][299/4361] Loss_D: 0.00334872 (Loss_D_real: 0.00146823 Loss_D_fake: 0.00188049) Loss_G: 0.38439760 Loss_Enh_Dec: -2.45025992
| epoch 122 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.57 | loss  2.29 | ppl     9.83 | acc     0.67 | train_ae_norm     1.00
[122/200][399/4361] Loss_D: 0.00130878 (Loss_D_real: 0.00023031 Loss_D_fake: 0.00107848) Loss_G: 0.45791212 Loss_Enh_Dec: -2.23855019
| epoch 122 |   400/ 4361 batches | lr 0.000000 | ms/batch 401.95 | loss  2.21 | ppl     9.08 | acc     0.74 | train_ae_norm     1.00
[122/200][499/4361] Loss_D: 0.00490393 (Loss_D_real: 0.00229995 Loss_D_fake: 0.00260397) Loss_G: 0.38808450 Loss_Enh_Dec: -2.44466591
| epoch 122 |   500/ 4361 batches | lr 0.000000 | ms/batch 401

| epoch 122 |  3400/ 4361 batches | lr 0.000000 | ms/batch 400.35 | loss  2.25 | ppl     9.49 | acc     0.71 | train_ae_norm     1.00
[122/200][3499/4361] Loss_D: 0.00237216 (Loss_D_real: 0.00109235 Loss_D_fake: 0.00127981) Loss_G: 0.58327389 Loss_Enh_Dec: -2.17915511
| epoch 122 |  3500/ 4361 batches | lr 0.000000 | ms/batch 400.64 | loss  2.17 | ppl     8.80 | acc     0.72 | train_ae_norm     1.00
[122/200][3599/4361] Loss_D: 0.00631679 (Loss_D_real: 0.00122023 Loss_D_fake: 0.00509656) Loss_G: 0.40321040 Loss_Enh_Dec: -2.20701957
| epoch 122 |  3600/ 4361 batches | lr 0.000000 | ms/batch 400.97 | loss  2.22 | ppl     9.25 | acc     0.75 | train_ae_norm     1.00
[122/200][3699/4361] Loss_D: 0.00336712 (Loss_D_real: 0.00190576 Loss_D_fake: 0.00146137) Loss_G: 0.36614624 Loss_Enh_Dec: -2.29902411
| epoch 122 |  3700/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  2.23 | ppl     9.26 | acc     0.71 | train_ae_norm     1.00
[122/200][3799/4361] Loss_D: 0.00325600 (Loss_D_real: 0.001

| epoch 123 |  1600/ 4361 batches | lr 0.000000 | ms/batch 400.93 | loss  2.43 | ppl    11.38 | acc     0.71 | train_ae_norm     1.00
[123/200][1699/4361] Loss_D: 0.01176746 (Loss_D_real: 0.00622808 Loss_D_fake: 0.00553939) Loss_G: 0.28583688 Loss_Enh_Dec: -2.48653913
| epoch 123 |  1700/ 4361 batches | lr 0.000000 | ms/batch 400.91 | loss  2.40 | ppl    11.01 | acc     0.69 | train_ae_norm     1.00
[123/200][1799/4361] Loss_D: 0.00324546 (Loss_D_real: 0.00053513 Loss_D_fake: 0.00271034) Loss_G: 0.31505638 Loss_Enh_Dec: -2.31437945
| epoch 123 |  1800/ 4361 batches | lr 0.000000 | ms/batch 399.57 | loss  2.34 | ppl    10.40 | acc     0.69 | train_ae_norm     1.00
[123/200][1899/4361] Loss_D: 0.02766219 (Loss_D_real: 0.02404102 Loss_D_fake: 0.00362117) Loss_G: 0.32123598 Loss_Enh_Dec: -2.54307818
| epoch 123 |  1900/ 4361 batches | lr 0.000000 | ms/batch 399.56 | loss  2.39 | ppl    10.86 | acc     0.72 | train_ae_norm     1.00
[123/200][1999/4361] Loss_D: 0.00944081 (Loss_D_real: 0.006

  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.696
  Average training loss discriminator: 0.701
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.472
  Test Loss: 4.564
  Test took: 0:00:00
Train other shit
| epoch 124 |     0/ 4361 batches | lr 0.000000 | ms/batch 863.53 | loss  0.02 | ppl     1.02 | acc     0.74 | train_ae_norm     1.00
[124/200][99/4361] Loss_D: 0.00625678 (Loss_D_real: 0.00246043 Loss_D_fake: 0.00379636) Loss_G: 0.30552813 Loss_Enh_Dec: -2.50589561
| epoch 124 |   100/ 4361 batches | lr 0.000000 | ms/batch 400.86 | loss  2.27 | ppl     9.66 | acc     0.70 | train_ae_norm     1.00
[124/200][199

[124/200][3099/4361] Loss_D: 0.00557393 (Loss_D_real: 0.00476644 Loss_D_fake: 0.00080748) Loss_G: 0.39838922 Loss_Enh_Dec: -2.12489700
| epoch 124 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.85 | loss  2.29 | ppl     9.85 | acc     0.72 | train_ae_norm     1.00
[124/200][3199/4361] Loss_D: 0.00191922 (Loss_D_real: 0.00010250 Loss_D_fake: 0.00181673) Loss_G: 0.51831228 Loss_Enh_Dec: -2.06025910
| epoch 124 |  3200/ 4361 batches | lr 0.000000 | ms/batch 400.26 | loss  2.27 | ppl     9.71 | acc     0.75 | train_ae_norm     1.00
[124/200][3299/4361] Loss_D: 0.00228878 (Loss_D_real: 0.00092370 Loss_D_fake: 0.00136507) Loss_G: 0.39418879 Loss_Enh_Dec: -1.96015775
| epoch 124 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.16 | loss  2.26 | ppl     9.62 | acc     0.73 | train_ae_norm     1.00
[124/200][3399/4361] Loss_D: 0.00314978 (Loss_D_real: 0.00236324 Loss_D_fake: 0.00078654) Loss_G: 0.44913360 Loss_Enh_Dec: -1.84850299
| epoch 124 |  3400/ 4361 batches | lr 0.000000 | ms/batch

[125/200][1199/4361] Loss_D: 0.00268770 (Loss_D_real: 0.00022677 Loss_D_fake: 0.00246093) Loss_G: 0.33572385 Loss_Enh_Dec: -2.56734967
| epoch 125 |  1200/ 4361 batches | lr 0.000000 | ms/batch 400.73 | loss  2.28 | ppl     9.78 | acc     0.76 | train_ae_norm     1.00
[125/200][1299/4361] Loss_D: 0.00397958 (Loss_D_real: 0.00191351 Loss_D_fake: 0.00206608) Loss_G: 0.37147853 Loss_Enh_Dec: -2.44011664
| epoch 125 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  2.31 | ppl    10.08 | acc     0.72 | train_ae_norm     1.00
[125/200][1399/4361] Loss_D: 0.01287963 (Loss_D_real: 0.01031091 Loss_D_fake: 0.00256872) Loss_G: 0.38013825 Loss_Enh_Dec: -2.49835801
| epoch 125 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.77 | loss  2.30 | ppl     9.94 | acc     0.67 | train_ae_norm     1.00
[125/200][1499/4361] Loss_D: 0.00500027 (Loss_D_real: 0.00177189 Loss_D_fake: 0.00322839) Loss_G: 0.50575864 Loss_Enh_Dec: -2.52412558
| epoch 125 |  1500/ 4361 batches | lr 0.000000 | ms/batch

  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:23.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:38.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:53.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:08.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:23.

  Average training loss generetor: 0.697
  Average training loss discriminator: 0.698
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.480
 

| epoch 126 |  2900/ 4361 batches | lr 0.000000 | ms/batch 400.39 | loss  2.27 | ppl     9.65 | acc     0.74 | train_ae_norm     1.00
[126/200][2999/4361] Loss_D: 0.00254147 (Loss_D_real: 0.00115740 Loss_D_fake: 0.00138407) Loss_G: 0.44246131 Loss_Enh_Dec: -1.91836393
| epoch 126 |  3000/ 4361 batches | lr 0.000000 | ms/batch 400.75 | loss  2.27 | ppl     9.73 | acc     0.73 | train_ae_norm     1.00
[126/200][3099/4361] Loss_D: 0.00082182 (Loss_D_real: 0.00014170 Loss_D_fake: 0.00068013) Loss_G: 0.40276098 Loss_Enh_Dec: -2.10287738
| epoch 126 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  2.27 | ppl     9.69 | acc     0.72 | train_ae_norm     1.00
[126/200][3199/4361] Loss_D: 0.00116202 (Loss_D_real: 0.00038598 Loss_D_fake: 0.00077604) Loss_G: 0.40946442 Loss_Enh_Dec: -2.27199221
| epoch 126 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.03 | loss  2.27 | ppl     9.63 | acc     0.75 | train_ae_norm     1.00
[126/200][3299/4361] Loss_D: 0.00382220 (Loss_D_real: 0.003

| epoch 127 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.70 | loss  2.23 | ppl     9.26 | acc     0.72 | train_ae_norm     1.00
[127/200][1399/4361] Loss_D: 0.00172042 (Loss_D_real: 0.00035158 Loss_D_fake: 0.00136884) Loss_G: 0.38489199 Loss_Enh_Dec: -2.23068666
| epoch 127 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.20 | loss  2.19 | ppl     8.92 | acc     0.72 | train_ae_norm     1.00
[127/200][1499/4361] Loss_D: 0.00459782 (Loss_D_real: 0.00338017 Loss_D_fake: 0.00121765) Loss_G: 0.38271165 Loss_Enh_Dec: -2.45503592
| epoch 127 |  1500/ 4361 batches | lr 0.000000 | ms/batch 400.88 | loss  2.26 | ppl     9.62 | acc     0.73 | train_ae_norm     1.00
[127/200][1599/4361] Loss_D: 0.00124117 (Loss_D_real: 0.00025925 Loss_D_fake: 0.00098192) Loss_G: 0.43564969 Loss_Enh_Dec: -2.23235893
| epoch 127 |  1600/ 4361 batches | lr 0.000000 | ms/batch 400.27 | loss  2.22 | ppl     9.16 | acc     0.73 | train_ae_norm     1.00
[127/200][1699/4361] Loss_D: 0.00346506 (Loss_D_real: 0.001

[128/200][99/4361] Loss_D: 0.00541895 (Loss_D_real: 0.00036525 Loss_D_fake: 0.00505370) Loss_G: 0.74300855 Loss_Enh_Dec: -2.25772667
| epoch 128 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.27 | loss  2.15 | ppl     8.58 | acc     0.72 | train_ae_norm     1.00
[128/200][199/4361] Loss_D: 0.00672172 (Loss_D_real: 0.00620294 Loss_D_fake: 0.00051878) Loss_G: 0.46993238 Loss_Enh_Dec: -1.47687411
| epoch 128 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.81 | loss  2.18 | ppl     8.86 | acc     0.75 | train_ae_norm     1.00
[128/200][299/4361] Loss_D: 0.00329110 (Loss_D_real: 0.00221271 Loss_D_fake: 0.00107839) Loss_G: 0.46369371 Loss_Enh_Dec: -2.53351355
| epoch 128 |   300/ 4361 batches | lr 0.000000 | ms/batch 401.27 | loss  2.18 | ppl     8.83 | acc     0.69 | train_ae_norm     1.00
[128/200][399/4361] Loss_D: 0.00107170 (Loss_D_real: 0.00043562 Loss_D_fake: 0.00063608) Loss_G: 0.43421790 Loss_Enh_Dec: -2.24746013
| epoch 128 |   400/ 4361 batches | lr 0.000000 | ms/batch 400.

| epoch 128 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.08 | loss  2.19 | ppl     8.96 | acc     0.74 | train_ae_norm     1.00
[128/200][3399/4361] Loss_D: 0.00434334 (Loss_D_real: 0.00165461 Loss_D_fake: 0.00268873) Loss_G: 0.39646339 Loss_Enh_Dec: -2.44580817
| epoch 128 |  3400/ 4361 batches | lr 0.000000 | ms/batch 400.79 | loss  2.19 | ppl     8.96 | acc     0.72 | train_ae_norm     1.00
[128/200][3499/4361] Loss_D: 0.00124870 (Loss_D_real: 0.00044747 Loss_D_fake: 0.00080123) Loss_G: 0.46803814 Loss_Enh_Dec: -2.06790280
| epoch 128 |  3500/ 4361 batches | lr 0.000000 | ms/batch 401.48 | loss  2.14 | ppl     8.50 | acc     0.72 | train_ae_norm     1.00
[128/200][3599/4361] Loss_D: 0.00479614 (Loss_D_real: 0.00043347 Loss_D_fake: 0.00436267) Loss_G: 0.37846261 Loss_Enh_Dec: -2.01093030
| epoch 128 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.19 | ppl     8.97 | acc     0.75 | train_ae_norm     1.00
[128/200][3699/4361] Loss_D: 0.00545905 (Loss_D_real: 0.000

| epoch 129 |  1400/ 4361 batches | lr 0.000000 | ms/batch 401.53 | loss  2.20 | ppl     9.01 | acc     0.68 | train_ae_norm     1.00
[129/200][1499/4361] Loss_D: 0.00266448 (Loss_D_real: 0.00069454 Loss_D_fake: 0.00196994) Loss_G: 0.42610261 Loss_Enh_Dec: -2.32381821
| epoch 129 |  1500/ 4361 batches | lr 0.000000 | ms/batch 401.65 | loss  2.27 | ppl     9.66 | acc     0.72 | train_ae_norm     1.00
[129/200][1599/4361] Loss_D: 0.00221421 (Loss_D_real: 0.00072867 Loss_D_fake: 0.00148553) Loss_G: 0.43264005 Loss_Enh_Dec: -2.58060980
| epoch 129 |  1600/ 4361 batches | lr 0.000000 | ms/batch 401.15 | loss  2.23 | ppl     9.34 | acc     0.70 | train_ae_norm     1.00
[129/200][1699/4361] Loss_D: 0.00607176 (Loss_D_real: 0.00294603 Loss_D_fake: 0.00312573) Loss_G: 0.37674615 Loss_Enh_Dec: -2.65412903
| epoch 129 |  1700/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  2.18 | ppl     8.83 | acc     0.73 | train_ae_norm     1.00
[129/200][1799/4361] Loss_D: 0.00304466 (Loss_D_real: 0.000

  Batch    30  of    230.    Elapsed: 0:00:11.
  Batch    40  of    230.    Elapsed: 0:00:15.
  Batch    50  of    230.    Elapsed: 0:00:19.
  Batch    60  of    230.    Elapsed: 0:00:22.
  Batch    70  of    230.    Elapsed: 0:00:26.
  Batch    80  of    230.    Elapsed: 0:00:30.
  Batch    90  of    230.    Elapsed: 0:00:34.
  Batch   100  of    230.    Elapsed: 0:00:37.
  Batch   110  of    230.    Elapsed: 0:00:41.
  Batch   120  of    230.    Elapsed: 0:00:45.
  Batch   130  of    230.    Elapsed: 0:00:49.
  Batch   140  of    230.    Elapsed: 0:00:52.
  Batch   150  of    230.    Elapsed: 0:00:56.
  Batch   160  of    230.    Elapsed: 0:01:00.
  Batch   170  of    230.    Elapsed: 0:01:04.
  Batch   180  of    230.    Elapsed: 0:01:07.
  Batch   190  of    230.    Elapsed: 0:01:11.
  Batch   200  of    230.    Elapsed: 0:01:15.
  Batch   210  of    230.    Elapsed: 0:01:19.
  Batch   220  of    230.    Elapsed: 0:01:22.

  Average training loss generetor: 0.697
  Average training

[130/200][2899/4361] Loss_D: 0.01137129 (Loss_D_real: 0.00946638 Loss_D_fake: 0.00190491) Loss_G: 0.40877029 Loss_Enh_Dec: -2.74082637
| epoch 130 |  2900/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.20 | ppl     9.04 | acc     0.73 | train_ae_norm     1.00
[130/200][2999/4361] Loss_D: 0.02080284 (Loss_D_real: 0.01838173 Loss_D_fake: 0.00242112) Loss_G: 0.40492830 Loss_Enh_Dec: -3.04315710
| epoch 130 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.29 | loss  2.20 | ppl     9.01 | acc     0.70 | train_ae_norm     1.00
[130/200][3099/4361] Loss_D: 0.01508431 (Loss_D_real: 0.01489416 Loss_D_fake: 0.00019015) Loss_G: 0.57999951 Loss_Enh_Dec: -2.68488431
| epoch 130 |  3100/ 4361 batches | lr 0.000000 | ms/batch 400.92 | loss  2.19 | ppl     8.95 | acc     0.74 | train_ae_norm     1.00
[130/200][3199/4361] Loss_D: 0.00121137 (Loss_D_real: 0.00052170 Loss_D_fake: 0.00068967) Loss_G: 0.43771297 Loss_Enh_Dec: -2.86856580
| epoch 130 |  3200/ 4361 batches | lr 0.000000 | ms/batch

[131/200][999/4361] Loss_D: 0.00617059 (Loss_D_real: 0.00605194 Loss_D_fake: 0.00011865) Loss_G: 0.66004813 Loss_Enh_Dec: -2.72441411
| epoch 131 |  1000/ 4361 batches | lr 0.000000 | ms/batch 401.60 | loss  2.19 | ppl     8.97 | acc     0.74 | train_ae_norm     1.00
[131/200][1099/4361] Loss_D: 0.00529500 (Loss_D_real: 0.00440124 Loss_D_fake: 0.00089376) Loss_G: 0.45314366 Loss_Enh_Dec: -2.56233478
| epoch 131 |  1100/ 4361 batches | lr 0.000000 | ms/batch 400.89 | loss  2.19 | ppl     8.89 | acc     0.71 | train_ae_norm     1.00
[131/200][1199/4361] Loss_D: 0.00156725 (Loss_D_real: 0.00049696 Loss_D_fake: 0.00107029) Loss_G: 0.40638217 Loss_Enh_Dec: -2.47098899
| epoch 131 |  1200/ 4361 batches | lr 0.000000 | ms/batch 400.74 | loss  2.19 | ppl     8.93 | acc     0.77 | train_ae_norm     1.00
[131/200][1299/4361] Loss_D: 0.00356518 (Loss_D_real: 0.00107836 Loss_D_fake: 0.00248681) Loss_G: 0.41632327 Loss_Enh_Dec: -2.28129864
| epoch 131 |  1300/ 4361 batches | lr 0.000000 | ms/batch 

| epoch 131 |  4000/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  2.29 | ppl     9.85 | acc     0.75 | train_ae_norm     1.00
[131/200][4099/4361] Loss_D: 0.00236913 (Loss_D_real: 0.00085590 Loss_D_fake: 0.00151324) Loss_G: 0.41745597 Loss_Enh_Dec: -2.19749308
| epoch 131 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.19 | loss  2.21 | ppl     9.12 | acc     0.72 | train_ae_norm     1.00
[131/200][4199/4361] Loss_D: 0.00168713 (Loss_D_real: 0.00049357 Loss_D_fake: 0.00119356) Loss_G: 0.45820308 Loss_Enh_Dec: -1.97350538
| epoch 131 |  4200/ 4361 batches | lr 0.000000 | ms/batch 400.96 | loss  2.24 | ppl     9.35 | acc     0.74 | train_ae_norm     1.00
[131/200][4299/4361] Loss_D: 0.00263180 (Loss_D_real: 0.00047253 Loss_D_fake: 0.00215927) Loss_G: 0.40728280 Loss_Enh_Dec: -2.22147107
| epoch 131 |  4300/ 4361 batches | lr 0.000000 | ms/batch 402.07 | loss  2.19 | ppl     8.97 | acc     0.74 | train_ae_norm     1.00
| end of epoch 131 | time: 1852.99s | test loss  2.35 | tes

| epoch 132 |  2200/ 4361 batches | lr 0.000000 | ms/batch 401.97 | loss  2.36 | ppl    10.61 | acc     0.71 | train_ae_norm     1.00
[132/200][2299/4361] Loss_D: 0.00812195 (Loss_D_real: 0.00616074 Loss_D_fake: 0.00196122) Loss_G: 0.38158700 Loss_Enh_Dec: -1.74609268
| epoch 132 |  2300/ 4361 batches | lr 0.000000 | ms/batch 402.36 | loss  2.38 | ppl    10.84 | acc     0.72 | train_ae_norm     1.00
[132/200][2399/4361] Loss_D: 0.00314400 (Loss_D_real: 0.00169556 Loss_D_fake: 0.00144844) Loss_G: 0.43500826 Loss_Enh_Dec: -1.91641939
| epoch 132 |  2400/ 4361 batches | lr 0.000000 | ms/batch 401.67 | loss  2.31 | ppl    10.08 | acc     0.68 | train_ae_norm     1.00
[132/200][2499/4361] Loss_D: 0.00282806 (Loss_D_real: 0.00069611 Loss_D_fake: 0.00213194) Loss_G: 0.52217239 Loss_Enh_Dec: -2.37964129
| epoch 132 |  2500/ 4361 batches | lr 0.000000 | ms/batch 401.42 | loss  2.25 | ppl     9.53 | acc     0.72 | train_ae_norm     1.00
[132/200][2599/4361] Loss_D: 0.00314177 (Loss_D_real: 0.001

| epoch 133 |   500/ 4361 batches | lr 0.000000 | ms/batch 401.01 | loss  2.17 | ppl     8.76 | acc     0.76 | train_ae_norm     1.00
[133/200][599/4361] Loss_D: 0.00104350 (Loss_D_real: 0.00021107 Loss_D_fake: 0.00083243) Loss_G: 0.44417977 Loss_Enh_Dec: -2.21389937
| epoch 133 |   600/ 4361 batches | lr 0.000000 | ms/batch 400.72 | loss  2.14 | ppl     8.47 | acc     0.71 | train_ae_norm     1.00
[133/200][699/4361] Loss_D: 0.00177826 (Loss_D_real: 0.00057362 Loss_D_fake: 0.00120464) Loss_G: 0.46847567 Loss_Enh_Dec: -1.91332710
| epoch 133 |   700/ 4361 batches | lr 0.000000 | ms/batch 401.43 | loss  2.19 | ppl     8.95 | acc     0.73 | train_ae_norm     1.00
[133/200][799/4361] Loss_D: 0.00856328 (Loss_D_real: 0.00718834 Loss_D_fake: 0.00137494) Loss_G: 0.38241440 Loss_Enh_Dec: -2.38376689
| epoch 133 |   800/ 4361 batches | lr 0.000000 | ms/batch 401.28 | loss  2.18 | ppl     8.88 | acc     0.71 | train_ae_norm     1.00
[133/200][899/4361] Loss_D: 0.00681831 (Loss_D_real: 0.0000575

[133/200][3599/4361] Loss_D: 0.05020875 (Loss_D_real: 0.04852283 Loss_D_fake: 0.00168593) Loss_G: 0.55194718 Loss_Enh_Dec: -2.27728462
| epoch 133 |  3600/ 4361 batches | lr 0.000000 | ms/batch 401.87 | loss  2.40 | ppl    11.02 | acc     0.71 | train_ae_norm     1.00
[133/200][3699/4361] Loss_D: 0.00191730 (Loss_D_real: 0.00107763 Loss_D_fake: 0.00083967) Loss_G: 0.53486776 Loss_Enh_Dec: -2.45393491
| epoch 133 |  3700/ 4361 batches | lr 0.000000 | ms/batch 400.93 | loss  2.36 | ppl    10.63 | acc     0.67 | train_ae_norm     1.00
[133/200][3799/4361] Loss_D: 0.00337317 (Loss_D_real: 0.00299204 Loss_D_fake: 0.00038112) Loss_G: 0.54861772 Loss_Enh_Dec: -1.97809601
| epoch 133 |  3800/ 4361 batches | lr 0.000000 | ms/batch 401.37 | loss  2.39 | ppl    10.96 | acc     0.74 | train_ae_norm     1.00
[133/200][3899/4361] Loss_D: 0.01003810 (Loss_D_real: 0.00055497 Loss_D_fake: 0.00948313) Loss_G: 0.48389229 Loss_Enh_Dec: -1.69014728
| epoch 133 |  3900/ 4361 batches | lr 0.000000 | ms/batch

[134/200][1799/4361] Loss_D: 0.00479154 (Loss_D_real: 0.00030795 Loss_D_fake: 0.00448359) Loss_G: 0.44760752 Loss_Enh_Dec: -2.28257990
| epoch 134 |  1800/ 4361 batches | lr 0.000000 | ms/batch 401.81 | loss  2.32 | ppl    10.20 | acc     0.70 | train_ae_norm     1.00
[134/200][1899/4361] Loss_D: 0.00525287 (Loss_D_real: 0.00390362 Loss_D_fake: 0.00134925) Loss_G: 0.47333080 Loss_Enh_Dec: -1.96432877
| epoch 134 |  1900/ 4361 batches | lr 0.000000 | ms/batch 401.36 | loss  2.36 | ppl    10.63 | acc     0.69 | train_ae_norm     1.00
[134/200][1999/4361] Loss_D: 0.00491713 (Loss_D_real: 0.00025276 Loss_D_fake: 0.00466437) Loss_G: 0.49947926 Loss_Enh_Dec: -1.97693121
| epoch 134 |  2000/ 4361 batches | lr 0.000000 | ms/batch 401.91 | loss  2.29 | ppl     9.85 | acc     0.74 | train_ae_norm     1.00
[134/200][2099/4361] Loss_D: 0.00160661 (Loss_D_real: 0.00140654 Loss_D_fake: 0.00020007) Loss_G: 0.57575434 Loss_Enh_Dec: -1.96254122
| epoch 134 |  2100/ 4361 batches | lr 0.000000 | ms/batch


  Average training loss generetor: 0.697
  Average training loss discriminator: 0.698
  Training epcoh took: 0:01:26

Running Test...
  Accuracy: 0.492
  Test Loss: 4.735
  Test took: 0:00:00
Train other shit
| epoch 135 |     0/ 4361 batches | lr 0.000000 | ms/batch 862.46 | loss  0.02 | ppl     1.02 | acc     0.74 | train_ae_norm     1.00
[135/200][99/4361] Loss_D: 0.00246258 (Loss_D_real: 0.00206942 Loss_D_fake: 0.00039316) Loss_G: 0.49520737 Loss_Enh_Dec: -1.77977026
| epoch 135 |   100/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.18 | ppl     8.88 | acc     0.69 | train_ae_norm     1.00
[135/200][199/4361] Loss_D: 0.00325027 (Loss_D_real: 0.00125289 Loss_D_fake: 0.00199738) Loss_G: 0.42977086 Loss_Enh_Dec: -1.85090530
| epoch 135 |   200/ 4361 batches | lr 0.000000 | ms/batch 401.30 | loss  2.22 | ppl     9.20 | acc     0.77 | train_ae_norm     1.00
[135/200][299/4361] Loss_D: 0.00553098 (Loss_D_real: 0.00150873 Loss_D_fake: 0.00402225) Loss_G: 0.70355409 Loss_Enh_Dec:

| epoch 135 |  3000/ 4361 batches | lr 0.000000 | ms/batch 401.68 | loss  2.23 | ppl     9.26 | acc     0.73 | train_ae_norm     1.00
[135/200][3099/4361] Loss_D: 0.00186326 (Loss_D_real: 0.00032626 Loss_D_fake: 0.00153701) Loss_G: 0.45083547 Loss_Enh_Dec: -2.67118120
| epoch 135 |  3100/ 4361 batches | lr 0.000000 | ms/batch 401.55 | loss  2.21 | ppl     9.16 | acc     0.72 | train_ae_norm     1.00
[135/200][3199/4361] Loss_D: 0.00308181 (Loss_D_real: 0.00115974 Loss_D_fake: 0.00192207) Loss_G: 0.47002441 Loss_Enh_Dec: -2.28861547
| epoch 135 |  3200/ 4361 batches | lr 0.000000 | ms/batch 401.99 | loss  2.27 | ppl     9.67 | acc     0.75 | train_ae_norm     1.00
[135/200][3299/4361] Loss_D: 0.00171173 (Loss_D_real: 0.00158885 Loss_D_fake: 0.00012288) Loss_G: 0.84073633 Loss_Enh_Dec: -2.24127316
| epoch 135 |  3300/ 4361 batches | lr 0.000000 | ms/batch 401.23 | loss  2.29 | ppl     9.85 | acc     0.72 | train_ae_norm     1.00
[135/200][3399/4361] Loss_D: 0.01980634 (Loss_D_real: 0.018

[136/200][1099/4361] Loss_D: 0.00328347 (Loss_D_real: 0.00023525 Loss_D_fake: 0.00304822) Loss_G: 0.41478512 Loss_Enh_Dec: -1.43626010
| epoch 136 |  1100/ 4361 batches | lr 0.000000 | ms/batch 401.33 | loss  2.22 | ppl     9.24 | acc     0.70 | train_ae_norm     1.00
[136/200][1199/4361] Loss_D: 0.01666483 (Loss_D_real: 0.01371816 Loss_D_fake: 0.00294667) Loss_G: 0.43029147 Loss_Enh_Dec: -1.38708448
| epoch 136 |  1200/ 4361 batches | lr 0.000000 | ms/batch 402.09 | loss  2.23 | ppl     9.35 | acc     0.76 | train_ae_norm     1.00
[136/200][1299/4361] Loss_D: 0.00196626 (Loss_D_real: 0.00080932 Loss_D_fake: 0.00115695) Loss_G: 0.52029794 Loss_Enh_Dec: -1.62013018
| epoch 136 |  1300/ 4361 batches | lr 0.000000 | ms/batch 401.56 | loss  2.26 | ppl     9.62 | acc     0.72 | train_ae_norm     1.00
[136/200][1399/4361] Loss_D: 0.00046280 (Loss_D_real: 0.00007985 Loss_D_fake: 0.00038295) Loss_G: 0.48628169 Loss_Enh_Dec: -1.73763025
| epoch 136 |  1400/ 4361 batches | lr 0.000000 | ms/batch

| epoch 136 |  4100/ 4361 batches | lr 0.000000 | ms/batch 401.46 | loss  2.27 | ppl     9.64 | acc     0.71 | train_ae_norm     1.00
[136/200][4199/4361] Loss_D: 0.00198124 (Loss_D_real: 0.00029583 Loss_D_fake: 0.00168541) Loss_G: 0.47951147 Loss_Enh_Dec: -2.11502814
| epoch 136 |  4200/ 4361 batches | lr 0.000000 | ms/batch 401.44 | loss  2.31 | ppl    10.11 | acc     0.72 | train_ae_norm     1.00
[136/200][4299/4361] Loss_D: 0.00231893 (Loss_D_real: 0.00017068 Loss_D_fake: 0.00214825) Loss_G: 0.59530884 Loss_Enh_Dec: -2.36191773
| epoch 136 |  4300/ 4361 batches | lr 0.000000 | ms/batch 401.51 | loss  2.28 | ppl     9.79 | acc     0.70 | train_ae_norm     1.00
| end of epoch 136 | time: 1855.04s | test loss  2.40 | test ppl 11.04 | acc 0.764
bleu_self:  [2.22938381e-01 3.88491377e-09 1.04452896e-11 5.57819826e-13
 9.88323447e-14]
bleu_test:  [8.20486111e-01 9.40558459e-02 6.77875076e-07 1.95407268e-09
 6.27957307e-11]
bleu_self: [0.22293838,0.00000000,0.00000000,0.00000000,0.0000000

In [None]:
plt.plot(accuracy_array)

In [None]:
print(max(accuracy_array))
#0.4025 previous best accuracy (bert-base-cased, epoch 50, 5e-6)

In [None]:
print(accuracy_array[-1])

In [None]:
df_to_save = pd.DataFrame(accuracy_array)

In [None]:
df_to_save.to_csv('accuracy_array_assigned_yahoo.csv')