In [1]:
import h5py
import pdb
import numpy as np
import argparse
import random
import sys, os
import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from cmd_args import cmd_args

In [2]:
# Make the sibling ``util`` directory importable.
# Note that '__file__' is a quoted string literal, not the __file__ variable, so
# realpath() resolves it against the current working directory; the appended
# entry is therefore '<cwd>/../util/'.
sys.path.append('%s/../util/' % os.path.dirname(os.path.realpath('__file__')))
# presumably provided by the util directory added above — confirm.
# NOTE(review): ``parser`` is not referenced in the cells shown here.
import cfg_parser as parser
# Fixed seed; main() passes it to random, numpy and torch.
seed = 19260817


In [3]:
# train vae model for one epoch
# Project imports needed by epoch_train() and main() below.
# As in the previous cell, '__file__' is a string literal, so both paths appended
# here are resolved relative to the current working directory.
sys.path.append('%s/../' % os.path.dirname(os.path.realpath('__file__')))
from model import get_batch_input_vae, MolVAE,Prepare_data
# Same '<cwd>/../util/' entry that the previous cell already appended.
sys.path.append('%s/../util/' % os.path.dirname(os.path.realpath('__file__')))
# NOTE(review): weights_init is not referenced in the cells shown here.
from pytorch_initializer import weights_init
# NOTE(review): this rebinds get_batch_input_vae, replacing the name imported from
# ``model`` two statements earlier; the train_util version is the one that
# epoch_train() ends up calling. Confirm which one is intended and drop the other.
from train_util import get_batch_input_vae
#q(z|x,y)
def _loss_to_float(loss):
    """Return the first element of a loss tensor/Variable as a Python float.

    Flattening before indexing makes this work for both 0-dim and 1-element
    tensors, so the caller never has to mix ``[0]`` indexing with raw arrays.
    """
    return float(loss.data.cpu().numpy().reshape(-1)[0])


def epoch_train(phase, epoch, ae, sample_idxes, data_binary, data_masks, data_property, cmd_args, optimizer_vae=None):
    """Run one pass over ``sample_idxes`` and return the model with averaged losses.

    When ``optimizer_vae`` is given, every batch is followed by an optimizer
    step and only full batches are processed (a trailing partial batch is
    dropped). When it is ``None``, no parameter update happens and all samples
    are visited, including a final partial batch.

    Args:
        phase: label shown in the progress bar; the model is put in train mode
            only when this is ``'train'`` and an optimizer is supplied,
            otherwise it is put in eval mode.
        epoch: epoch number, used only in the progress-bar text.
        ae: the model; ``ae.forward`` must return a sequence whose first two
            items are the reconstruction loss and the KL loss.
        sample_idxes: sliceable sequence of sample indices for this pass.
        data_binary, data_masks, data_property: passed unchanged to
            ``get_batch_input_vae`` together with the indices of each batch.
        cmd_args: configuration object; only ``batch_size`` is read here.
        optimizer_vae: optimizer to step after each batch, or ``None`` for an
            evaluation-only pass.

    Returns:
        ``(ae, avg_loss)`` where ``avg_loss`` is a float array
        ``[total, reconstruction, kl]``: each batch value is weighted by the
        batch length and the sum is divided by the number of samples seen.

    Raises:
        ValueError: if no sample was processed (for example fewer training
            samples than ``batch_size``), since no average can be formed.
    """
    batch_size = cmd_args.batch_size
    if optimizer_vae is None:
        # Evaluation: round up so the final partial batch is included.
        n_batches = (len(sample_idxes) + batch_size - 1) // batch_size
    else:
        # Training: only full batches (see the assert inside the loop).
        n_batches = len(sample_idxes) // batch_size
    pbar = tqdm(range(0, n_batches), unit='batch')

    if phase == 'train' and optimizer_vae is not None:
        ae.train()
    else:
        ae.eval()

    # Running length-weighted sums of [total, reconstruction, kl].
    loss_sums = np.zeros(3)
    n_samples = 0
    for pos in pbar:
        selected_idx = sample_idxes[pos * batch_size : (pos + 1) * batch_size]
        # NOTE(review): the original comment here said "no grad for evaluate mode";
        # whether get_batch_input_vae really disables gradients is not visible
        # from this file — confirm.
        x_inputs, y_inputs, v_tb, v_ms, t_y = get_batch_input_vae(selected_idx, data_binary, data_masks, data_property)
        loss_list = ae.forward(x_inputs, y_inputs, v_tb, v_ms, t_y)
        loss_vae = loss_list[0] + loss_list[1]

        # Convert all three values the same way so that formatting and
        # accumulation always operate on plain floats.
        perp = _loss_to_float(loss_list[0])  # reconstruction loss
        kl = _loss_to_float(loss_list[1])
        minibatch_vae_loss = _loss_to_float(loss_vae)
        pbar.set_description('At epoch: %d  %s vae loss: %0.5f perp: %0.5f kl: %0.5f' % (epoch, phase, minibatch_vae_loss, perp, kl))

        if optimizer_vae is not None:
            assert len(selected_idx) == batch_size
            optimizer_vae.zero_grad()
            # NOTE(review): retain_graph=True looks unnecessary because a fresh
            # graph is built for every batch; kept to preserve behaviour —
            # confirm the model does not reuse graph state before removing it.
            loss_vae.backward(retain_graph=True)
            optimizer_vae.step()

        loss_sums += np.array([minibatch_vae_loss, perp, kl]) * len(selected_idx)
        n_samples += len(selected_idx)

    if optimizer_vae is None:
        assert n_samples == len(sample_idxes)

    if n_samples == 0:
        raise ValueError(
            'epoch_train processed no samples (%d indices, batch_size %d)'
            % (len(sample_idxes), batch_size)
        )

    avg_loss = loss_sums / n_samples
    return ae, avg_loss


In [4]:
def main():
    """Train the MolVAE model and checkpoint it after every validation pass.

    Steps, in order:
      1. Seed ``random``, NumPy and torch (CPU and all CUDA devices) with the
         module-level ``seed``.
      2. Create ``cmd_args.save_dir`` if needed and store the best-checkpoint
         path on ``cmd_args.saved_model``.
      3. Load the train/validation arrays via ``Prepare_data(cmd_args).load_data()``.
      4. For ``cmd_args.num_epochs`` epochs: shuffle the training indices, run a
         training epoch, run a validation epoch, step the LR scheduler on the
         validation loss, save ``epoch-<n>.model`` and, whenever the validation
         loss improves, overwrite ``epoch-best.model``.
      5. Save the per-epoch training KL and reconstruction averages to
         ``./kl.npy`` and ``./prep.npy`` in the current working directory.

    Configuration is read from the module-level ``cmd_args``. Returns ``None``.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(cmd_args.save_dir, exist_ok=True)

    cmd_args.saved_model = cmd_args.save_dir + '/epoch-best.model'

    # '__file__' is a string literal, so this resolves to '<cwd>/util/'.
    sys.path.append('%s/util/' % os.path.dirname(os.path.realpath('__file__')))
    getting_data = Prepare_data(cmd_args)
    train_binary_x, train_masks, valid_binary_x, valid_masks, train_y, valid_y = getting_data.load_data()
    print('num_train: %d\tnum_valid: %d' % (train_y.shape[0], valid_binary_x.shape[0]))

    ae = MolVAE()
    if cmd_args.mode == 'gpu':
        ae = ae.cuda()

    # Per-epoch training averages, written to disk once training finishes.
    kl_history = []
    perp_history = []

    assert cmd_args.encoder_type == 'cnn'
    optimizer_vae = optim.Adam(ae.parameters(), lr=cmd_args.learning_rate)
    # NOTE(review): with min_lr=0.001 the scheduler cannot lower the rate below
    # 1e-3; if cmd_args.learning_rate is already <= 0.001 the halving has no
    # effect — confirm this floor is intended.
    lr_scheduler = ReduceLROnPlateau(optimizer_vae, 'min', factor=0.5, patience=5, verbose=True, min_lr=0.001)

    sample_idxes = list(range(train_binary_x.shape[0]))
    # The validation order never changes, so build the index list once.
    valid_idxes = list(range(valid_binary_x.shape[0]))
    best_valid_loss = None
    # Validate every ``valid_every`` epochs (1 = after each training epoch).
    valid_every = 1

    for epoch in range(cmd_args.num_epochs):
        random.shuffle(sample_idxes)

        # update the vae
        ae, vae_loss = epoch_train('train', epoch, ae, sample_idxes, train_binary_x, train_masks, train_y, cmd_args, optimizer_vae)
        print('>>>>average \033[92mtraining\033[0m of epoch %d: loss %.5f perp %.5f kl %.5f' % (epoch, vae_loss[0], vae_loss[1], vae_loss[2]))
        kl_history.append(vae_loss[2])
        perp_history.append(vae_loss[1])

        if epoch % valid_every == 0:
            _, valid_loss = epoch_train('valid', epoch, ae, valid_idxes, valid_binary_x, valid_masks, valid_y, cmd_args)
            print('>>>>average \033[93mvalid\033[0m of epoch %d: loss %.5f perp %.5f kl %.5f' % (epoch, valid_loss[0], valid_loss[1], valid_loss[2]))
            # Only the total loss drives the scheduler and best-model selection.
            valid_loss = valid_loss[0]
            lr_scheduler.step(valid_loss)
            torch.save(ae.state_dict(), cmd_args.save_dir + '/epoch-%d.model' % epoch)

            if best_valid_loss is None or valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                print('saving to best model since this is the best valid loss so far.----')
                # Same path as save_dir + '/epoch-best.model', computed once above.
                torch.save(ae.state_dict(), cmd_args.saved_model)

    np.save('./kl.npy', kl_history)
    np.save('./prep.npy', perp_history)

In [5]:
# Run the full training loop; tqdm progress bars and the per-epoch loss summaries printed by main() appear as this cell's output.
main() 

num_train: 113885	num_valid: 10000
a Conv1d inited
a Conv1d inited
a Conv1d inited
a Linear inited
a Linear inited
a Linear inited
a Linear inited
a GRU inited
a Linear inited


At epoch: 0  train vae loss: 20.70252 perp: 20.44808 kl: 0.25444: 100%|██████████| 379/379 [00:37<00:00, 10.18batch/s]
At epoch: 0  valid vae loss: 20.09672 perp: 19.83839 kl: 0.25833:   6%|▌         | 2/34 [00:00<00:02, 11.51batch/s]

>>>>average [92mtraining[0m of epoch 0: loss 25.68394 perp 25.14334 kl 0.54060


At epoch: 0  valid vae loss: 19.83746 perp: 19.58224 kl: 0.25521: 100%|██████████| 34/34 [00:01<00:00, 23.12batch/s]
At epoch: 1  train vae loss: 19.98828 perp: 19.73428 kl: 0.25400:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 0: loss 20.32977 perp 20.07766 kl 0.25211
saving to best model since this is the best valid loss so far.----


At epoch: 1  train vae loss: 18.30332 perp: 18.05375 kl: 0.24957: 100%|██████████| 379/379 [00:37<00:00, 10.04batch/s]
At epoch: 1  valid vae loss: 17.84548 perp: 17.59573 kl: 0.24976:   6%|▌         | 2/34 [00:00<00:02, 11.46batch/s]

>>>>average [92mtraining[0m of epoch 1: loss 19.41123 perp 19.18523 kl 0.22600


At epoch: 1  valid vae loss: 17.67068 perp: 17.42517 kl: 0.24551: 100%|██████████| 34/34 [00:01<00:00, 23.32batch/s]
At epoch: 2  train vae loss: 18.03784 perp: 17.78758 kl: 0.25026:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 1: loss 18.11210 perp 17.87165 kl 0.24045
saving to best model since this is the best valid loss so far.----


At epoch: 2  train vae loss: 15.16488 perp: 14.88579 kl: 0.27908: 100%|██████████| 379/379 [00:36<00:00, 10.26batch/s]
At epoch: 2  valid vae loss: 14.63798 perp: 14.35285 kl: 0.28513:   6%|▌         | 2/34 [00:00<00:02, 11.55batch/s]

>>>>average [92mtraining[0m of epoch 2: loss 16.68318 perp 16.42237 kl 0.26081


At epoch: 2  valid vae loss: 14.65314 perp: 14.37792 kl: 0.27521: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 3  train vae loss: 14.61470 perp: 14.34375 kl: 0.27095:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 2: loss 14.99114 perp 14.71646 kl 0.27467
saving to best model since this is the best valid loss so far.----


At epoch: 3  train vae loss: 13.51190 perp: 13.21138 kl: 0.30052: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 3  valid vae loss: 12.79371 perp: 12.49670 kl: 0.29701:   6%|▌         | 2/34 [00:00<00:02, 11.35batch/s]

>>>>average [92mtraining[0m of epoch 3: loss 14.03754 perp 13.75086 kl 0.28668


At epoch: 3  valid vae loss: 12.77362 perp: 12.48934 kl: 0.28428: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 4  train vae loss: 13.07811 perp: 12.78592 kl: 0.29219:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 3: loss 13.16777 perp 12.88073 kl 0.28704
saving to best model since this is the best valid loss so far.----


At epoch: 4  train vae loss: 12.41371 perp: 12.11930 kl: 0.29441: 100%|██████████| 379/379 [00:37<00:00, 10.21batch/s]
At epoch: 4  valid vae loss: 11.68110 perp: 11.37657 kl: 0.30453:   6%|▌         | 2/34 [00:00<00:02, 11.43batch/s]

>>>>average [92mtraining[0m of epoch 4: loss 12.72174 perp 12.42823 kl 0.29351


At epoch: 4  valid vae loss: 11.56216 perp: 11.27126 kl: 0.29090: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 5  train vae loss: 11.68335 perp: 11.39263 kl: 0.29072:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 4: loss 12.03418 perp 11.73928 kl 0.29490
saving to best model since this is the best valid loss so far.----


At epoch: 5  train vae loss: 11.90853 perp: 11.60773 kl: 0.30079: 100%|██████████| 379/379 [00:37<00:00, 10.21batch/s]
At epoch: 5  valid vae loss: 11.09719 perp: 10.79454 kl: 0.30265:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 5: loss 11.86560 perp 11.56811 kl 0.29749


At epoch: 5  valid vae loss: 11.00181 perp: 10.71282 kl: 0.28900: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 6  train vae loss: 11.95120 perp: 11.65954 kl: 0.29167:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 5: loss 11.39503 perp 11.10203 kl 0.29300
saving to best model since this is the best valid loss so far.----


At epoch: 6  train vae loss: 10.96383 perp: 10.67574 kl: 0.28810: 100%|██████████| 379/379 [00:37<00:00, 10.20batch/s]
At epoch: 6  valid vae loss: 10.50122 perp: 10.19552 kl: 0.30570:   6%|▌         | 2/34 [00:00<00:02, 11.51batch/s]

>>>>average [92mtraining[0m of epoch 6: loss 11.23318 perp 10.93682 kl 0.29636


At epoch: 6  valid vae loss: 10.32513 perp: 10.03249 kl: 0.29264: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 7  train vae loss: 10.78589 perp: 10.47881 kl: 0.30708:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 6: loss 10.79493 perp 10.49875 kl 0.29617
saving to best model since this is the best valid loss so far.----


At epoch: 7  train vae loss: 10.23506 perp: 9.93593 kl: 0.29914: 100%|██████████| 379/379 [00:37<00:00, 10.15batch/s] 
At epoch: 7  valid vae loss: 10.02526 perp: 9.72260 kl: 0.30266:   6%|▌         | 2/34 [00:00<00:02, 11.51batch/s] 

>>>>average [92mtraining[0m of epoch 7: loss 10.77105 perp 10.47281 kl 0.29824


At epoch: 7  valid vae loss: 9.82538 perp: 9.53433 kl: 0.29105: 100%|██████████| 34/34 [00:01<00:00, 23.11batch/s]  
At epoch: 8  train vae loss: 10.27437 perp: 9.97723 kl: 0.29715:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 7: loss 10.31491 perp 10.02089 kl 0.29402
saving to best model since this is the best valid loss so far.----


At epoch: 8  train vae loss: 10.70418 perp: 10.39656 kl: 0.30762: 100%|██████████| 379/379 [00:37<00:00, 10.09batch/s]
At epoch: 8  valid vae loss: 9.71133 perp: 9.40423 kl: 0.30709:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s] 

>>>>average [92mtraining[0m of epoch 8: loss 10.34919 perp 10.04969 kl 0.29950


At epoch: 8  valid vae loss: 9.57643 perp: 9.28194 kl: 0.29450: 100%|██████████| 34/34 [00:01<00:00, 22.99batch/s]  
At epoch: 9  train vae loss: 10.46696 perp: 10.14979 kl: 0.31717:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 8: loss 9.96338 perp 9.66532 kl 0.29806
saving to best model since this is the best valid loss so far.----


At epoch: 9  train vae loss: 9.80418 perp: 9.50909 kl: 0.29510: 100%|██████████| 379/379 [00:38<00:00,  9.79batch/s]  
At epoch: 9  valid vae loss: 9.33850 perp: 9.02957 kl: 0.30893:   6%|▌         | 2/34 [00:00<00:02, 11.52batch/s]

>>>>average [92mtraining[0m of epoch 9: loss 9.95381 perp 9.65479 kl 0.29902


At epoch: 9  valid vae loss: 9.26321 perp: 8.96639 kl: 0.29682: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s] 
At epoch: 10  train vae loss: 9.91410 perp: 9.61059 kl: 0.30351:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 9: loss 9.65030 perp 9.35036 kl 0.29994
saving to best model since this is the best valid loss so far.----


At epoch: 10  train vae loss: 9.67492 perp: 9.37206 kl: 0.30286: 100%|██████████| 379/379 [00:37<00:00, 10.10batch/s]  
At epoch: 10  valid vae loss: 9.07707 perp: 8.76799 kl: 0.30908:   6%|▌         | 2/34 [00:00<00:02, 11.38batch/s]

>>>>average [92mtraining[0m of epoch 10: loss 9.64665 perp 9.34810 kl 0.29855


At epoch: 10  valid vae loss: 8.91570 perp: 8.61999 kl: 0.29570: 100%|██████████| 34/34 [00:01<00:00, 24.51batch/s]
At epoch: 11  train vae loss: 9.39808 perp: 9.10008 kl: 0.29800:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 10: loss 9.37690 perp 9.07703 kl 0.29987
saving to best model since this is the best valid loss so far.----


At epoch: 11  train vae loss: 8.93082 perp: 8.63188 kl: 0.29894: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s] 
At epoch: 11  valid vae loss: 8.78576 perp: 8.47320 kl: 0.31256:   6%|▌         | 2/34 [00:00<00:02, 12.05batch/s]

>>>>average [92mtraining[0m of epoch 11: loss 9.35357 perp 9.05199 kl 0.30158


At epoch: 11  valid vae loss: 8.61153 perp: 8.31237 kl: 0.29916: 100%|██████████| 34/34 [00:01<00:00, 24.63batch/s]
At epoch: 12  train vae loss: 9.42642 perp: 9.11455 kl: 0.31187:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 11: loss 9.05427 perp 8.75138 kl 0.30289
saving to best model since this is the best valid loss so far.----


At epoch: 12  train vae loss: 8.94937 perp: 8.64726 kl: 0.30211: 100%|██████████| 379/379 [00:36<00:00, 10.46batch/s]
At epoch: 12  valid vae loss: 8.64976 perp: 8.34277 kl: 0.30699:   6%|▌         | 2/34 [00:00<00:02, 12.05batch/s]

>>>>average [92mtraining[0m of epoch 12: loss 9.10145 perp 8.79882 kl 0.30263


At epoch: 12  valid vae loss: 8.48199 perp: 8.18925 kl: 0.29274: 100%|██████████| 34/34 [00:01<00:00, 24.96batch/s]
At epoch: 13  train vae loss: 9.07321 perp: 8.76337 kl: 0.30984:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 12: loss 8.91490 perp 8.61788 kl 0.29702
saving to best model since this is the best valid loss so far.----


At epoch: 13  train vae loss: 8.56452 perp: 8.26453 kl: 0.29999: 100%|██████████| 379/379 [00:37<00:00, 10.20batch/s]
At epoch: 13  valid vae loss: 8.58154 perp: 8.26816 kl: 0.31339:   6%|▌         | 2/34 [00:00<00:02, 12.14batch/s]

>>>>average [92mtraining[0m of epoch 13: loss 8.86654 perp 8.56417 kl 0.30237


At epoch: 13  valid vae loss: 8.38869 perp: 8.08961 kl: 0.29908: 100%|██████████| 34/34 [00:01<00:00, 24.88batch/s]
At epoch: 14  train vae loss: 8.87621 perp: 8.55406 kl: 0.32215:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 13: loss 8.81645 perp 8.51320 kl 0.30324
saving to best model since this is the best valid loss so far.----


At epoch: 14  train vae loss: 8.57311 perp: 8.27734 kl: 0.29577: 100%|██████████| 379/379 [00:37<00:00, 10.24batch/s]
At epoch: 14  valid vae loss: 8.23573 perp: 7.92137 kl: 0.31436:   6%|▌         | 2/34 [00:00<00:02, 11.02batch/s]

>>>>average [92mtraining[0m of epoch 14: loss 8.64077 perp 8.33699 kl 0.30377


At epoch: 14  valid vae loss: 8.04935 perp: 7.74885 kl: 0.30051: 100%|██████████| 34/34 [00:01<00:00, 24.18batch/s]
At epoch: 15  train vae loss: 8.66308 perp: 8.34707 kl: 0.31601:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 14: loss 8.50238 perp 8.19839 kl 0.30399
saving to best model since this is the best valid loss so far.----


At epoch: 15  train vae loss: 8.17244 perp: 7.86319 kl: 0.30924: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 15  valid vae loss: 8.03786 perp: 7.72386 kl: 0.31400:   6%|▌         | 2/34 [00:00<00:02, 12.07batch/s]

>>>>average [92mtraining[0m of epoch 15: loss 8.43927 perp 8.13438 kl 0.30489


At epoch: 15  valid vae loss: 7.87613 perp: 7.57648 kl: 0.29965: 100%|██████████| 34/34 [00:01<00:00, 24.47batch/s]
At epoch: 16  train vae loss: 8.65811 perp: 8.36728 kl: 0.29084:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 15: loss 8.28291 perp 7.97936 kl 0.30355
saving to best model since this is the best valid loss so far.----


At epoch: 16  train vae loss: 8.31995 perp: 8.02187 kl: 0.29808: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 16  valid vae loss: 7.65829 perp: 7.34564 kl: 0.31266:   6%|▌         | 2/34 [00:00<00:02, 11.97batch/s]

>>>>average [92mtraining[0m of epoch 16: loss 8.21808 perp 7.91245 kl 0.30563


At epoch: 16  valid vae loss: 7.47402 perp: 7.17592 kl: 0.29810: 100%|██████████| 34/34 [00:01<00:00, 24.47batch/s]
At epoch: 17  train vae loss: 8.01517 perp: 7.70751 kl: 0.30765:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 16: loss 7.90848 perp 7.60649 kl 0.30199
saving to best model since this is the best valid loss so far.----


At epoch: 17  train vae loss: 7.93254 perp: 7.63049 kl: 0.30206: 100%|██████████| 379/379 [00:37<00:00, 10.21batch/s]
At epoch: 17  valid vae loss: 7.45378 perp: 7.14153 kl: 0.31226:   6%|▌         | 2/34 [00:00<00:02, 11.43batch/s]

>>>>average [92mtraining[0m of epoch 17: loss 8.04519 perp 7.73901 kl 0.30618


At epoch: 17  valid vae loss: 7.24870 perp: 6.95077 kl: 0.29793: 100%|██████████| 34/34 [00:01<00:00, 22.85batch/s]
At epoch: 18  train vae loss: 7.76366 perp: 7.47089 kl: 0.29276:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 17: loss 7.70843 perp 7.40716 kl 0.30127
saving to best model since this is the best valid loss so far.----


At epoch: 18  train vae loss: 8.06201 perp: 7.75661 kl: 0.30540: 100%|██████████| 379/379 [00:37<00:00, 10.09batch/s]
At epoch: 18  valid vae loss: 7.65459 perp: 7.34219 kl: 0.31239:   6%|▌         | 2/34 [00:00<00:02, 11.86batch/s]

>>>>average [92mtraining[0m of epoch 18: loss 7.84978 perp 7.54467 kl 0.30511


At epoch: 18  valid vae loss: 7.35551 perp: 7.05686 kl: 0.29865: 100%|██████████| 34/34 [00:01<00:00, 23.71batch/s]
At epoch: 19  train vae loss: 7.80346 perp: 7.48181 kl: 0.32165:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 18: loss 7.88809 perp 7.58655 kl 0.30153


At epoch: 19  train vae loss: 7.75889 perp: 7.44139 kl: 0.31750: 100%|██████████| 379/379 [00:39<00:00,  9.51batch/s]
At epoch: 19  valid vae loss: 7.77034 perp: 7.47808 kl: 0.29226:   3%|▎         | 1/34 [00:00<00:05,  6.44batch/s]

>>>>average [92mtraining[0m of epoch 19: loss 7.71288 perp 7.40592 kl 0.30696


At epoch: 19  valid vae loss: 6.96458 perp: 6.66555 kl: 0.29902: 100%|██████████| 34/34 [00:01<00:00, 19.60batch/s]
At epoch: 20  train vae loss: 7.33958 perp: 7.03168 kl: 0.30790:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 19: loss 7.41525 perp 7.11318 kl 0.30207
saving to best model since this is the best valid loss so far.----


At epoch: 20  train vae loss: 7.78351 perp: 7.47012 kl: 0.31339: 100%|██████████| 379/379 [00:39<00:00,  9.52batch/s]
At epoch: 20  valid vae loss: 6.98126 perp: 6.66586 kl: 0.31540:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 20: loss 7.56385 perp 7.25667 kl 0.30718


At epoch: 20  valid vae loss: 6.73243 perp: 6.43071 kl: 0.30173: 100%|██████████| 34/34 [00:01<00:00, 22.60batch/s]
At epoch: 21  train vae loss: 7.37362 perp: 7.06144 kl: 0.31219:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 20: loss 7.21728 perp 6.91303 kl 0.30424
saving to best model since this is the best valid loss so far.----


At epoch: 21  train vae loss: 7.21512 perp: 6.91017 kl: 0.30495: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 21  valid vae loss: 6.85652 perp: 6.54106 kl: 0.31546:   6%|▌         | 2/34 [00:00<00:02, 11.87batch/s]

>>>>average [92mtraining[0m of epoch 21: loss 7.39008 perp 7.08363 kl 0.30645


At epoch: 21  valid vae loss: 6.51188 perp: 6.20992 kl: 0.30197: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 22  train vae loss: 7.37211 perp: 7.05989 kl: 0.31223:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 21: loss 7.09382 perp 6.78932 kl 0.30450
saving to best model since this is the best valid loss so far.----


At epoch: 22  train vae loss: 6.80929 perp: 6.49524 kl: 0.31405: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 22  valid vae loss: 6.67215 perp: 6.35974 kl: 0.31240:   6%|▌         | 2/34 [00:00<00:02, 11.86batch/s]

>>>>average [92mtraining[0m of epoch 22: loss 7.28084 perp 6.97354 kl 0.30730


At epoch: 22  valid vae loss: 6.31870 perp: 6.02046 kl: 0.29824: 100%|██████████| 34/34 [00:01<00:00, 22.99batch/s]
At epoch: 23  train vae loss: 6.92114 perp: 6.61509 kl: 0.30605:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 22: loss 6.93894 perp 6.63722 kl 0.30173
saving to best model since this is the best valid loss so far.----


At epoch: 23  train vae loss: 7.14288 perp: 6.84289 kl: 0.29999: 100%|██████████| 379/379 [00:36<00:00, 10.34batch/s]
At epoch: 23  valid vae loss: 6.54961 perp: 6.23375 kl: 0.31585:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 23: loss 7.11743 perp 6.81190 kl 0.30553


At epoch: 23  valid vae loss: 6.19439 perp: 5.89280 kl: 0.30159: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 24  train vae loss: 7.16838 perp: 6.86699 kl: 0.30140:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 23: loss 6.77391 perp 6.46888 kl 0.30503
saving to best model since this is the best valid loss so far.----


At epoch: 24  train vae loss: 6.68899 perp: 6.36896 kl: 0.32003: 100%|██████████| 379/379 [00:37<00:00, 10.09batch/s]
At epoch: 24  valid vae loss: 6.58117 perp: 6.26493 kl: 0.31624:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 24: loss 6.96841 perp 6.66423 kl 0.30419


At epoch: 24  valid vae loss: 6.22087 perp: 5.91858 kl: 0.30229: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 25  train vae loss: 6.89091 perp: 6.58480 kl: 0.30612:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 24: loss 6.79476 perp 6.48948 kl 0.30528


At epoch: 25  train vae loss: 6.68678 perp: 6.37392 kl: 0.31287: 100%|██████████| 379/379 [00:36<00:00, 10.27batch/s]
At epoch: 25  valid vae loss: 6.44988 perp: 6.13611 kl: 0.31378:   6%|▌         | 2/34 [00:00<00:02, 11.51batch/s]

>>>>average [92mtraining[0m of epoch 25: loss 6.87632 perp 6.56983 kl 0.30649


At epoch: 25  valid vae loss: 6.10796 perp: 5.80774 kl: 0.30023: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 26  train vae loss: 6.76556 perp: 6.45904 kl: 0.30652:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 25: loss 6.67777 perp 6.37405 kl 0.30372
saving to best model since this is the best valid loss so far.----


At epoch: 26  train vae loss: 7.23089 perp: 6.93333 kl: 0.29756: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 26  valid vae loss: 6.35142 perp: 6.03803 kl: 0.31339:   6%|▌         | 2/34 [00:00<00:02, 11.59batch/s]

>>>>average [92mtraining[0m of epoch 26: loss 6.71875 perp 6.41415 kl 0.30460


At epoch: 26  valid vae loss: 5.95346 perp: 5.65406 kl: 0.29939: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 27  train vae loss: 6.51209 perp: 6.20958 kl: 0.30251:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 26: loss 6.60316 perp 6.30042 kl 0.30274
saving to best model since this is the best valid loss so far.----


At epoch: 27  train vae loss: 6.69199 perp: 6.39269 kl: 0.29930: 100%|██████████| 379/379 [00:36<00:00, 10.27batch/s]
At epoch: 27  valid vae loss: 6.15457 perp: 5.84138 kl: 0.31318:   6%|▌         | 2/34 [00:00<00:02, 11.61batch/s]

>>>>average [92mtraining[0m of epoch 27: loss 6.60017 perp 6.29589 kl 0.30428


At epoch: 27  valid vae loss: 5.73471 perp: 5.43487 kl: 0.29984: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 28  train vae loss: 6.59523 perp: 6.29349 kl: 0.30175:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 27: loss 6.37571 perp 6.07214 kl 0.30357
saving to best model since this is the best valid loss so far.----


At epoch: 28  train vae loss: 6.46781 perp: 6.17665 kl: 0.29116: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 28  valid vae loss: 6.20465 perp: 5.89094 kl: 0.31371:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 28: loss 6.47728 perp 6.17197 kl 0.30530


At epoch: 28  valid vae loss: 5.73706 perp: 5.43671 kl: 0.30035: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 29  train vae loss: 6.35571 perp: 6.05436 kl: 0.30135:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 28: loss 6.42438 perp 6.12060 kl 0.30378


At epoch: 29  train vae loss: 6.34567 perp: 6.05165 kl: 0.29402: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 29  valid vae loss: 5.86039 perp: 5.54785 kl: 0.31254:   6%|▌         | 2/34 [00:00<00:02, 11.38batch/s]

>>>>average [92mtraining[0m of epoch 29: loss 6.36663 perp 6.06116 kl 0.30547


At epoch: 29  valid vae loss: 5.51023 perp: 5.21111 kl: 0.29912: 100%|██████████| 34/34 [00:01<00:00, 22.91batch/s]
At epoch: 30  train vae loss: 6.20716 perp: 5.89628 kl: 0.31088:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 29: loss 6.10182 perp 5.79883 kl 0.30298
saving to best model since this is the best valid loss so far.----


At epoch: 30  train vae loss: 6.18486 perp: 5.86507 kl: 0.31979: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 30  valid vae loss: 5.70066 perp: 5.38984 kl: 0.31082:   6%|▌         | 2/34 [00:00<00:02, 11.53batch/s]

>>>>average [92mtraining[0m of epoch 30: loss 6.21975 perp 5.91456 kl 0.30519


At epoch: 30  valid vae loss: 5.34402 perp: 5.04778 kl: 0.29624: 100%|██████████| 34/34 [00:01<00:00, 23.26batch/s]
At epoch: 31  train vae loss: 6.13009 perp: 5.83066 kl: 0.29943:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 30: loss 5.93848 perp 5.63751 kl 0.30097
saving to best model since this is the best valid loss so far.----


At epoch: 31  train vae loss: 6.33411 perp: 6.01914 kl: 0.31498: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 31  valid vae loss: 5.65768 perp: 5.34495 kl: 0.31273:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 31: loss 6.12860 perp 5.82172 kl 0.30689


At epoch: 31  valid vae loss: 5.24506 perp: 4.94660 kl: 0.29846: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 32  train vae loss: 5.88175 perp: 5.59104 kl: 0.29071:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 31: loss 5.84074 perp 5.53768 kl 0.30305
saving to best model since this is the best valid loss so far.----


At epoch: 32  train vae loss: 6.11971 perp: 5.81214 kl: 0.30757: 100%|██████████| 379/379 [00:36<00:00, 10.27batch/s]
At epoch: 32  valid vae loss: 5.61234 perp: 5.29558 kl: 0.31675:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 32: loss 6.04603 perp 5.73723 kl 0.30880


At epoch: 32  valid vae loss: 5.24927 perp: 4.94759 kl: 0.30168: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 33  train vae loss: 5.88638 perp: 5.55827 kl: 0.32811:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 32: loss 5.81624 perp 5.50946 kl 0.30677
saving to best model since this is the best valid loss so far.----


At epoch: 33  train vae loss: 5.54353 perp: 5.24247 kl: 0.30106: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 33  valid vae loss: 5.34648 perp: 5.03019 kl: 0.31629:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 33: loss 5.86132 perp 5.55226 kl 0.30906


At epoch: 33  valid vae loss: 5.09766 perp: 4.79634 kl: 0.30131: 100%|██████████| 34/34 [00:01<00:00, 23.31batch/s]
At epoch: 34  train vae loss: 5.73413 perp: 5.41909 kl: 0.31504:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 33: loss 5.60101 perp 5.29409 kl 0.30692
saving to best model since this is the best valid loss so far.----


At epoch: 34  train vae loss: 5.31340 perp: 5.00353 kl: 0.30987: 100%|██████████| 379/379 [00:37<00:00, 10.24batch/s]
At epoch: 34  valid vae loss: 5.53117 perp: 5.21468 kl: 0.31650:   6%|▌         | 2/34 [00:00<00:02, 11.60batch/s]

>>>>average [92mtraining[0m of epoch 34: loss 5.78118 perp 5.47018 kl 0.31100


At epoch: 34  valid vae loss: 5.31648 perp: 5.01478 kl: 0.30171: 100%|██████████| 34/34 [00:01<00:00, 23.46batch/s]
At epoch: 35  train vae loss: 5.69859 perp: 5.38217 kl: 0.31642:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 34: loss 5.84522 perp 5.53794 kl 0.30728


At epoch: 35  train vae loss: 5.87752 perp: 5.57114 kl: 0.30638: 100%|██████████| 379/379 [00:37<00:00, 10.24batch/s]
At epoch: 35  valid vae loss: 5.29263 perp: 4.96819 kl: 0.32444:   3%|▎         | 1/34 [00:00<00:06,  5.19batch/s]

>>>>average [92mtraining[0m of epoch 35: loss 5.67397 perp 5.36022 kl 0.31376


At epoch: 35  valid vae loss: 4.98106 perp: 4.67247 kl: 0.30859: 100%|██████████| 34/34 [00:01<00:00, 21.66batch/s]
At epoch: 36  train vae loss: 5.48694 perp: 5.16787 kl: 0.31906:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 35: loss 5.49524 perp 5.18029 kl 0.31495
saving to best model since this is the best valid loss so far.----


At epoch: 36  train vae loss: 5.50376 perp: 5.19330 kl: 0.31046: 100%|██████████| 379/379 [00:37<00:00, 10.07batch/s]
At epoch: 36  valid vae loss: 5.08949 perp: 4.76886 kl: 0.32064:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 36: loss 5.51610 perp 5.20110 kl 0.31500


At epoch: 36  valid vae loss: 4.71947 perp: 4.41483 kl: 0.30464: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 37  train vae loss: 5.26178 perp: 4.95616 kl: 0.30562:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 36: loss 5.29719 perp 4.98583 kl 0.31136
saving to best model since this is the best valid loss so far.----


At epoch: 37  train vae loss: 5.56232 perp: 5.25256 kl: 0.30976: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 37  valid vae loss: 4.94340 perp: 4.61959 kl: 0.32381:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 37: loss 5.43296 perp 5.11509 kl 0.31787


At epoch: 37  valid vae loss: 4.79744 perp: 4.49004 kl: 0.30739: 100%|██████████| 34/34 [00:01<00:00, 23.73batch/s]
At epoch: 38  train vae loss: 5.30213 perp: 4.97395 kl: 0.32818:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 37: loss 5.21538 perp 4.90099 kl 0.31439
saving to best model since this is the best valid loss so far.----


At epoch: 38  train vae loss: 5.15392 perp: 4.82486 kl: 0.32906: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 38  valid vae loss: 4.85686 perp: 4.52928 kl: 0.32758:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 38: loss 5.30425 perp 4.98350 kl 0.32075


At epoch: 38  valid vae loss: 4.59679 perp: 4.28575 kl: 0.31104: 100%|██████████| 34/34 [00:01<00:00, 23.74batch/s]
At epoch: 39  train vae loss: 5.03615 perp: 4.71727 kl: 0.31888:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 38: loss 5.06702 perp 4.74861 kl 0.31840
saving to best model since this is the best valid loss so far.----


At epoch: 39  train vae loss: 4.93812 perp: 4.60696 kl: 0.33116: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 39  valid vae loss: 4.69388 perp: 4.36560 kl: 0.32828:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 39: loss 5.14980 perp 4.82897 kl 0.32083


At epoch: 39  valid vae loss: 4.45128 perp: 4.13980 kl: 0.31148: 100%|██████████| 34/34 [00:01<00:00, 23.76batch/s]
At epoch: 40  train vae loss: 5.12840 perp: 4.80190 kl: 0.32650:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 39: loss 4.91130 perp 4.59204 kl 0.31926
saving to best model since this is the best valid loss so far.----


At epoch: 40  train vae loss: 5.12121 perp: 4.78785 kl: 0.33336: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 40  valid vae loss: 4.57049 perp: 4.24218 kl: 0.32831:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 40: loss 5.12681 perp 4.80192 kl 0.32489


At epoch: 40  valid vae loss: 4.31006 perp: 3.99963 kl: 0.31043: 100%|██████████| 34/34 [00:01<00:00, 23.77batch/s]
At epoch: 41  train vae loss: 4.77684 perp: 4.44739 kl: 0.32945:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 40: loss 4.78436 perp 4.46500 kl 0.31935
saving to best model since this is the best valid loss so far.----


At epoch: 41  train vae loss: 5.23752 perp: 4.90590 kl: 0.33161: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 41  valid vae loss: 4.69585 perp: 4.36179 kl: 0.33406:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 41: loss 4.89230 perp 4.56801 kl 0.32428


At epoch: 41  valid vae loss: 4.52935 perp: 4.21323 kl: 0.31612: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 42  train vae loss: 5.00736 perp: 4.67740 kl: 0.32996:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 41: loss 4.91523 perp 4.59037 kl 0.32486


At epoch: 42  train vae loss: 4.74987 perp: 4.42898 kl: 0.32089: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 42  valid vae loss: 4.42543 perp: 4.09010 kl: 0.33533:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 42: loss 4.79217 perp 4.46787 kl 0.32430


At epoch: 42  valid vae loss: 4.25330 perp: 3.93592 kl: 0.31739: 100%|██████████| 34/34 [00:01<00:00, 23.69batch/s]
At epoch: 43  train vae loss: 4.69369 perp: 4.37780 kl: 0.31589:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 42: loss 4.70582 perp 4.37956 kl 0.32626
saving to best model since this is the best valid loss so far.----


At epoch: 43  train vae loss: 4.93499 perp: 4.59977 kl: 0.33522: 100%|██████████| 379/379 [00:37<00:00, 10.18batch/s]
At epoch: 43  valid vae loss: 4.22829 perp: 3.89638 kl: 0.33191:   6%|▌         | 2/34 [00:00<00:02, 11.94batch/s]

>>>>average [92mtraining[0m of epoch 43: loss 4.67871 perp 4.35123 kl 0.32748


At epoch: 43  valid vae loss: 4.05757 perp: 3.74442 kl: 0.31315: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 44  train vae loss: 4.57421 perp: 4.25455 kl: 0.31967:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 43: loss 4.47451 perp 4.15163 kl 0.32289
saving to best model since this is the best valid loss so far.----


At epoch: 44  train vae loss: 4.34004 perp: 4.02202 kl: 0.31803: 100%|██████████| 379/379 [00:37<00:00, 10.24batch/s]
At epoch: 44  valid vae loss: 4.20162 perp: 3.86033 kl: 0.34129:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 44: loss 4.58740 perp 4.25864 kl 0.32876


At epoch: 44  valid vae loss: 3.91474 perp: 3.59394 kl: 0.32080: 100%|██████████| 34/34 [00:01<00:00, 23.75batch/s]
At epoch: 45  train vae loss: 4.22399 perp: 3.89335 kl: 0.33064:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 44: loss 4.40815 perp 4.07609 kl 0.33207
saving to best model since this is the best valid loss so far.----


At epoch: 45  train vae loss: 4.51351 perp: 4.18430 kl: 0.32921: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 45  valid vae loss: 4.11291 perp: 3.78143 kl: 0.33148:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 45: loss 4.43970 perp 4.11025 kl 0.32945


At epoch: 45  valid vae loss: 3.85400 perp: 3.54098 kl: 0.31301: 100%|██████████| 34/34 [00:01<00:00, 23.75batch/s]
At epoch: 46  train vae loss: 4.27812 perp: 3.94199 kl: 0.33613:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 45: loss 4.28214 perp 3.95902 kl 0.32313
saving to best model since this is the best valid loss so far.----


At epoch: 46  train vae loss: 4.53783 perp: 4.20402 kl: 0.33381: 100%|██████████| 379/379 [00:36<00:00, 10.31batch/s]
At epoch: 46  valid vae loss: 3.96113 perp: 3.62649 kl: 0.33464:   6%|▌         | 2/34 [00:00<00:02, 10.85batch/s]

>>>>average [92mtraining[0m of epoch 46: loss 4.35531 perp 4.02634 kl 0.32897


At epoch: 46  valid vae loss: 3.72785 perp: 3.41285 kl: 0.31500: 100%|██████████| 34/34 [00:01<00:00, 22.83batch/s]
At epoch: 47  train vae loss: 4.23628 perp: 3.89906 kl: 0.33722:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 46: loss 4.12120 perp 3.79525 kl 0.32595
saving to best model since this is the best valid loss so far.----


At epoch: 47  train vae loss: 4.14265 perp: 3.81929 kl: 0.32335: 100%|██████████| 379/379 [00:37<00:00, 10.13batch/s]
At epoch: 47  valid vae loss: 3.94397 perp: 3.60691 kl: 0.33705:   6%|▌         | 2/34 [00:00<00:02, 11.61batch/s]

>>>>average [92mtraining[0m of epoch 47: loss 4.25263 perp 3.92223 kl 0.33039


At epoch: 47  valid vae loss: 3.65631 perp: 3.33809 kl: 0.31822: 100%|██████████| 34/34 [00:01<00:00, 23.43batch/s]
At epoch: 48  train vae loss: 4.22009 perp: 3.88444 kl: 0.33564:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 47: loss 4.11610 perp 3.78740 kl 0.32870
saving to best model since this is the best valid loss so far.----


At epoch: 48  train vae loss: 3.99125 perp: 3.67338 kl: 0.31787: 100%|██████████| 379/379 [00:36<00:00, 10.31batch/s]
At epoch: 48  valid vae loss: 3.89174 perp: 3.55681 kl: 0.33492:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 48: loss 4.17861 perp 3.84718 kl 0.33143


At epoch: 48  valid vae loss: 3.73678 perp: 3.42246 kl: 0.31432: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 49  train vae loss: 4.28405 perp: 3.95499 kl: 0.32906:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 48: loss 4.10431 perp 3.77831 kl 0.32600
saving to best model since this is the best valid loss so far.----


At epoch: 49  train vae loss: 3.98985 perp: 3.67397 kl: 0.31588: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 49  valid vae loss: 3.68420 perp: 3.34781 kl: 0.33639:   6%|▌         | 2/34 [00:00<00:02, 11.43batch/s]

>>>>average [92mtraining[0m of epoch 49: loss 4.06420 perp 3.73163 kl 0.33257


At epoch: 49  valid vae loss: 3.50032 perp: 3.18510 kl: 0.31522: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 50  train vae loss: 3.84736 perp: 3.51795 kl: 0.32940:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 49: loss 3.87108 perp 3.54369 kl 0.32739
saving to best model since this is the best valid loss so far.----


At epoch: 50  train vae loss: 3.82438 perp: 3.49415 kl: 0.33022: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 50  valid vae loss: 3.79030 perp: 3.45433 kl: 0.33596:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 50: loss 3.98604 perp 3.65474 kl 0.33130


At epoch: 50  valid vae loss: 3.48659 perp: 3.17155 kl: 0.31504: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 51  train vae loss: 3.78828 perp: 3.45630 kl: 0.33198:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 50: loss 4.00055 perp 3.67331 kl 0.32723


At epoch: 51  train vae loss: 3.65601 perp: 3.33629 kl: 0.31972: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 51  valid vae loss: 3.54907 perp: 3.21467 kl: 0.33440:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 51: loss 3.87361 perp 3.54134 kl 0.33227


At epoch: 51  valid vae loss: 3.33369 perp: 3.01970 kl: 0.31399: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 52  train vae loss: 3.59617 perp: 3.28189 kl: 0.31429:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 51: loss 3.73021 perp 3.40415 kl 0.32606
saving to best model since this is the best valid loss so far.----


At epoch: 52  train vae loss: 3.44028 perp: 3.11917 kl: 0.32111: 100%|██████████| 379/379 [00:36<00:00, 10.34batch/s]
At epoch: 52  valid vae loss: 3.61849 perp: 3.28164 kl: 0.33686:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 52: loss 3.78131 perp 3.45036 kl 0.33095


At epoch: 52  valid vae loss: 3.26484 perp: 2.95000 kl: 0.31483: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 53  train vae loss: 3.74802 perp: 3.42179 kl: 0.32623:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 52: loss 3.74636 perp 3.41865 kl 0.32771


At epoch: 53  train vae loss: 3.65304 perp: 3.32734 kl: 0.32569: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 53  valid vae loss: 3.30447 perp: 2.96868 kl: 0.33579:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 53: loss 3.68452 perp 3.35283 kl 0.33169


At epoch: 53  valid vae loss: 3.18010 perp: 2.86612 kl: 0.31398: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 54  train vae loss: 3.83449 perp: 3.50093 kl: 0.33356:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 53: loss 3.48178 perp 3.15501 kl 0.32677
saving to best model since this is the best valid loss so far.----


At epoch: 54  train vae loss: 3.39848 perp: 3.05328 kl: 0.34520: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 54  valid vae loss: 3.35650 perp: 3.01861 kl: 0.33789:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 54: loss 3.61111 perp 3.27972 kl 0.33140


At epoch: 54  valid vae loss: 3.07380 perp: 2.75630 kl: 0.31750: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 55  train vae loss: 3.36508 perp: 3.03423 kl: 0.33085:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 54: loss 3.50319 perp 3.17402 kl 0.32916


At epoch: 55  train vae loss: 3.57635 perp: 3.23722 kl: 0.33913: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 55  valid vae loss: 3.20536 perp: 2.86446 kl: 0.34090:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 55: loss 3.58480 perp 3.25086 kl 0.33393


At epoch: 55  valid vae loss: 2.94630 perp: 2.62781 kl: 0.31849: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 56  train vae loss: 3.22640 perp: 2.89547 kl: 0.33092:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 55: loss 3.33925 perp 3.00748 kl 0.33177
saving to best model since this is the best valid loss so far.----


At epoch: 56  train vae loss: 3.49937 perp: 3.17416 kl: 0.32522: 100%|██████████| 379/379 [00:36<00:00, 10.29batch/s]
At epoch: 56  valid vae loss: 3.34677 perp: 3.00843 kl: 0.33834:   6%|▌         | 2/34 [00:00<00:02, 11.58batch/s]

>>>>average [92mtraining[0m of epoch 56: loss 3.44867 perp 3.11612 kl 0.33254


At epoch: 56  valid vae loss: 2.98905 perp: 2.67323 kl: 0.31582: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 57  train vae loss: 3.23174 perp: 2.88819 kl: 0.34355:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 56: loss 3.36058 perp 3.03106 kl 0.32953


At epoch: 57  train vae loss: 3.50895 perp: 3.18695 kl: 0.32200: 100%|██████████| 379/379 [00:36<00:00, 10.31batch/s]
At epoch: 57  valid vae loss: 3.22000 perp: 2.88540 kl: 0.33459:   6%|▌         | 2/34 [00:00<00:02, 11.58batch/s]

>>>>average [92mtraining[0m of epoch 57: loss 3.36301 perp 3.03006 kl 0.33295


At epoch: 57  valid vae loss: 2.83807 perp: 2.52583 kl: 0.31224: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 58  train vae loss: 3.34582 perp: 3.02582 kl: 0.32000:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 57: loss 3.31770 perp 2.99166 kl 0.32604
saving to best model since this is the best valid loss so far.----


At epoch: 58  train vae loss: 3.10974 perp: 2.76950 kl: 0.34024: 100%|██████████| 379/379 [00:36<00:00, 10.34batch/s]
At epoch: 58  valid vae loss: 3.04422 perp: 2.70354 kl: 0.34068:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 58: loss 3.32619 perp 2.99240 kl 0.33379


At epoch: 58  valid vae loss: 2.75056 perp: 2.43259 kl: 0.31797: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 59  train vae loss: 3.39379 perp: 3.05157 kl: 0.34221:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 58: loss 3.16280 perp 2.83112 kl 0.33168
saving to best model since this is the best valid loss so far.----


At epoch: 59  train vae loss: 3.08146 perp: 2.75206 kl: 0.32941: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 59  valid vae loss: 2.97705 perp: 2.63829 kl: 0.33876:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 59: loss 3.22570 perp 2.89242 kl 0.33328


At epoch: 59  valid vae loss: 2.70065 perp: 2.38495 kl: 0.31570: 100%|██████████| 34/34 [00:01<00:00, 23.43batch/s]
At epoch: 60  train vae loss: 2.98697 perp: 2.65847 kl: 0.32849:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 59: loss 3.06598 perp 2.73611 kl 0.32988
saving to best model since this is the best valid loss so far.----


At epoch: 60  train vae loss: 3.04316 perp: 2.70961 kl: 0.33355: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 60  valid vae loss: 2.95279 perp: 2.61337 kl: 0.33942:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 60: loss 3.21666 perp 2.88159 kl 0.33507


At epoch: 60  valid vae loss: 2.63409 perp: 2.31742 kl: 0.31667: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 61  train vae loss: 2.93652 perp: 2.59763 kl: 0.33889:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 60: loss 3.02932 perp 2.69876 kl 0.33056
saving to best model since this is the best valid loss so far.----


At epoch: 61  train vae loss: 2.99171 perp: 2.67083 kl: 0.32088: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 61  valid vae loss: 2.77745 perp: 2.43965 kl: 0.33779:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 61: loss 3.06996 perp 2.73665 kl 0.33330


At epoch: 61  valid vae loss: 2.52628 perp: 2.21071 kl: 0.31556: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 62  train vae loss: 2.87386 perp: 2.54253 kl: 0.33133:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 61: loss 2.94331 perp 2.61399 kl 0.32931
saving to best model since this is the best valid loss so far.----


At epoch: 62  train vae loss: 3.24242 perp: 2.88884 kl: 0.35359: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 62  valid vae loss: 2.87618 perp: 2.53576 kl: 0.34043:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 62: loss 3.02948 perp 2.69703 kl 0.33245


At epoch: 62  valid vae loss: 2.54937 perp: 2.23135 kl: 0.31802: 100%|██████████| 34/34 [00:01<00:00, 23.36batch/s]
At epoch: 63  train vae loss: 2.81279 perp: 2.48570 kl: 0.32709:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 62: loss 2.92876 perp 2.59721 kl 0.33155
saving to best model since this is the best valid loss so far.----


At epoch: 63  train vae loss: 3.49141 perp: 3.15116 kl: 0.34026: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 63  valid vae loss: 3.30836 perp: 2.96748 kl: 0.34088:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 63: loss 2.98408 perp 2.64978 kl 0.33430


At epoch: 63  valid vae loss: 3.11510 perp: 2.79781 kl: 0.31730: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 64  train vae loss: 3.57530 perp: 3.23446 kl: 0.34084:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 63: loss 3.44995 perp 3.11782 kl 0.33213


At epoch: 64  train vae loss: 2.66363 perp: 2.34038 kl: 0.32325: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 64  valid vae loss: 2.60730 perp: 2.26844 kl: 0.33886:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 64: loss 2.88268 perp 2.54888 kl 0.33380


At epoch: 64  valid vae loss: 2.34679 perp: 2.03042 kl: 0.31637: 100%|██████████| 34/34 [00:01<00:00, 23.07batch/s]
At epoch: 65  train vae loss: 2.60415 perp: 2.26476 kl: 0.33939:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 64: loss 2.69698 perp 2.36693 kl 0.33005
saving to best model since this is the best valid loss so far.----


At epoch: 65  train vae loss: 3.13887 perp: 2.80809 kl: 0.33077: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 65  valid vae loss: 2.69207 perp: 2.35690 kl: 0.33517:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 65: loss 2.81395 perp 2.48236 kl 0.33159


At epoch: 65  valid vae loss: 2.37051 perp: 2.05726 kl: 0.31325: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 66  train vae loss: 2.68662 perp: 2.34371 kl: 0.34291:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 65: loss 2.76705 perp 2.44043 kl 0.32662


At epoch: 66  train vae loss: 2.73155 perp: 2.42018 kl: 0.31137: 100%|██████████| 379/379 [00:37<00:00,  9.98batch/s]
At epoch: 66  valid vae loss: 2.55883 perp: 2.22166 kl: 0.33717:   6%|▌         | 2/34 [00:00<00:02, 11.30batch/s]

>>>>average [92mtraining[0m of epoch 66: loss 2.77955 perp 2.44772 kl 0.33183


At epoch: 66  valid vae loss: 2.28398 perp: 1.96886 kl: 0.31511: 100%|██████████| 34/34 [00:01<00:00, 22.80batch/s]
At epoch: 67  train vae loss: 2.61998 perp: 2.29292 kl: 0.32707:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 66: loss 2.61048 perp 2.28187 kl 0.32861
saving to best model since this is the best valid loss so far.----


At epoch: 67  train vae loss: 2.88266 perp: 2.55599 kl: 0.32666: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 67  valid vae loss: 2.56058 perp: 2.22195 kl: 0.33862:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 67: loss 2.69215 perp 2.36138 kl 0.33076


At epoch: 67  valid vae loss: 2.38154 perp: 2.06460 kl: 0.31694: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 68  train vae loss: 2.79350 perp: 2.46012 kl: 0.33338:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 67: loss 2.70476 perp 2.37469 kl 0.33007


At epoch: 68  train vae loss: 2.73847 perp: 2.39858 kl: 0.33989: 100%|██████████| 379/379 [00:37<00:00, 10.05batch/s]
At epoch: 68  valid vae loss: 2.40964 perp: 2.07071 kl: 0.33893:   6%|▌         | 2/34 [00:00<00:03, 10.25batch/s]

>>>>average [92mtraining[0m of epoch 68: loss 2.68672 perp 2.35448 kl 0.33224


At epoch: 68  valid vae loss: 2.06569 perp: 1.74975 kl: 0.31594: 100%|██████████| 34/34 [00:01<00:00, 21.07batch/s]
At epoch: 69  train vae loss: 2.44811 perp: 2.12297 kl: 0.32514:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 68: loss 2.46900 perp 2.13911 kl 0.32989
saving to best model since this is the best valid loss so far.----


At epoch: 69  train vae loss: 2.43592 perp: 2.11271 kl: 0.32321: 100%|██████████| 379/379 [00:37<00:00, 10.09batch/s]
At epoch: 69  valid vae loss: 2.47971 perp: 2.14037 kl: 0.33934:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 69: loss 2.59471 perp 2.26284 kl 0.33187


At epoch: 69  valid vae loss: 2.13366 perp: 1.81661 kl: 0.31705: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 70  train vae loss: 2.50371 perp: 2.17634 kl: 0.32737:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 69: loss 2.54253 perp 2.21200 kl 0.33053


At epoch: 70  train vae loss: 2.50102 perp: 2.15152 kl: 0.34950: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 70  valid vae loss: 2.64630 perp: 2.30762 kl: 0.33868:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 70: loss 2.54194 perp 2.21150 kl 0.33043


At epoch: 70  valid vae loss: 2.45419 perp: 2.13784 kl: 0.31635: 100%|██████████| 34/34 [00:01<00:00, 23.71batch/s]
At epoch: 71  train vae loss: 3.04727 perp: 2.70415 kl: 0.34312:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 70: loss 2.76793 perp 2.43805 kl 0.32988


At epoch: 71  train vae loss: 2.61473 perp: 2.29034 kl: 0.32438: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 71  valid vae loss: 2.19210 perp: 1.86022 kl: 0.33187:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 71: loss 2.52764 perp 2.19558 kl 0.33206


At epoch: 71  valid vae loss: 1.92064 perp: 1.61043 kl: 0.31021: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 72  train vae loss: 2.28484 perp: 1.97069 kl: 0.31416:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 71: loss 2.30689 perp 1.98351 kl 0.32338
saving to best model since this is the best valid loss so far.----


At epoch: 72  train vae loss: 2.47485 perp: 2.14658 kl: 0.32828: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 72  valid vae loss: 2.24299 perp: 1.90918 kl: 0.33382:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 72: loss 2.40484 perp 2.07771 kl 0.32713


At epoch: 72  valid vae loss: 1.93306 perp: 1.62059 kl: 0.31247: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 73  train vae loss: 2.19887 perp: 1.86257 kl: 0.33630:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 72: loss 2.31047 perp 1.98490 kl 0.32557


At epoch: 73  train vae loss: 2.54814 perp: 2.21042 kl: 0.33772: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 73  valid vae loss: 2.14442 perp: 1.81260 kl: 0.33182:   6%|▌         | 2/34 [00:00<00:02, 11.49batch/s]

>>>>average [92mtraining[0m of epoch 73: loss 2.35775 perp 2.03042 kl 0.32732


At epoch: 73  valid vae loss: 1.86219 perp: 1.55212 kl: 0.31007: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 74  train vae loss: 2.42042 perp: 2.08465 kl: 0.33577:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 73: loss 2.25213 perp 1.92859 kl 0.32355
saving to best model since this is the best valid loss so far.----


At epoch: 74  train vae loss: 2.59242 perp: 2.26026 kl: 0.33216: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 74  valid vae loss: 2.15251 perp: 1.81678 kl: 0.33573:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 74: loss 2.33343 perp 2.00613 kl 0.32730


At epoch: 74  valid vae loss: 1.77561 perp: 1.46146 kl: 0.31414: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 75  train vae loss: 2.60204 perp: 2.26970 kl: 0.33234:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 74: loss 2.24743 perp 1.91995 kl 0.32749
saving to best model since this is the best valid loss so far.----


At epoch: 75  train vae loss: 2.38960 perp: 2.05375 kl: 0.33585: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 75  valid vae loss: 2.04351 perp: 1.71204 kl: 0.33147:   6%|▌         | 2/34 [00:00<00:02, 11.63batch/s]

>>>>average [92mtraining[0m of epoch 75: loss 2.28937 perp 1.96195 kl 0.32742


At epoch: 75  valid vae loss: 1.65199 perp: 1.34207 kl: 0.30992: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 76  train vae loss: 2.18334 perp: 1.85939 kl: 0.32395:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 75: loss 2.12478 perp 1.80143 kl 0.32335
saving to best model since this is the best valid loss so far.----


At epoch: 76  train vae loss: 2.10484 perp: 1.78386 kl: 0.32098: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 76  valid vae loss: 2.12249 perp: 1.79336 kl: 0.32914:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 76: loss 2.22474 perp 1.89877 kl 0.32597


At epoch: 76  valid vae loss: 1.79365 perp: 1.48553 kl: 0.30812: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 77  train vae loss: 1.98055 perp: 1.65411 kl: 0.32644:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 76: loss 2.11797 perp 1.79658 kl 0.32139
saving to best model since this is the best valid loss so far.----


At epoch: 77  train vae loss: 2.67388 perp: 2.34190 kl: 0.33198: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 77  valid vae loss: 2.19513 perp: 1.86011 kl: 0.33502:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 77: loss 2.21332 perp 1.88767 kl 0.32565


At epoch: 77  valid vae loss: 1.84117 perp: 1.52836 kl: 0.31281: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 78  train vae loss: 2.23757 perp: 1.91398 kl: 0.32359:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 77: loss 2.23701 perp 1.91026 kl 0.32675


At epoch: 78  train vae loss: 2.86902 perp: 2.55245 kl: 0.31657: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 78  valid vae loss: 1.97342 perp: 1.64309 kl: 0.33033:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 78: loss 2.15349 perp 1.82707 kl 0.32642


At epoch: 78  valid vae loss: 1.57560 perp: 1.26703 kl: 0.30857: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 79  train vae loss: 1.97501 perp: 1.64655 kl: 0.32847:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 78: loss 2.01272 perp 1.69042 kl 0.32230
saving to best model since this is the best valid loss so far.----


At epoch: 79  train vae loss: 2.02004 perp: 1.70545 kl: 0.31459: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 79  valid vae loss: 1.92818 perp: 1.59916 kl: 0.32902:   6%|▌         | 2/34 [00:00<00:02, 11.87batch/s]

>>>>average [92mtraining[0m of epoch 79: loss 2.11062 perp 1.78510 kl 0.32552


At epoch: 79  valid vae loss: 1.62195 perp: 1.31408 kl: 0.30787: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 80  train vae loss: 2.02113 perp: 1.70056 kl: 0.32057:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 79: loss 1.99098 perp 1.67012 kl 0.32085
saving to best model since this is the best valid loss so far.----


At epoch: 80  train vae loss: 1.77102 perp: 1.45091 kl: 0.32011: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 80  valid vae loss: 1.96762 perp: 1.64163 kl: 0.32599:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 80: loss 2.04900 perp 1.72502 kl 0.32398


At epoch: 80  valid vae loss: 1.58255 perp: 1.27804 kl: 0.30451: 100%|██████████| 34/34 [00:01<00:00, 22.78batch/s]
At epoch: 81  train vae loss: 2.17589 perp: 1.84491 kl: 0.33098:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 80: loss 1.96787 perp 1.65017 kl 0.31770
saving to best model since this is the best valid loss so far.----


At epoch: 81  train vae loss: 1.96741 perp: 1.65361 kl: 0.31379: 100%|██████████| 379/379 [00:37<00:00, 10.15batch/s]
At epoch: 81  valid vae loss: 1.85498 perp: 1.53012 kl: 0.32486:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 81: loss 1.98716 perp 1.66609 kl 0.32107


At epoch: 81  valid vae loss: 1.59983 perp: 1.29598 kl: 0.30386: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 82  train vae loss: 1.89757 perp: 1.58195 kl: 0.31562:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 81: loss 1.92285 perp 1.60558 kl 0.31726
saving to best model since this is the best valid loss so far.----


At epoch: 82  train vae loss: 2.03145 perp: 1.70740 kl: 0.32404: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 82  valid vae loss: 2.05847 perp: 1.72702 kl: 0.33145:   3%|▎         | 1/34 [00:00<00:06,  5.17batch/s]

>>>>average [92mtraining[0m of epoch 82: loss 2.04089 perp 1.71790 kl 0.32299


At epoch: 82  valid vae loss: 1.67348 perp: 1.36451 kl: 0.30897: 100%|██████████| 34/34 [00:01<00:00, 20.86batch/s]
At epoch: 83  train vae loss: 1.91360 perp: 1.60289 kl: 0.31071:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 82: loss 2.09351 perp 1.77004 kl 0.32347


At epoch: 83  train vae loss: 1.84215 perp: 1.51509 kl: 0.32706: 100%|██████████| 379/379 [00:38<00:00,  9.76batch/s]
At epoch: 83  valid vae loss: 1.80497 perp: 1.48093 kl: 0.32403:   6%|▌         | 2/34 [00:00<00:02, 11.52batch/s]

>>>>average [92mtraining[0m of epoch 83: loss 1.95042 perp 1.62727 kl 0.32315


At epoch: 83  valid vae loss: 1.46486 perp: 1.16201 kl: 0.30284: 100%|██████████| 34/34 [00:01<00:00, 22.80batch/s]
At epoch: 84  train vae loss: 1.85561 perp: 1.54324 kl: 0.31237:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 83: loss 1.88743 perp 1.57097 kl 0.31646
saving to best model since this is the best valid loss so far.----


At epoch: 84  train vae loss: 1.84990 perp: 1.53765 kl: 0.31226: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 84  valid vae loss: 1.78863 perp: 1.46659 kl: 0.32204:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 84: loss 1.86444 perp 1.54473 kl 0.31971


At epoch: 84  valid vae loss: 1.41498 perp: 1.11349 kl: 0.30149: 100%|██████████| 34/34 [00:01<00:00, 22.49batch/s]
At epoch: 85  train vae loss: 1.74983 perp: 1.43060 kl: 0.31923:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 84: loss 1.82831 perp 1.51425 kl 0.31406
saving to best model since this is the best valid loss so far.----


At epoch: 85  train vae loss: 2.00762 perp: 1.68785 kl: 0.31976: 100%|██████████| 379/379 [00:38<00:00,  9.73batch/s]
At epoch: 85  valid vae loss: 1.71710 perp: 1.39508 kl: 0.32202:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 85: loss 1.87477 perp 1.55552 kl 0.31925


At epoch: 85  valid vae loss: 1.40036 perp: 1.09909 kl: 0.30127: 100%|██████████| 34/34 [00:01<00:00, 22.43batch/s]
At epoch: 86  train vae loss: 1.63727 perp: 1.31604 kl: 0.32123:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 85: loss 1.77806 perp 1.46359 kl 0.31447
saving to best model since this is the best valid loss so far.----


At epoch: 86  train vae loss: 2.64384 perp: 2.32404 kl: 0.31981: 100%|██████████| 379/379 [00:38<00:00,  9.73batch/s]
At epoch: 86  valid vae loss: 1.85191 perp: 1.53342 kl: 0.31849:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 86: loss 1.81206 perp 1.49461 kl 0.31744


At epoch: 86  valid vae loss: 1.59412 perp: 1.29648 kl: 0.29764: 100%|██████████| 34/34 [00:01<00:00, 22.77batch/s]
At epoch: 87  train vae loss: 2.17827 perp: 1.86118 kl: 0.31708:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 86: loss 1.84054 perp 1.52975 kl 0.31079


At epoch: 87  train vae loss: 1.59816 perp: 1.29275 kl: 0.30541: 100%|██████████| 379/379 [00:39<00:00,  9.63batch/s]
At epoch: 87  valid vae loss: 1.68939 perp: 1.36880 kl: 0.32060:   6%|▌         | 2/34 [00:00<00:02, 11.23batch/s]

>>>>average [92mtraining[0m of epoch 87: loss 1.91670 perp 1.59509 kl 0.32162


At epoch: 87  valid vae loss: 1.25593 perp: 0.95633 kl: 0.29960: 100%|██████████| 34/34 [00:01<00:00, 22.89batch/s]
At epoch: 88  train vae loss: 1.57626 perp: 1.26638 kl: 0.30988:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 87: loss 1.69734 perp 1.38412 kl 0.31322
saving to best model since this is the best valid loss so far.----


At epoch: 88  train vae loss: 1.68482 perp: 1.38215 kl: 0.30267: 100%|██████████| 379/379 [00:39<00:00,  9.54batch/s]
At epoch: 88  valid vae loss: 1.62126 perp: 1.30192 kl: 0.31934:   6%|▌         | 2/34 [00:00<00:02, 10.84batch/s]

>>>>average [92mtraining[0m of epoch 88: loss 1.72366 perp 1.40875 kl 0.31491


At epoch: 88  valid vae loss: 1.27416 perp: 0.97530 kl: 0.29885: 100%|██████████| 34/34 [00:01<00:00, 21.97batch/s]
At epoch: 89  train vae loss: 1.68375 perp: 1.38847 kl: 0.29528:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 88: loss 1.69235 perp 1.38023 kl 0.31211
saving to best model since this is the best valid loss so far.----


At epoch: 89  train vae loss: 1.92935 perp: 1.61223 kl: 0.31711: 100%|██████████| 379/379 [00:39<00:00,  9.61batch/s]
At epoch: 89  valid vae loss: 1.69243 perp: 1.37479 kl: 0.31764:   3%|▎         | 1/34 [00:00<00:06,  5.19batch/s]

>>>>average [92mtraining[0m of epoch 89: loss 1.71277 perp 1.39910 kl 0.31368


At epoch: 89  valid vae loss: 1.35516 perp: 1.05799 kl: 0.29717: 100%|██████████| 34/34 [00:01<00:00, 22.33batch/s]
At epoch: 90  train vae loss: 1.66966 perp: 1.34943 kl: 0.32023:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 89: loss 1.83461 perp 1.52417 kl 0.31044


At epoch: 90  train vae loss: 1.73165 perp: 1.42063 kl: 0.31102: 100%|██████████| 379/379 [00:39<00:00,  9.58batch/s]
At epoch: 90  valid vae loss: 1.50710 perp: 1.18912 kl: 0.31798:   6%|▌         | 2/34 [00:00<00:02, 11.27batch/s]

>>>>average [92mtraining[0m of epoch 90: loss 1.68960 perp 1.37640 kl 0.31320


At epoch: 90  valid vae loss: 1.24956 perp: 0.95170 kl: 0.29786: 100%|██████████| 34/34 [00:01<00:00, 23.28batch/s]
At epoch: 91  train vae loss: 1.65878 perp: 1.34120 kl: 0.31759:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 90: loss 1.59510 perp 1.28465 kl 0.31045
saving to best model since this is the best valid loss so far.----


At epoch: 91  train vae loss: 1.61627 perp: 1.30636 kl: 0.30992: 100%|██████████| 379/379 [00:39<00:00,  9.70batch/s]
At epoch: 91  valid vae loss: 1.52278 perp: 1.20766 kl: 0.31512:   6%|▌         | 2/34 [00:00<00:02, 10.91batch/s]

>>>>average [92mtraining[0m of epoch 91: loss 1.66238 perp 1.35109 kl 0.31129


At epoch: 91  valid vae loss: 1.29686 perp: 1.00205 kl: 0.29481: 100%|██████████| 34/34 [00:01<00:00, 23.18batch/s]
At epoch: 92  train vae loss: 1.61796 perp: 1.29617 kl: 0.32179:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 91: loss 1.60783 perp 1.29993 kl 0.30790


At epoch: 92  train vae loss: 1.58433 perp: 1.26379 kl: 0.32055: 100%|██████████| 379/379 [00:38<00:00,  9.88batch/s]
At epoch: 92  valid vae loss: 1.52919 perp: 1.21295 kl: 0.31623:   3%|▎         | 1/34 [00:00<00:06,  5.41batch/s]

>>>>average [92mtraining[0m of epoch 92: loss 1.62336 perp 1.31307 kl 0.31029


At epoch: 92  valid vae loss: 1.20926 perp: 0.91352 kl: 0.29574: 100%|██████████| 34/34 [00:01<00:00, 24.54batch/s]
At epoch: 93  train vae loss: 1.60748 perp: 1.29533 kl: 0.31215:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 92: loss 1.58118 perp 1.27193 kl 0.30925
saving to best model since this is the best valid loss so far.----


At epoch: 93  train vae loss: 1.75607 perp: 1.43740 kl: 0.31868: 100%|██████████| 379/379 [00:37<00:00, 10.03batch/s]
At epoch: 93  valid vae loss: 1.57746 perp: 1.26092 kl: 0.31653:   6%|▌         | 2/34 [00:00<00:02, 11.29batch/s]

>>>>average [92mtraining[0m of epoch 93: loss 1.60176 perp 1.29138 kl 0.31038


At epoch: 93  valid vae loss: 1.23852 perp: 0.94280 kl: 0.29573: 100%|██████████| 34/34 [00:01<00:00, 24.31batch/s]
At epoch: 94  train vae loss: 1.67664 perp: 1.36883 kl: 0.30781:   1%|          | 2/379 [00:00<00:26, 14.09batch/s]

>>>>average [93mvalid[0m of epoch 93: loss 1.62413 perp 1.31474 kl 0.30939


At epoch: 94  train vae loss: 1.63455 perp: 1.30508 kl: 0.32947: 100%|██████████| 379/379 [00:38<00:00,  9.89batch/s]
At epoch: 94  valid vae loss: 1.43732 perp: 1.12008 kl: 0.31725:   6%|▌         | 2/34 [00:00<00:02, 11.54batch/s]

>>>>average [92mtraining[0m of epoch 94: loss 1.61789 perp 1.30838 kl 0.30951


At epoch: 94  valid vae loss: 1.15473 perp: 0.85923 kl: 0.29550: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 95  train vae loss: 1.56764 perp: 1.25189 kl: 0.31575:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 94: loss 1.51507 perp 1.20513 kl 0.30995
saving to best model since this is the best valid loss so far.----


At epoch: 95  train vae loss: 1.57718 perp: 1.26859 kl: 0.30859: 100%|██████████| 379/379 [00:40<00:00,  9.46batch/s]
At epoch: 95  valid vae loss: 1.41072 perp: 1.10257 kl: 0.30815:   6%|▌         | 2/34 [00:00<00:02, 11.30batch/s]

>>>>average [92mtraining[0m of epoch 95: loss 1.50867 perp 1.20057 kl 0.30811


At epoch: 95  valid vae loss: 1.21900 perp: 0.93176 kl: 0.28724: 100%|██████████| 34/34 [00:01<00:00, 22.84batch/s]
At epoch: 96  train vae loss: 1.55621 perp: 1.25205 kl: 0.30417:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 95: loss 1.51814 perp 1.21692 kl 0.30121


At epoch: 96  train vae loss: 1.43868 perp: 1.12765 kl: 0.31102: 100%|██████████| 379/379 [00:38<00:00,  9.87batch/s]
At epoch: 96  valid vae loss: 1.41192 perp: 1.10384 kl: 0.30807:   6%|▌         | 2/34 [00:00<00:02, 11.89batch/s]

>>>>average [92mtraining[0m of epoch 96: loss 1.50708 perp 1.20051 kl 0.30656


At epoch: 96  valid vae loss: 1.18890 perp: 0.90049 kl: 0.28841: 100%|██████████| 34/34 [00:01<00:00, 23.46batch/s]
At epoch: 97  train vae loss: 1.35860 perp: 1.05877 kl: 0.29982:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 96: loss 1.47965 perp 1.17840 kl 0.30125
saving to best model since this is the best valid loss so far.----


At epoch: 97  train vae loss: 1.89016 perp: 1.58099 kl: 0.30917: 100%|██████████| 379/379 [00:38<00:00,  9.72batch/s]
At epoch: 97  valid vae loss: 1.40681 perp: 1.09339 kl: 0.31342:   3%|▎         | 1/34 [00:00<00:06,  5.19batch/s]

>>>>average [92mtraining[0m of epoch 97: loss 1.47114 perp 1.16729 kl 0.30385


At epoch: 97  valid vae loss: 1.10594 perp: 0.81304 kl: 0.29290: 100%|██████████| 34/34 [00:01<00:00, 22.82batch/s]
At epoch: 98  train vae loss: 1.57027 perp: 1.25411 kl: 0.31616:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 97: loss 1.49386 perp 1.18729 kl 0.30657


At epoch: 98  train vae loss: 1.32351 perp: 1.02376 kl: 0.29975: 100%|██████████| 379/379 [00:38<00:00,  9.88batch/s]
At epoch: 98  valid vae loss: 1.26368 perp: 0.95403 kl: 0.30965:   6%|▌         | 2/34 [00:00<00:02, 11.08batch/s]

>>>>average [92mtraining[0m of epoch 98: loss 1.49173 perp 1.18635 kl 0.30537


At epoch: 98  valid vae loss: 1.04775 perp: 0.75786 kl: 0.28989: 100%|██████████| 34/34 [00:01<00:00, 23.06batch/s]
At epoch: 99  train vae loss: 1.29881 perp: 0.99210 kl: 0.30671:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 98: loss 1.37006 perp 1.06700 kl 0.30306
saving to best model since this is the best valid loss so far.----


At epoch: 99  train vae loss: 1.46228 perp: 1.15302 kl: 0.30926: 100%|██████████| 379/379 [00:40<00:00,  9.47batch/s]
At epoch: 99  valid vae loss: 1.37152 perp: 1.06477 kl: 0.30675:   6%|▌         | 2/34 [00:00<00:02, 11.14batch/s]

>>>>average [92mtraining[0m of epoch 99: loss 1.41722 perp 1.11504 kl 0.30218


At epoch: 99  valid vae loss: 1.15024 perp: 0.86387 kl: 0.28637: 100%|██████████| 34/34 [00:01<00:00, 22.73batch/s]
At epoch: 100  train vae loss: 1.31706 perp: 1.02602 kl: 0.29104:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 99: loss 1.43273 perp 1.13286 kl 0.29987


At epoch: 100  train vae loss: 1.41926 perp: 1.11483 kl: 0.30442: 100%|██████████| 379/379 [00:38<00:00,  9.83batch/s]
At epoch: 100  valid vae loss: 1.36452 perp: 1.06277 kl: 0.30175:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 100: loss 1.39481 perp 1.09318 kl 0.30163


At epoch: 100  valid vae loss: 0.98481 perp: 0.70298 kl: 0.28182: 100%|██████████| 34/34 [00:01<00:00, 22.31batch/s]
At epoch: 101  train vae loss: 1.13177 perp: 0.83163 kl: 0.30014:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 100: loss 1.35457 perp 1.05959 kl 0.29498
saving to best model since this is the best valid loss so far.----


At epoch: 101  train vae loss: 1.34618 perp: 1.04728 kl: 0.29890: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 101  valid vae loss: 1.36631 perp: 1.06403 kl: 0.30228:   6%|▌         | 2/34 [00:00<00:02, 11.07batch/s]

>>>>average [92mtraining[0m of epoch 101: loss 1.38662 perp 1.08609 kl 0.30053


At epoch: 101  valid vae loss: 1.19308 perp: 0.91029 kl: 0.28279: 100%|██████████| 34/34 [00:01<00:00, 22.38batch/s]
At epoch: 102  train vae loss: 1.41965 perp: 1.10962 kl: 0.31003:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 101: loss 1.43207 perp 1.13613 kl 0.29594


At epoch: 102  train vae loss: 1.33503 perp: 1.03835 kl: 0.29668: 100%|██████████| 379/379 [00:38<00:00,  9.79batch/s]
At epoch: 102  valid vae loss: 1.24313 perp: 0.94391 kl: 0.29922:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 102: loss 1.40879 perp 1.10740 kl 0.30140


At epoch: 102  valid vae loss: 0.92570 perp: 0.64619 kl: 0.27951: 100%|██████████| 34/34 [00:01<00:00, 22.29batch/s]
At epoch: 103  train vae loss: 1.20309 perp: 0.91013 kl: 0.29296:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 102: loss 1.27185 perp 0.97887 kl 0.29298
saving to best model since this is the best valid loss so far.----


At epoch: 103  train vae loss: 1.31160 perp: 1.02235 kl: 0.28925: 100%|██████████| 379/379 [00:39<00:00,  9.71batch/s]
At epoch: 103  valid vae loss: 1.30113 perp: 1.00235 kl: 0.29879:   6%|▌         | 2/34 [00:00<00:02, 11.13batch/s]

>>>>average [92mtraining[0m of epoch 103: loss 1.29541 perp 0.99951 kl 0.29591


At epoch: 103  valid vae loss: 1.01034 perp: 0.73149 kl: 0.27885: 100%|██████████| 34/34 [00:01<00:00, 22.32batch/s]
At epoch: 104  train vae loss: 1.35811 perp: 1.06145 kl: 0.29666:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 103: loss 1.32348 perp 1.03132 kl 0.29216


At epoch: 104  train vae loss: 1.13449 perp: 0.84217 kl: 0.29233: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 104  valid vae loss: 1.25220 perp: 0.94793 kl: 0.30426:   6%|▌         | 2/34 [00:00<00:02, 11.16batch/s]

>>>>average [92mtraining[0m of epoch 104: loss 1.39437 perp 1.09674 kl 0.29763


At epoch: 104  valid vae loss: 0.99891 perp: 0.71484 kl: 0.28407: 100%|██████████| 34/34 [00:01<00:00, 22.38batch/s]
At epoch: 105  train vae loss: 1.13164 perp: 0.82980 kl: 0.30184:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 104: loss 1.27313 perp 0.97527 kl 0.29786


At epoch: 105  train vae loss: 1.18095 perp: 0.89051 kl: 0.29044: 100%|██████████| 379/379 [00:39<00:00,  9.63batch/s]
At epoch: 105  valid vae loss: 1.19618 perp: 0.90216 kl: 0.29402:   6%|▌         | 2/34 [00:00<00:02, 11.00batch/s]

>>>>average [92mtraining[0m of epoch 105: loss 1.22336 perp 0.92856 kl 0.29479


At epoch: 105  valid vae loss: 1.06112 perp: 0.78649 kl: 0.27463: 100%|██████████| 34/34 [00:01<00:00, 22.28batch/s]
At epoch: 106  train vae loss: 1.20374 perp: 0.91443 kl: 0.28930:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 105: loss 1.26751 perp 0.97941 kl 0.28811
saving to best model since this is the best valid loss so far.----


At epoch: 106  train vae loss: 1.12570 perp: 0.83604 kl: 0.28966: 100%|██████████| 379/379 [00:39<00:00,  9.68batch/s]
At epoch: 106  valid vae loss: 1.15135 perp: 0.85518 kl: 0.29617:   6%|▌         | 2/34 [00:00<00:02, 11.14batch/s]

>>>>average [92mtraining[0m of epoch 106: loss 1.24546 perp 0.95476 kl 0.29069


At epoch: 106  valid vae loss: 0.91336 perp: 0.63738 kl: 0.27598: 100%|██████████| 34/34 [00:01<00:00, 22.34batch/s]
At epoch: 107  train vae loss: 1.24056 perp: 0.95110 kl: 0.28946:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 106: loss 1.24394 perp 0.95434 kl 0.28960
saving to best model since this is the best valid loss so far.----


At epoch: 107  train vae loss: 1.18569 perp: 0.89242 kl: 0.29327: 100%|██████████| 379/379 [00:39<00:00,  9.71batch/s]
At epoch: 107  valid vae loss: 1.15417 perp: 0.85673 kl: 0.29744:   6%|▌         | 2/34 [00:00<00:03, 10.53batch/s]

>>>>average [92mtraining[0m of epoch 107: loss 1.24160 perp 0.95028 kl 0.29132


At epoch: 107  valid vae loss: 0.88832 perp: 0.61105 kl: 0.27726: 100%|██████████| 34/34 [00:01<00:00, 20.88batch/s]
At epoch: 108  train vae loss: 1.21876 perp: 0.93034 kl: 0.28841:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 107: loss 1.25013 perp 0.95914 kl 0.29098


At epoch: 108  train vae loss: 1.08482 perp: 0.79044 kl: 0.29439: 100%|██████████| 379/379 [00:40<00:00,  9.39batch/s]
At epoch: 108  valid vae loss: 1.06004 perp: 0.76816 kl: 0.29188:   6%|▌         | 2/34 [00:00<00:02, 11.04batch/s]

>>>>average [92mtraining[0m of epoch 108: loss 1.19952 perp 0.90923 kl 0.29029


At epoch: 108  valid vae loss: 0.88120 perp: 0.60857 kl: 0.27263: 100%|██████████| 34/34 [00:01<00:00, 22.37batch/s]
At epoch: 109  train vae loss: 1.17011 perp: 0.88681 kl: 0.28330:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 108: loss 1.18481 perp 0.89866 kl 0.28615
saving to best model since this is the best valid loss so far.----


At epoch: 109  train vae loss: 1.25148 perp: 0.97657 kl: 0.27491: 100%|██████████| 379/379 [00:39<00:00,  9.70batch/s]
At epoch: 109  valid vae loss: 1.07881 perp: 0.78974 kl: 0.28907:   6%|▌         | 2/34 [00:00<00:02, 11.20batch/s]

>>>>average [92mtraining[0m of epoch 109: loss 1.20470 perp 0.91597 kl 0.28873


At epoch: 109  valid vae loss: 0.83341 perp: 0.56412 kl: 0.26929: 100%|██████████| 34/34 [00:01<00:00, 22.39batch/s]
At epoch: 110  train vae loss: 1.14448 perp: 0.85468 kl: 0.28980:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 109: loss 1.16707 perp 0.88408 kl 0.28298
saving to best model since this is the best valid loss so far.----


At epoch: 110  train vae loss: 1.33714 perp: 1.05627 kl: 0.28087: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 110  valid vae loss: 1.18778 perp: 0.89949 kl: 0.28829:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 110: loss 1.19186 perp 0.90357 kl 0.28830


At epoch: 110  valid vae loss: 0.99961 perp: 0.73129 kl: 0.26833: 100%|██████████| 34/34 [00:01<00:00, 22.41batch/s]
At epoch: 111  train vae loss: 1.01393 perp: 0.72966 kl: 0.28427:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 110: loss 1.24036 perp 0.95870 kl 0.28166


At epoch: 111  train vae loss: 1.17829 perp: 0.90047 kl: 0.27782: 100%|██████████| 379/379 [00:39<00:00,  9.60batch/s]
At epoch: 111  valid vae loss: 1.04340 perp: 0.75467 kl: 0.28872:   6%|▌         | 2/34 [00:00<00:03, 10.27batch/s]

>>>>average [92mtraining[0m of epoch 111: loss 1.16534 perp 0.87860 kl 0.28674


At epoch: 111  valid vae loss: 0.82920 perp: 0.56022 kl: 0.26898: 100%|██████████| 34/34 [00:01<00:00, 21.13batch/s]
At epoch: 112  train vae loss: 1.12428 perp: 0.83488 kl: 0.28940:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 111: loss 1.12379 perp 0.84126 kl 0.28253
saving to best model since this is the best valid loss so far.----


At epoch: 112  train vae loss: 1.15381 perp: 0.87574 kl: 0.27807: 100%|██████████| 379/379 [00:39<00:00,  9.69batch/s]
At epoch: 112  valid vae loss: 1.05073 perp: 0.76302 kl: 0.28771:   6%|▌         | 2/34 [00:00<00:02, 11.22batch/s]

>>>>average [92mtraining[0m of epoch 112: loss 1.12321 perp 0.83885 kl 0.28435


At epoch: 112  valid vae loss: 0.81713 perp: 0.54892 kl: 0.26821: 100%|██████████| 34/34 [00:01<00:00, 22.35batch/s]
At epoch: 113  train vae loss: 1.17361 perp: 0.88880 kl: 0.28480:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 112: loss 1.14446 perp 0.86233 kl 0.28213


At epoch: 113  train vae loss: 1.12634 perp: 0.84959 kl: 0.27675: 100%|██████████| 379/379 [00:38<00:00,  9.76batch/s]
At epoch: 113  valid vae loss: 0.99023 perp: 0.70435 kl: 0.28588:   6%|▌         | 2/34 [00:00<00:02, 11.09batch/s]

>>>>average [92mtraining[0m of epoch 113: loss 1.12084 perp 0.83681 kl 0.28404


At epoch: 113  valid vae loss: 0.70944 perp: 0.44259 kl: 0.26685: 100%|██████████| 34/34 [00:01<00:00, 22.27batch/s]
At epoch: 114  train vae loss: 1.17666 perp: 0.89914 kl: 0.27753:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 113: loss 1.08944 perp 0.80914 kl 0.28029
saving to best model since this is the best valid loss so far.----


At epoch: 114  train vae loss: 0.98810 perp: 0.70748 kl: 0.28062: 100%|██████████| 379/379 [00:38<00:00,  9.76batch/s]
At epoch: 114  valid vae loss: 0.98282 perp: 0.69828 kl: 0.28454:   6%|▌         | 2/34 [00:00<00:02, 10.86batch/s]

>>>>average [92mtraining[0m of epoch 114: loss 1.10988 perp 0.82649 kl 0.28339


At epoch: 114  valid vae loss: 0.73407 perp: 0.46890 kl: 0.26517: 100%|██████████| 34/34 [00:01<00:00, 22.21batch/s]
At epoch: 115  train vae loss: 0.95111 perp: 0.66930 kl: 0.28181:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 114: loss 1.09512 perp 0.81637 kl 0.27875


At epoch: 115  train vae loss: 1.04901 perp: 0.77371 kl: 0.27530: 100%|██████████| 379/379 [00:38<00:00,  9.77batch/s]
At epoch: 115  valid vae loss: 0.98203 perp: 0.69757 kl: 0.28446:   6%|▌         | 2/34 [00:00<00:02, 11.05batch/s]

>>>>average [92mtraining[0m of epoch 115: loss 1.08494 perp 0.80374 kl 0.28120


At epoch: 115  valid vae loss: 0.83736 perp: 0.57254 kl: 0.26483: 100%|██████████| 34/34 [00:01<00:00, 22.42batch/s]
At epoch: 116  train vae loss: 1.01944 perp: 0.73502 kl: 0.28443:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 115: loss 1.08978 perp 0.81102 kl 0.27876


At epoch: 116  train vae loss: 1.10579 perp: 0.82196 kl: 0.28383: 100%|██████████| 379/379 [00:38<00:00,  9.76batch/s]
At epoch: 116  valid vae loss: 0.99574 perp: 0.71291 kl: 0.28284:   6%|▌         | 2/34 [00:00<00:02, 11.28batch/s]

>>>>average [92mtraining[0m of epoch 116: loss 1.07625 perp 0.79658 kl 0.27967


At epoch: 116  valid vae loss: 0.80715 perp: 0.54421 kl: 0.26294: 100%|██████████| 34/34 [00:01<00:00, 22.47batch/s]
At epoch: 117  train vae loss: 1.02022 perp: 0.74963 kl: 0.27059:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 116: loss 1.05239 perp 0.77534 kl 0.27705
saving to best model since this is the best valid loss so far.----


At epoch: 117  train vae loss: 1.14902 perp: 0.86806 kl: 0.28095: 100%|██████████| 379/379 [00:38<00:00,  9.76batch/s]
At epoch: 117  valid vae loss: 0.97648 perp: 0.69762 kl: 0.27886:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 117: loss 1.02196 perp 0.74463 kl 0.27733


At epoch: 117  valid vae loss: 0.75160 perp: 0.49203 kl: 0.25957: 100%|██████████| 34/34 [00:01<00:00, 22.38batch/s]
At epoch: 118  train vae loss: 1.08874 perp: 0.80800 kl: 0.28074:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 117: loss 1.08752 perp 0.81405 kl 0.27347


At epoch: 118  train vae loss: 1.07054 perp: 0.79237 kl: 0.27817: 100%|██████████| 379/379 [00:39<00:00,  9.59batch/s]
At epoch: 118  valid vae loss: 0.87943 perp: 0.60381 kl: 0.27562:   6%|▌         | 2/34 [00:00<00:02, 10.83batch/s]

>>>>average [92mtraining[0m of epoch 118: loss 1.03339 perp 0.75846 kl 0.27492


At epoch: 118  valid vae loss: 0.75111 perp: 0.49496 kl: 0.25615: 100%|██████████| 34/34 [00:01<00:00, 21.98batch/s]
At epoch: 119  train vae loss: 0.79240 perp: 0.52602 kl: 0.26637:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 118: loss 0.99850 perp 0.72872 kl 0.26978
saving to best model since this is the best valid loss so far.----


At epoch: 119  train vae loss: 1.12949 perp: 0.85058 kl: 0.27891: 100%|██████████| 379/379 [00:39<00:00,  9.57batch/s]
At epoch: 119  valid vae loss: 0.89618 perp: 0.61570 kl: 0.28048:   6%|▌         | 2/34 [00:00<00:02, 10.77batch/s]

>>>>average [92mtraining[0m of epoch 119: loss 1.05259 perp 0.77661 kl 0.27597


At epoch: 119  valid vae loss: 0.71751 perp: 0.45746 kl: 0.26005: 100%|██████████| 34/34 [00:01<00:00, 21.96batch/s]
At epoch: 120  train vae loss: 0.93344 perp: 0.66076 kl: 0.27269:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 119: loss 1.01732 perp 0.74245 kl 0.27487


At epoch: 120  train vae loss: 0.94433 perp: 0.67259 kl: 0.27174: 100%|██████████| 379/379 [00:39<00:00,  9.62batch/s]
At epoch: 120  valid vae loss: 0.87626 perp: 0.59986 kl: 0.27641:   6%|▌         | 2/34 [00:00<00:02, 11.09batch/s]

>>>>average [92mtraining[0m of epoch 120: loss 1.00350 perp 0.72852 kl 0.27498


At epoch: 120  valid vae loss: 0.70486 perp: 0.44716 kl: 0.25770: 100%|██████████| 34/34 [00:01<00:00, 21.98batch/s]
At epoch: 121  train vae loss: 0.97216 perp: 0.69240 kl: 0.27976:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 120: loss 1.01735 perp 0.74637 kl 0.27098


At epoch: 121  train vae loss: 0.96934 perp: 0.69979 kl: 0.26955: 100%|██████████| 379/379 [00:39<00:00,  9.64batch/s]
At epoch: 121  valid vae loss: 0.83517 perp: 0.56221 kl: 0.27296:   6%|▌         | 2/34 [00:00<00:02, 10.93batch/s]

>>>>average [92mtraining[0m of epoch 121: loss 0.95579 perp 0.68397 kl 0.27182


At epoch: 121  valid vae loss: 0.72372 perp: 0.47042 kl: 0.25329: 100%|██████████| 34/34 [00:01<00:00, 22.12batch/s]
At epoch: 122  train vae loss: 0.90010 perp: 0.62999 kl: 0.27010:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 121: loss 0.98001 perp 0.71244 kl 0.26756
saving to best model since this is the best valid loss so far.----


At epoch: 122  train vae loss: 0.86336 perp: 0.58813 kl: 0.27523: 100%|██████████| 379/379 [00:39<00:00,  9.63batch/s]
At epoch: 122  valid vae loss: 0.90336 perp: 0.63030 kl: 0.27306:   6%|▌         | 2/34 [00:00<00:02, 10.84batch/s]

>>>>average [92mtraining[0m of epoch 122: loss 0.99597 perp 0.72389 kl 0.27208


At epoch: 122  valid vae loss: 0.79537 perp: 0.54094 kl: 0.25443: 100%|██████████| 34/34 [00:01<00:00, 22.11batch/s]
At epoch: 123  train vae loss: 0.89586 perp: 0.63425 kl: 0.26161:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 122: loss 1.01564 perp 0.74788 kl 0.26776


At epoch: 123  train vae loss: 0.93578 perp: 0.65532 kl: 0.28046: 100%|██████████| 379/379 [00:39<00:00,  9.64batch/s]
At epoch: 123  valid vae loss: 0.81541 perp: 0.54366 kl: 0.27175:   6%|▌         | 2/34 [00:00<00:02, 11.10batch/s]

>>>>average [92mtraining[0m of epoch 123: loss 0.99515 perp 0.72328 kl 0.27188


At epoch: 123  valid vae loss: 0.70579 perp: 0.45342 kl: 0.25236: 100%|██████████| 34/34 [00:01<00:00, 21.59batch/s]
At epoch: 124  train vae loss: 0.97082 perp: 0.67921 kl: 0.29161:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 123: loss 0.92670 perp 0.66043 kl 0.26627
saving to best model since this is the best valid loss so far.----


At epoch: 124  train vae loss: 0.96767 perp: 0.70866 kl: 0.25901: 100%|██████████| 379/379 [00:40<00:00,  9.45batch/s]
At epoch: 124  valid vae loss: 0.88452 perp: 0.61337 kl: 0.27115:   3%|▎         | 1/34 [00:00<00:06,  5.14batch/s]

>>>>average [92mtraining[0m of epoch 124: loss 0.91479 perp 0.64661 kl 0.26819


At epoch: 124  valid vae loss: 0.74928 perp: 0.49761 kl: 0.25166: 100%|██████████| 34/34 [00:01<00:00, 20.94batch/s]
At epoch: 125  train vae loss: 0.90866 perp: 0.63796 kl: 0.27070:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 124: loss 0.94556 perp 0.67964 kl 0.26593


At epoch: 125  train vae loss: 0.93177 perp: 0.66764 kl: 0.26413: 100%|██████████| 379/379 [00:39<00:00,  9.49batch/s]
At epoch: 125  valid vae loss: 0.78039 perp: 0.50946 kl: 0.27092:   6%|▌         | 2/34 [00:00<00:02, 10.87batch/s]

>>>>average [92mtraining[0m of epoch 125: loss 0.90325 perp 0.63785 kl 0.26539


At epoch: 125  valid vae loss: 0.59920 perp: 0.34755 kl: 0.25164: 100%|██████████| 34/34 [00:01<00:00, 22.11batch/s]
At epoch: 126  train vae loss: 0.86933 perp: 0.60354 kl: 0.26579:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 125: loss 0.93706 perp 0.67153 kl 0.26553


At epoch: 126  train vae loss: 0.76272 perp: 0.50046 kl: 0.26226: 100%|██████████| 379/379 [00:39<00:00,  9.59batch/s]
At epoch: 126  valid vae loss: 0.83312 perp: 0.56357 kl: 0.26955:   6%|▌         | 2/34 [00:00<00:02, 11.07batch/s]

>>>>average [92mtraining[0m of epoch 126: loss 0.95122 perp 0.68232 kl 0.26890


At epoch: 126  valid vae loss: 0.66727 perp: 0.41711 kl: 0.25016: 100%|██████████| 34/34 [00:01<00:00, 21.98batch/s]
At epoch: 127  train vae loss: 0.92584 perp: 0.65746 kl: 0.26838:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 126: loss 0.93077 perp 0.66681 kl 0.26396


At epoch: 127  train vae loss: 0.87756 perp: 0.61900 kl: 0.25856: 100%|██████████| 379/379 [00:39<00:00,  9.62batch/s]
At epoch: 127  valid vae loss: 0.78453 perp: 0.51868 kl: 0.26585:   6%|▌         | 2/34 [00:00<00:02, 10.89batch/s]

>>>>average [92mtraining[0m of epoch 127: loss 0.93735 perp 0.67157 kl 0.26578


At epoch: 127  valid vae loss: 0.62528 perp: 0.37933 kl: 0.24595: 100%|██████████| 34/34 [00:01<00:00, 22.05batch/s]
At epoch: 128  train vae loss: 0.77449 perp: 0.51788 kl: 0.25661:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 127: loss 0.89083 perp 0.63049 kl 0.26034
saving to best model since this is the best valid loss so far.----


At epoch: 128  train vae loss: 0.88576 perp: 0.62861 kl: 0.25715: 100%|██████████| 379/379 [00:39<00:00,  9.62batch/s]
At epoch: 128  valid vae loss: 0.82958 perp: 0.56571 kl: 0.26387:   6%|▌         | 2/34 [00:00<00:02, 10.89batch/s]

>>>>average [92mtraining[0m of epoch 128: loss 0.85031 perp 0.58861 kl 0.26169


At epoch: 128  valid vae loss: 0.56669 perp: 0.32165 kl: 0.24503: 100%|██████████| 34/34 [00:01<00:00, 22.10batch/s]
At epoch: 129  train vae loss: 0.86180 perp: 0.60180 kl: 0.26000:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 128: loss 0.87797 perp 0.61930 kl 0.25867
saving to best model since this is the best valid loss so far.----


At epoch: 129  train vae loss: 1.00028 perp: 0.74039 kl: 0.25990: 100%|██████████| 379/379 [00:39<00:00,  9.64batch/s]
At epoch: 129  valid vae loss: 0.77514 perp: 0.51337 kl: 0.26177:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 129: loss 0.84809 perp 0.58866 kl 0.25943


At epoch: 129  valid vae loss: 0.66984 perp: 0.42684 kl: 0.24300: 100%|██████████| 34/34 [00:01<00:00, 22.16batch/s]
At epoch: 130  train vae loss: 0.82121 perp: 0.55614 kl: 0.26508:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 129: loss 0.86002 perp 0.60340 kl 0.25662
saving to best model since this is the best valid loss so far.----


At epoch: 130  train vae loss: 0.94714 perp: 0.69789 kl: 0.24925: 100%|██████████| 379/379 [00:39<00:00,  9.61batch/s]
At epoch: 130  valid vae loss: 0.82020 perp: 0.55997 kl: 0.26023:   6%|▌         | 2/34 [00:00<00:03, 10.66batch/s]

>>>>average [92mtraining[0m of epoch 130: loss 0.85175 perp 0.59328 kl 0.25848


At epoch: 130  valid vae loss: 0.63802 perp: 0.39724 kl: 0.24078: 100%|██████████| 34/34 [00:01<00:00, 21.16batch/s]
At epoch: 131  train vae loss: 0.88337 perp: 0.62385 kl: 0.25953:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 130: loss 0.92045 perp 0.66586 kl 0.25460


At epoch: 131  train vae loss: 0.72781 perp: 0.47220 kl: 0.25560: 100%|██████████| 379/379 [00:40<00:00,  9.41batch/s]
At epoch: 131  valid vae loss: 0.75050 perp: 0.49106 kl: 0.25943:   6%|▌         | 2/34 [00:00<00:02, 10.92batch/s]

>>>>average [92mtraining[0m of epoch 131: loss 0.85180 perp 0.59342 kl 0.25837


At epoch: 131  valid vae loss: 0.64375 perp: 0.40219 kl: 0.24156: 100%|██████████| 34/34 [00:01<00:00, 22.07batch/s]
At epoch: 132  train vae loss: 0.84925 perp: 0.59649 kl: 0.25276:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 131: loss 0.86479 perp 0.60988 kl 0.25491


At epoch: 132  train vae loss: 0.89149 perp: 0.63265 kl: 0.25884: 100%|██████████| 379/379 [00:39<00:00,  9.54batch/s]
At epoch: 132  valid vae loss: 0.72643 perp: 0.47020 kl: 0.25623:   6%|▌         | 2/34 [00:00<00:02, 10.77batch/s]

>>>>average [92mtraining[0m of epoch 132: loss 0.84320 perp 0.58596 kl 0.25723


At epoch: 132  valid vae loss: 0.58765 perp: 0.35028 kl: 0.23737: 100%|██████████| 34/34 [00:01<00:00, 21.98batch/s]
At epoch: 133  train vae loss: 0.75379 perp: 0.50373 kl: 0.25007:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 132: loss 0.81637 perp 0.56522 kl 0.25115
saving to best model since this is the best valid loss so far.----


At epoch: 133  train vae loss: 0.82299 perp: 0.56038 kl: 0.26261: 100%|██████████| 379/379 [00:39<00:00,  9.48batch/s]
At epoch: 133  valid vae loss: 0.71130 perp: 0.45498 kl: 0.25632:   6%|▌         | 2/34 [00:00<00:02, 10.80batch/s]

>>>>average [92mtraining[0m of epoch 133: loss 0.83051 perp 0.57561 kl 0.25490


At epoch: 133  valid vae loss: 0.53915 perp: 0.30181 kl: 0.23734: 100%|██████████| 34/34 [00:01<00:00, 21.93batch/s]
At epoch: 134  train vae loss: 0.65427 perp: 0.40632 kl: 0.24794:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 133: loss 0.81451 perp 0.56342 kl 0.25109
saving to best model since this is the best valid loss so far.----


At epoch: 134  train vae loss: 0.80506 perp: 0.54726 kl: 0.25780: 100%|██████████| 379/379 [00:39<00:00,  9.61batch/s]
At epoch: 134  valid vae loss: 0.76271 perp: 0.50453 kl: 0.25818:   6%|▌         | 2/34 [00:00<00:02, 10.82batch/s]

>>>>average [92mtraining[0m of epoch 134: loss 0.82865 perp 0.57474 kl 0.25391


At epoch: 134  valid vae loss: 0.68575 perp: 0.44635 kl: 0.23940: 100%|██████████| 34/34 [00:01<00:00, 22.04batch/s]
At epoch: 135  train vae loss: 0.87861 perp: 0.62207 kl: 0.25654:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 134: loss 0.88966 perp 0.63668 kl 0.25298


At epoch: 135  train vae loss: 0.73818 perp: 0.48765 kl: 0.25053: 100%|██████████| 379/379 [00:39<00:00,  9.63batch/s]
At epoch: 135  valid vae loss: 0.77964 perp: 0.52095 kl: 0.25868:   6%|▌         | 2/34 [00:00<00:02, 10.87batch/s]

>>>>average [92mtraining[0m of epoch 135: loss 0.81938 perp 0.56483 kl 0.25455


At epoch: 135  valid vae loss: 0.57980 perp: 0.33927 kl: 0.24053: 100%|██████████| 34/34 [00:01<00:00, 22.07batch/s]
At epoch: 136  train vae loss: 0.85550 perp: 0.59559 kl: 0.25991:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 135: loss 0.85123 perp 0.59723 kl 0.25399


At epoch: 136  train vae loss: 0.69649 perp: 0.44261 kl: 0.25388: 100%|██████████| 379/379 [00:39<00:00,  9.56batch/s]
At epoch: 136  valid vae loss: 0.73957 perp: 0.48723 kl: 0.25234:   6%|▌         | 2/34 [00:00<00:02, 10.87batch/s]

>>>>average [92mtraining[0m of epoch 136: loss 0.78671 perp 0.53350 kl 0.25322


At epoch: 136  valid vae loss: 0.56469 perp: 0.33042 kl: 0.23427: 100%|██████████| 34/34 [00:01<00:00, 22.06batch/s]
At epoch: 137  train vae loss: 0.86190 perp: 0.61530 kl: 0.24660:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 136: loss 0.81771 perp 0.57041 kl 0.24730


At epoch: 137  train vae loss: 0.76389 perp: 0.51590 kl: 0.24799: 100%|██████████| 379/379 [00:39<00:00,  9.62batch/s]
At epoch: 137  valid vae loss: 0.78265 perp: 0.53052 kl: 0.25213:   6%|▌         | 2/34 [00:00<00:03, 10.44batch/s]

>>>>average [92mtraining[0m of epoch 137: loss 0.76932 perp 0.51937 kl 0.24995


At epoch: 137  valid vae loss: 0.57505 perp: 0.34121 kl: 0.23384: 100%|██████████| 34/34 [00:01<00:00, 20.88batch/s]
At epoch: 138  train vae loss: 0.74313 perp: 0.49079 kl: 0.25234:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 137: loss 0.84916 perp 0.60208 kl 0.24708


At epoch: 138  train vae loss: 0.68880 perp: 0.45175 kl: 0.23705: 100%|██████████| 379/379 [00:41<00:00,  9.16batch/s]
At epoch: 138  valid vae loss: 0.67514 perp: 0.42522 kl: 0.24992:   6%|▌         | 2/34 [00:00<00:03, 10.20batch/s]

>>>>average [92mtraining[0m of epoch 138: loss 0.83630 perp 0.58256 kl 0.25374


At epoch: 138  valid vae loss: 0.47715 perp: 0.24552 kl: 0.23163: 100%|██████████| 34/34 [00:01<00:00, 20.84batch/s]
At epoch: 139  train vae loss: 0.65517 perp: 0.40791 kl: 0.24727:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 138: loss 0.75646 perp 0.51116 kl 0.24530
saving to best model since this is the best valid loss so far.----


At epoch: 139  train vae loss: 0.72281 perp: 0.47493 kl: 0.24789: 100%|██████████| 379/379 [00:40<00:00,  9.39batch/s]
At epoch: 139  valid vae loss: 0.67620 perp: 0.42666 kl: 0.24954:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 139: loss 0.71674 perp 0.47136 kl 0.24538


At epoch: 139  valid vae loss: 0.45923 perp: 0.22762 kl: 0.23161: 100%|██████████| 34/34 [00:01<00:00, 22.09batch/s]
At epoch: 140  train vae loss: 0.66056 perp: 0.41725 kl: 0.24331:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 139: loss 0.77603 perp 0.53079 kl 0.24525


At epoch: 140  train vae loss: 0.69359 perp: 0.46170 kl: 0.23189: 100%|██████████| 379/379 [00:39<00:00,  9.61batch/s]
At epoch: 140  valid vae loss: 0.68962 perp: 0.44450 kl: 0.24512:   6%|▌         | 2/34 [00:00<00:02, 10.79batch/s]

>>>>average [92mtraining[0m of epoch 140: loss 0.73470 perp 0.49014 kl 0.24457


At epoch: 140  valid vae loss: 0.51536 perp: 0.28825 kl: 0.22711: 100%|██████████| 34/34 [00:01<00:00, 21.87batch/s]
At epoch: 141  train vae loss: 0.72354 perp: 0.47690 kl: 0.24663:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 140: loss 0.75815 perp 0.51752 kl 0.24063


At epoch: 141  train vae loss: 0.91260 perp: 0.67221 kl: 0.24039: 100%|██████████| 379/379 [00:39<00:00,  9.61batch/s]
At epoch: 141  valid vae loss: 0.77184 perp: 0.52414 kl: 0.24770:   6%|▌         | 2/34 [00:00<00:02, 10.95batch/s]

>>>>average [92mtraining[0m of epoch 141: loss 0.75638 perp 0.51146 kl 0.24492


At epoch: 141  valid vae loss: 0.61908 perp: 0.38860 kl: 0.23047: 100%|██████████| 34/34 [00:01<00:00, 21.89batch/s]
At epoch: 142  train vae loss: 0.95841 perp: 0.71186 kl: 0.24655:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 141: loss 0.82916 perp 0.58585 kl 0.24331


At epoch: 142  train vae loss: 0.63389 perp: 0.38400 kl: 0.24988: 100%|██████████| 379/379 [00:39<00:00,  9.48batch/s]
At epoch: 142  valid vae loss: 0.62023 perp: 0.36880 kl: 0.25143:   3%|▎         | 1/34 [00:00<00:06,  5.13batch/s]

>>>>average [92mtraining[0m of epoch 142: loss 0.95447 perp 0.70154 kl 0.25293


At epoch: 142  valid vae loss: 0.56581 perp: 0.33369 kl: 0.23212: 100%|██████████| 34/34 [00:01<00:00, 21.17batch/s]
At epoch: 143  train vae loss: 0.72249 perp: 0.47712 kl: 0.24537:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 142: loss 0.73023 perp 0.48398 kl 0.24625
saving to best model since this is the best valid loss so far.----


At epoch: 143  train vae loss: 0.61427 perp: 0.37270 kl: 0.24157: 100%|██████████| 379/379 [00:39<00:00,  9.64batch/s]
At epoch: 143  valid vae loss: 0.62368 perp: 0.38177 kl: 0.24191:   6%|▌         | 2/34 [00:00<00:03, 10.39batch/s]

>>>>average [92mtraining[0m of epoch 143: loss 0.64477 perp 0.40260 kl 0.24217


At epoch: 143  valid vae loss: 0.47921 perp: 0.25564 kl: 0.22357: 100%|██████████| 34/34 [00:01<00:00, 20.99batch/s]
At epoch: 144  train vae loss: 0.67351 perp: 0.43000 kl: 0.24351:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 143: loss 0.74344 perp 0.50646 kl 0.23698


At epoch: 144  train vae loss: 0.72462 perp: 0.48343 kl: 0.24119: 100%|██████████| 379/379 [00:39<00:00,  9.50batch/s]
At epoch: 144  valid vae loss: 0.62175 perp: 0.38272 kl: 0.23903:   6%|▌         | 2/34 [00:00<00:02, 11.15batch/s]

>>>>average [92mtraining[0m of epoch 144: loss 0.66768 perp 0.42977 kl 0.23791


At epoch: 144  valid vae loss: 0.55550 perp: 0.33447 kl: 0.22103: 100%|██████████| 34/34 [00:01<00:00, 22.30batch/s]
At epoch: 145  train vae loss: 0.74898 perp: 0.51354 kl: 0.23544:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 144: loss 0.73798 perp 0.50343 kl 0.23456


At epoch: 145  train vae loss: 0.72609 perp: 0.49280 kl: 0.23329: 100%|██████████| 379/379 [00:39<00:00,  9.59batch/s]
At epoch: 145  valid vae loss: 0.62725 perp: 0.38530 kl: 0.24195:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 145: loss 0.67192 perp 0.43570 kl 0.23622


At epoch: 145  valid vae loss: 0.43284 perp: 0.20827 kl: 0.22456: 100%|██████████| 34/34 [00:01<00:00, 22.35batch/s]
At epoch: 146  train vae loss: 0.67777 perp: 0.43821 kl: 0.23955:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 145: loss 0.72463 perp 0.48732 kl 0.23732
saving to best model since this is the best valid loss so far.----


At epoch: 146  train vae loss: 0.61006 perp: 0.36846 kl: 0.24160: 100%|██████████| 379/379 [00:40<00:00,  9.39batch/s]
At epoch: 146  valid vae loss: 0.60261 perp: 0.36215 kl: 0.24047:   6%|▌         | 2/34 [00:00<00:02, 11.07batch/s]

>>>>average [92mtraining[0m of epoch 146: loss 0.69977 perp 0.46099 kl 0.23878


At epoch: 146  valid vae loss: 0.46745 perp: 0.24533 kl: 0.22213: 100%|██████████| 34/34 [00:01<00:00, 22.19batch/s]
At epoch: 147  train vae loss: 0.57505 perp: 0.34226 kl: 0.23279:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 146: loss 0.72112 perp 0.48564 kl 0.23549
saving to best model since this is the best valid loss so far.----


At epoch: 147  train vae loss: 0.78637 perp: 0.54835 kl: 0.23802: 100%|██████████| 379/379 [00:39<00:00,  9.69batch/s]
At epoch: 147  valid vae loss: 0.60915 perp: 0.36772 kl: 0.24143:   6%|▌         | 2/34 [00:00<00:02, 10.93batch/s]

>>>>average [92mtraining[0m of epoch 147: loss 0.69844 perp 0.45969 kl 0.23875


At epoch: 147  valid vae loss: 0.41685 perp: 0.19359 kl: 0.22325: 100%|██████████| 34/34 [00:01<00:00, 22.30batch/s]
At epoch: 148  train vae loss: 0.62372 perp: 0.38164 kl: 0.24208:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 147: loss 0.72569 perp 0.48919 kl 0.23651


At epoch: 148  train vae loss: 0.64077 perp: 0.40908 kl: 0.23169: 100%|██████████| 379/379 [00:38<00:00,  9.75batch/s]
At epoch: 148  valid vae loss: 0.64109 perp: 0.40195 kl: 0.23915:   6%|▌         | 2/34 [00:00<00:02, 11.17batch/s]

>>>>average [92mtraining[0m of epoch 148: loss 0.67966 perp 0.44308 kl 0.23658


At epoch: 148  valid vae loss: 0.50762 perp: 0.28606 kl: 0.22156: 100%|██████████| 34/34 [00:01<00:00, 22.35batch/s]
At epoch: 149  train vae loss: 0.68429 perp: 0.44892 kl: 0.23537:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 148: loss 0.74141 perp 0.50680 kl 0.23461


At epoch: 149  train vae loss: 0.55624 perp: 0.32185 kl: 0.23439: 100%|██████████| 379/379 [00:37<00:00, 10.18batch/s] 
At epoch: 149  valid vae loss: 0.58533 perp: 0.34588 kl: 0.23945:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 149: loss 0.91852 perp 0.67310 kl 0.24542


At epoch: 149  valid vae loss: 0.46098 perp: 0.23910 kl: 0.22188: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 150  train vae loss: 0.51629 perp: 0.28588 kl: 0.23041:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 149: loss 0.67308 perp 0.43798 kl 0.23510
saving to best model since this is the best valid loss so far.----


At epoch: 150  train vae loss: 0.61253 perp: 0.37429 kl: 0.23823: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 150  valid vae loss: 0.56478 perp: 0.33351 kl: 0.23127:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 150: loss 0.59184 perp 0.35879 kl 0.23305


At epoch: 150  valid vae loss: 0.43772 perp: 0.22318 kl: 0.21455: 100%|██████████| 34/34 [00:01<00:00, 23.68batch/s]
At epoch: 151  train vae loss: 0.57259 perp: 0.34795 kl: 0.22464:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 150: loss 0.65942 perp 0.43203 kl 0.22739
saving to best model since this is the best valid loss so far.----


At epoch: 151  train vae loss: 0.49903 perp: 0.26427 kl: 0.23477: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 151  valid vae loss: 0.66501 perp: 0.43254 kl: 0.23247:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 151: loss 0.61214 perp 0.38266 kl 0.22948


At epoch: 151  valid vae loss: 0.42517 perp: 0.20955 kl: 0.21562: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 152  train vae loss: 0.59941 perp: 0.36276 kl: 0.23665:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 151: loss 0.69516 perp 0.46700 kl 0.22816


At epoch: 152  train vae loss: 0.65897 perp: 0.42462 kl: 0.23435: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 152  valid vae loss: 0.63286 perp: 0.39872 kl: 0.23414:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 152: loss 0.63140 perp 0.40156 kl 0.22984


At epoch: 152  valid vae loss: 0.47337 perp: 0.25612 kl: 0.21725: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 153  train vae loss: 0.59693 perp: 0.36600 kl: 0.23093:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 152: loss 0.70418 perp 0.47454 kl 0.22964


At epoch: 153  train vae loss: 0.68099 perp: 0.45990 kl: 0.22109: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 153  valid vae loss: 0.63345 perp: 0.40292 kl: 0.23052:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 153: loss 0.61686 perp 0.38902 kl 0.22784


At epoch: 153  valid vae loss: 0.52037 perp: 0.30766 kl: 0.21270: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 154  train vae loss: 0.57794 perp: 0.35924 kl: 0.21870:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 153: loss 0.70137 perp 0.47543 kl 0.22593


At epoch: 154  train vae loss: 0.60871 perp: 0.37992 kl: 0.22879: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 154  valid vae loss: 0.59840 perp: 0.36146 kl: 0.23694:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 154: loss 0.67106 perp 0.44088 kl 0.23018


At epoch: 154  valid vae loss: 0.42433 perp: 0.20383 kl: 0.22050: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 155  train vae loss: 0.60718 perp: 0.36591 kl: 0.24127:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 154: loss 0.69846 perp 0.46554 kl 0.23291


At epoch: 155  train vae loss: 0.65214 perp: 0.41325 kl: 0.23889: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 155  valid vae loss: 0.62240 perp: 0.38984 kl: 0.23257:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 155: loss 0.61906 perp 0.38931 kl 0.22976


At epoch: 155  valid vae loss: 0.47702 perp: 0.26162 kl: 0.21540: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 156  train vae loss: 0.69165 perp: 0.45443 kl: 0.23722:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 155: loss 0.71141 perp 0.48348 kl 0.22793


At epoch: 156  train vae loss: 0.62019 perp: 0.38308 kl: 0.23711: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 156  valid vae loss: 0.61797 perp: 0.38370 kl: 0.23427:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 156: loss 0.64670 perp 0.41707 kl 0.22963


At epoch: 156  valid vae loss: 0.57064 perp: 0.35342 kl: 0.21722: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 157  train vae loss: 0.60336 perp: 0.37389 kl: 0.22947:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 156: loss 0.73796 perp 0.50761 kl 0.23035


At epoch: 157  train vae loss: 0.58753 perp: 0.35719 kl: 0.23034: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 157  valid vae loss: 0.57652 perp: 0.34737 kl: 0.22915:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 157: loss 0.60418 perp 0.37615 kl 0.22803


At epoch: 157  valid vae loss: 0.37820 perp: 0.16588 kl: 0.21233: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 158  train vae loss: 0.58713 perp: 0.35925 kl: 0.22787:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 157: loss 0.66420 perp 0.43884 kl 0.22537


At epoch: 158  train vae loss: 0.68979 perp: 0.45654 kl: 0.23325: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 158  valid vae loss: 0.68930 perp: 0.45368 kl: 0.23562:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 158: loss 0.62944 perp 0.40323 kl 0.22621


At epoch: 158  valid vae loss: 0.45875 perp: 0.23939 kl: 0.21936: 100%|██████████| 34/34 [00:01<00:00, 23.69batch/s]
At epoch: 159  train vae loss: 0.74748 perp: 0.51210 kl: 0.23538:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 158: loss 0.73036 perp 0.49851 kl 0.23184


At epoch: 159  train vae loss: 0.55566 perp: 0.32215 kl: 0.23351: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 159  valid vae loss: 0.51597 perp: 0.28429 kl: 0.23168:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 159: loss 0.83192 perp 0.59123 kl 0.24068


At epoch: 159  valid vae loss: 0.39394 perp: 0.17894 kl: 0.21500: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 160  train vae loss: 0.48320 perp: 0.24962 kl: 0.23358:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 159: loss 0.61589 perp 0.38830 kl 0.22759
saving to best model since this is the best valid loss so far.----


At epoch: 160  train vae loss: 0.53729 perp: 0.31906 kl: 0.21823: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 160  valid vae loss: 0.59066 perp: 0.36953 kl: 0.22114:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 160: loss 0.52509 perp 0.30139 kl 0.22370


At epoch: 160  valid vae loss: 0.47135 perp: 0.26614 kl: 0.20521: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 161  train vae loss: 0.64039 perp: 0.42579 kl: 0.21460:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 160: loss 0.66396 perp 0.44671 kl 0.21724


At epoch: 161  train vae loss: 0.66845 perp: 0.44821 kl: 0.22023: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 161  valid vae loss: 0.57449 perp: 0.35104 kl: 0.22344:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 161: loss 0.55408 perp 0.33444 kl 0.21964


At epoch: 161  valid vae loss: 0.34950 perp: 0.14282 kl: 0.20667: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 162  train vae loss: 0.58910 perp: 0.35820 kl: 0.23090:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 161: loss 0.66019 perp 0.44109 kl 0.21910


At epoch: 162  train vae loss: 0.55989 perp: 0.34662 kl: 0.21328: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 162  valid vae loss: 0.56982 perp: 0.35112 kl: 0.21871:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 162: loss 0.54733 perp 0.32894 kl 0.21839


At epoch: 162  valid vae loss: 0.43156 perp: 0.22911 kl: 0.20245: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 163  train vae loss: 0.54727 perp: 0.32009 kl: 0.22718:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 162: loss 0.63877 perp 0.42357 kl 0.21520


At epoch: 163  train vae loss: 0.51640 perp: 0.29556 kl: 0.22084: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 163  valid vae loss: 0.62493 perp: 0.40316 kl: 0.22176:   6%|▌         | 2/34 [00:00<00:02, 11.59batch/s]

>>>>average [92mtraining[0m of epoch 163: loss 0.57037 perp 0.35193 kl 0.21843


At epoch: 163  valid vae loss: 0.40043 perp: 0.19523 kl: 0.20520: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 164  train vae loss: 0.60108 perp: 0.38219 kl: 0.21889:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 163: loss 0.68474 perp 0.46696 kl 0.21777


At epoch: 164  train vae loss: 0.72169 perp: 0.49476 kl: 0.22693: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 164  valid vae loss: 0.57982 perp: 0.35762 kl: 0.22220:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 164: loss 0.58150 perp 0.36131 kl 0.22019


At epoch: 164  valid vae loss: 0.37281 perp: 0.16708 kl: 0.20573: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 165  train vae loss: 0.63341 perp: 0.40622 kl: 0.22720:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 164: loss 0.65259 perp 0.43421 kl 0.21838


At epoch: 165  train vae loss: 0.47331 perp: 0.25930 kl: 0.21401: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 165  valid vae loss: 0.49517 perp: 0.27671 kl: 0.21846:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 165: loss 0.75656 perp 0.53067 kl 0.22590


At epoch: 165  valid vae loss: 0.39012 perp: 0.18863 kl: 0.20149: 100%|██████████| 34/34 [00:01<00:00, 23.72batch/s]
At epoch: 166  train vae loss: 0.40774 perp: 0.20078 kl: 0.20695:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 165: loss 0.58540 perp 0.37087 kl 0.21453
saving to best model since this is the best valid loss so far.----


At epoch: 166  train vae loss: 0.53751 perp: 0.31070 kl: 0.22681: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 166  valid vae loss: 0.52664 perp: 0.30945 kl: 0.21719:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 166: loss 0.50366 perp 0.28917 kl 0.21449


At epoch: 166  valid vae loss: 0.35438 perp: 0.15347 kl: 0.20091: 100%|██████████| 34/34 [00:01<00:00, 23.72batch/s]
At epoch: 167  train vae loss: 0.49731 perp: 0.28235 kl: 0.21496:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 166: loss 0.59619 perp 0.38250 kl 0.21369


At epoch: 167  train vae loss: 0.49253 perp: 0.28705 kl: 0.20548: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 167  valid vae loss: 0.51251 perp: 0.30062 kl: 0.21190:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 167: loss 0.51254 perp 0.30014 kl 0.21241


At epoch: 167  valid vae loss: 0.39930 perp: 0.20354 kl: 0.19576: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 168  train vae loss: 0.41136 perp: 0.20895 kl: 0.20241:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 167: loss 0.58758 perp 0.37909 kl 0.20849


At epoch: 168  train vae loss: 0.50739 perp: 0.29959 kl: 0.20780: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 168  valid vae loss: 0.51524 perp: 0.30349 kl: 0.21175:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 168: loss 0.52373 perp 0.31221 kl 0.21152


At epoch: 168  valid vae loss: 0.39400 perp: 0.19814 kl: 0.19587: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 169  train vae loss: 0.47339 perp: 0.26848 kl: 0.20491:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 168: loss 0.58791 perp 0.37968 kl 0.20822


At epoch: 169  train vae loss: 0.45912 perp: 0.24845 kl: 0.21066: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 169  valid vae loss: 0.51363 perp: 0.29543 kl: 0.21820:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 169: loss 0.62864 perp 0.40971 kl 0.21893


At epoch: 169  valid vae loss: 0.38448 perp: 0.18216 kl: 0.20232: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 170  train vae loss: 0.44687 perp: 0.23647 kl: 0.21040:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 169: loss 0.59682 perp 0.38196 kl 0.21486


At epoch: 170  train vae loss: 0.51877 perp: 0.29905 kl: 0.21972: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 170  valid vae loss: 0.53942 perp: 0.31284 kl: 0.22658:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 170: loss 0.68103 perp 0.46488 kl 0.21615


At epoch: 170  valid vae loss: 0.35327 perp: 0.14369 kl: 0.20958: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 171  train vae loss: 0.50685 perp: 0.27416 kl: 0.23269:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 170: loss 0.60624 perp 0.38353 kl 0.22270


At epoch: 171  train vae loss: 0.50652 perp: 0.30169 kl: 0.20483: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 171  valid vae loss: 0.53078 perp: 0.32077 kl: 0.21002:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 171: loss 0.46884 perp 0.25607 kl 0.21277


At epoch: 171  valid vae loss: 0.31474 perp: 0.12042 kl: 0.19432: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 172  train vae loss: 0.41988 perp: 0.21767 kl: 0.20220:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 171: loss 0.57832 perp 0.37165 kl 0.20667
saving to best model since this is the best valid loss so far.----


At epoch: 172  train vae loss: 0.43469 perp: 0.23363 kl: 0.20106: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 172  valid vae loss: 0.46126 perp: 0.25606 kl: 0.20520:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 172: loss 0.47446 perp 0.26794 kl 0.20652


At epoch: 172  valid vae loss: 0.32320 perp: 0.13361 kl: 0.18958: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 173  train vae loss: 0.46133 perp: 0.25824 kl: 0.20309:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 172: loss 0.54432 perp 0.34264 kl 0.20168
saving to best model since this is the best valid loss so far.----


At epoch: 173  train vae loss: 0.55592 perp: 0.34226 kl: 0.21365: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 173  valid vae loss: 0.53862 perp: 0.32946 kl: 0.20916:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 173: loss 0.50497 perp 0.29921 kl 0.20576


At epoch: 173  valid vae loss: 0.33838 perp: 0.14402 kl: 0.19436: 100%|██████████| 34/34 [00:01<00:00, 23.69batch/s]
At epoch: 174  train vae loss: 0.52170 perp: 0.31743 kl: 0.20427:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 173: loss 0.59535 perp 0.38923 kl 0.20612


At epoch: 174  train vae loss: 0.49183 perp: 0.27431 kl: 0.21752: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 174  valid vae loss: 0.53637 perp: 0.32660 kl: 0.20977:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 174: loss 0.50681 perp 0.29905 kl 0.20776


At epoch: 174  valid vae loss: 0.34868 perp: 0.15455 kl: 0.19413: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 175  train vae loss: 0.51644 perp: 0.30824 kl: 0.20819:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 174: loss 0.61624 perp 0.41013 kl 0.20611


At epoch: 175  train vae loss: 0.43280 perp: 0.23407 kl: 0.19872: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 175  valid vae loss: 0.48502 perp: 0.27897 kl: 0.20605:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 175: loss 0.51366 perp 0.30674 kl 0.20692


At epoch: 175  valid vae loss: 0.42648 perp: 0.23549 kl: 0.19100: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 176  train vae loss: 0.40909 perp: 0.20816 kl: 0.20093:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 175: loss 0.57324 perp 0.37024 kl 0.20300


At epoch: 176  train vae loss: 0.49013 perp: 0.28102 kl: 0.20911: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 176  valid vae loss: 0.48346 perp: 0.27373 kl: 0.20973:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 176: loss 0.52365 perp 0.31637 kl 0.20728


At epoch: 176  valid vae loss: 0.46732 perp: 0.27234 kl: 0.19498: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 177  train vae loss: 0.61253 perp: 0.41256 kl: 0.19997:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 176: loss 0.58880 perp 0.38199 kl 0.20682


At epoch: 177  train vae loss: 0.48713 perp: 0.28330 kl: 0.20382: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 177  valid vae loss: 0.49548 perp: 0.28670 kl: 0.20878:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 177: loss 0.50576 perp 0.29867 kl 0.20709


At epoch: 177  valid vae loss: 0.35892 perp: 0.16501 kl: 0.19391: 100%|██████████| 34/34 [00:01<00:00, 23.71batch/s]
At epoch: 178  train vae loss: 0.46031 perp: 0.24766 kl: 0.21265:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 177: loss 0.56284 perp 0.35711 kl 0.20573


At epoch: 178  train vae loss: 0.53673 perp: 0.32791 kl: 0.20882: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 178  valid vae loss: 0.57443 perp: 0.36064 kl: 0.21379:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 178: loss 0.53278 perp 0.32606 kl 0.20673


At epoch: 178  valid vae loss: 0.36592 perp: 0.16788 kl: 0.19804: 100%|██████████| 34/34 [00:01<00:00, 23.71batch/s]
At epoch: 179  train vae loss: 0.55566 perp: 0.34560 kl: 0.21007:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 178: loss 0.67619 perp 0.46590 kl 0.21030


At epoch: 179  train vae loss: 0.49002 perp: 0.28945 kl: 0.20056: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 179  valid vae loss: 0.47165 perp: 0.26644 kl: 0.20521:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 179: loss 0.50508 perp 0.29679 kl 0.20830


At epoch: 179  valid vae loss: 0.31096 perp: 0.12078 kl: 0.19018: 100%|██████████| 34/34 [00:01<00:00, 23.69batch/s]
At epoch: 180  train vae loss: 0.53633 perp: 0.32933 kl: 0.20700:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 179: loss 0.56749 perp 0.36531 kl 0.20218


At epoch: 180  train vae loss: 0.48164 perp: 0.27394 kl: 0.20771: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 180  valid vae loss: 0.47557 perp: 0.26738 kl: 0.20818:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 180: loss 0.49819 perp 0.29350 kl 0.20470


At epoch: 180  valid vae loss: 0.36763 perp: 0.17471 kl: 0.19292: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 181  train vae loss: 0.50346 perp: 0.29406 kl: 0.20940:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 180: loss 0.62564 perp 0.42058 kl 0.20506


At epoch: 181  train vae loss: 0.52940 perp: 0.31822 kl: 0.21119: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 181  valid vae loss: 0.45456 perp: 0.25125 kl: 0.20331:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 181: loss 0.49317 perp 0.28794 kl 0.20523


At epoch: 181  valid vae loss: 0.38805 perp: 0.20041 kl: 0.18764: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 182  train vae loss: 0.47930 perp: 0.28010 kl: 0.19920:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 181: loss 0.54166 perp 0.34188 kl 0.19979
saving to best model since this is the best valid loss so far.----


At epoch: 182  train vae loss: 0.53728 perp: 0.33124 kl: 0.20603: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 182  valid vae loss: 0.49797 perp: 0.29130 kl: 0.20667:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 182: loss 0.49836 perp 0.29413 kl 0.20423


At epoch: 182  valid vae loss: 0.36034 perp: 0.16908 kl: 0.19126: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 183  train vae loss: 0.47009 perp: 0.26412 kl: 0.20598:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 182: loss 0.57917 perp 0.37598 kl 0.20319


At epoch: 183  train vae loss: 0.48355 perp: 0.28342 kl: 0.20014: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 183  valid vae loss: 0.42236 perp: 0.21792 kl: 0.20444:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 183: loss 0.53899 perp 0.33328 kl 0.20571


At epoch: 183  valid vae loss: 0.28342 perp: 0.09456 kl: 0.18886: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 184  train vae loss: 0.42683 perp: 0.21784 kl: 0.20899:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 183: loss 0.53718 perp 0.33585 kl 0.20133
saving to best model since this is the best valid loss so far.----


At epoch: 184  train vae loss: 0.45660 perp: 0.25630 kl: 0.20030: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 184  valid vae loss: 0.46554 perp: 0.26349 kl: 0.20206:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 184: loss 0.44197 perp 0.24279 kl 0.19918


At epoch: 184  valid vae loss: 0.33528 perp: 0.14868 kl: 0.18660: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 185  train vae loss: 0.43666 perp: 0.23175 kl: 0.20491:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 184: loss 0.53854 perp 0.33983 kl 0.19871


At epoch: 185  train vae loss: 0.40238 perp: 0.20018 kl: 0.20220: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 185  valid vae loss: 0.43159 perp: 0.23221 kl: 0.19938:   6%|▌         | 2/34 [00:00<00:02, 11.92batch/s]

>>>>average [92mtraining[0m of epoch 185: loss 0.45568 perp 0.25795 kl 0.19773


At epoch: 185  valid vae loss: 0.30416 perp: 0.12036 kl: 0.18380: 100%|██████████| 34/34 [00:01<00:00, 23.72batch/s]
At epoch: 186  train vae loss: 0.35556 perp: 0.15800 kl: 0.19756:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 185: loss 0.52055 perp 0.32448 kl 0.19607
saving to best model since this is the best valid loss so far.----


At epoch: 186  train vae loss: 0.51602 perp: 0.30998 kl: 0.20604: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 186  valid vae loss: 0.52302 perp: 0.32402 kl: 0.19900:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 186: loss 0.50350 perp 0.30250 kl 0.20100


At epoch: 186  valid vae loss: 0.37779 perp: 0.19359 kl: 0.18420: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 187  train vae loss: 0.49857 perp: 0.29199 kl: 0.20658:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 186: loss 0.54314 perp 0.34747 kl 0.19567


At epoch: 187  train vae loss: 0.68001 perp: 0.47542 kl: 0.20459: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 187  valid vae loss: 0.49129 perp: 0.29073 kl: 0.20056:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 187: loss 0.45672 perp 0.25852 kl 0.19820


At epoch: 187  valid vae loss: 0.34856 perp: 0.16378 kl: 0.18479: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 188  train vae loss: 0.52851 perp: 0.32555 kl: 0.20296:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 187: loss 0.58090 perp 0.38340 kl 0.19750


At epoch: 188  train vae loss: 0.44116 perp: 0.23546 kl: 0.20569: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 188  valid vae loss: 0.44271 perp: 0.23359 kl: 0.20912:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 188: loss 0.64197 perp 0.43629 kl 0.20569


At epoch: 188  valid vae loss: 0.32974 perp: 0.13626 kl: 0.19348: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 189  train vae loss: 0.40485 perp: 0.19868 kl: 0.20617:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 188: loss 0.52072 perp 0.31461 kl 0.20611


At epoch: 189  train vae loss: 0.43339 perp: 0.24052 kl: 0.19286: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 189  valid vae loss: 0.42554 perp: 0.23209 kl: 0.19345:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 189: loss 0.39423 perp 0.19606 kl 0.19817


At epoch: 189  valid vae loss: 0.31369 perp: 0.13486 kl: 0.17883: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 190  train vae loss: 0.35970 perp: 0.17407 kl: 0.18563:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 189: loss 0.48725 perp 0.29677 kl 0.19047
saving to best model since this is the best valid loss so far.----


At epoch: 190  train vae loss: 0.48525 perp: 0.29666 kl: 0.18859: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 190  valid vae loss: 0.45716 perp: 0.26720 kl: 0.18996:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 190: loss 0.40062 perp 0.20979 kl 0.19083


At epoch: 190  valid vae loss: 0.29748 perp: 0.12171 kl: 0.17576: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 191  train vae loss: 0.37309 perp: 0.18526 kl: 0.18783:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 190: loss 0.52280 perp 0.33577 kl 0.18703


At epoch: 191  train vae loss: 0.42826 perp: 0.22951 kl: 0.19874: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 191  valid vae loss: 0.41522 perp: 0.22144 kl: 0.19378:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 191: loss 0.42185 perp 0.23171 kl 0.19014


At epoch: 191  valid vae loss: 0.29515 perp: 0.11582 kl: 0.17934: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 192  train vae loss: 0.44758 perp: 0.26042 kl: 0.18716:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 191: loss 0.51959 perp 0.32849 kl 0.19110


At epoch: 192  train vae loss: 0.35330 perp: 0.16698 kl: 0.18632: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 192  valid vae loss: 0.51224 perp: 0.31779 kl: 0.19444:   6%|▌         | 2/34 [00:00<00:02, 11.96batch/s]

>>>>average [92mtraining[0m of epoch 192: loss 0.44058 perp 0.24903 kl 0.19155


At epoch: 192  valid vae loss: 0.35650 perp: 0.17640 kl: 0.18010: 100%|██████████| 34/34 [00:01<00:00, 23.72batch/s]
At epoch: 193  train vae loss: 0.59143 perp: 0.40129 kl: 0.19014:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 192: loss 0.56291 perp 0.37090 kl 0.19200


At epoch: 193  train vae loss: 0.47056 perp: 0.28024 kl: 0.19032: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 193  valid vae loss: 0.45292 perp: 0.26165 kl: 0.19127:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 193: loss 0.44121 perp 0.24901 kl 0.19220


At epoch: 193  valid vae loss: 0.29969 perp: 0.12268 kl: 0.17701: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 194  train vae loss: 0.45679 perp: 0.26190 kl: 0.19489:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 193: loss 0.55154 perp 0.36316 kl 0.18838


At epoch: 194  train vae loss: 0.37180 perp: 0.18447 kl: 0.18733: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 194  valid vae loss: 0.42036 perp: 0.23028 kl: 0.19007:   6%|▌         | 2/34 [00:00<00:02, 11.92batch/s]

>>>>average [92mtraining[0m of epoch 194: loss 0.43285 perp 0.24148 kl 0.19137


At epoch: 194  valid vae loss: 0.29442 perp: 0.11817 kl: 0.17624: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 195  train vae loss: 0.36621 perp: 0.17756 kl: 0.18865:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 194: loss 0.49113 perp 0.30346 kl 0.18767


At epoch: 195  train vae loss: 0.37020 perp: 0.17358 kl: 0.19662: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]  
At epoch: 195  valid vae loss: 0.42136 perp: 0.22589 kl: 0.19547:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 195: loss 0.67440 perp 0.47295 kl 0.20144


At epoch: 195  valid vae loss: 0.26292 perp: 0.08079 kl: 0.18213: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 196  train vae loss: 0.39391 perp: 0.19782 kl: 0.19609:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 195: loss 0.47788 perp 0.28478 kl 0.19311
saving to best model since this is the best valid loss so far.----


At epoch: 196  train vae loss: 0.30567 perp: 0.12456 kl: 0.18111: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 196  valid vae loss: 0.39900 perp: 0.21428 kl: 0.18472:   6%|▌         | 2/34 [00:00<00:02, 11.53batch/s]

>>>>average [92mtraining[0m of epoch 196: loss 0.35852 perp 0.16957 kl 0.18896


At epoch: 196  valid vae loss: 0.28550 perp: 0.11390 kl: 0.17161: 100%|██████████| 34/34 [00:01<00:00, 23.39batch/s]
At epoch: 197  train vae loss: 0.36803 perp: 0.18060 kl: 0.18743:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 196: loss 0.46469 perp 0.28229 kl 0.18239
saving to best model since this is the best valid loss so far.----


At epoch: 197  train vae loss: 0.33083 perp: 0.14780 kl: 0.18302: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 197  valid vae loss: 0.42642 perp: 0.24244 kl: 0.18398:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 197: loss 0.37354 perp 0.19025 kl 0.18329


At epoch: 197  valid vae loss: 0.28132 perp: 0.11041 kl: 0.17091: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 198  train vae loss: 0.32111 perp: 0.14620 kl: 0.17491:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 197: loss 0.47597 perp 0.29435 kl 0.18163


At epoch: 198  train vae loss: 0.36479 perp: 0.17540 kl: 0.18939: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 198  valid vae loss: 0.38553 perp: 0.20323 kl: 0.18230:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 198: loss 0.39078 perp 0.20769 kl 0.18309


At epoch: 198  valid vae loss: 0.29624 perp: 0.12802 kl: 0.16822: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 199  train vae loss: 0.40745 perp: 0.22314 kl: 0.18431:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 198: loss 0.49099 perp 0.31128 kl 0.17971


At epoch: 199  train vae loss: 0.36429 perp: 0.18112 kl: 0.18317: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 199  valid vae loss: 0.41158 perp: 0.22694 kl: 0.18464:   6%|▌         | 2/34 [00:00<00:02, 11.97batch/s]

>>>>average [92mtraining[0m of epoch 199: loss 0.42005 perp 0.23479 kl 0.18526


At epoch: 199  valid vae loss: 0.26440 perp: 0.09289 kl: 0.17151: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 200  train vae loss: 0.41725 perp: 0.23687 kl: 0.18039:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 199: loss 0.49325 perp 0.31087 kl 0.18237


At epoch: 200  train vae loss: 0.45701 perp: 0.27637 kl: 0.18064: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 200  valid vae loss: 0.43074 perp: 0.24504 kl: 0.18570:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 200: loss 0.41429 perp 0.22952 kl 0.18477


At epoch: 200  valid vae loss: 0.33639 perp: 0.16399 kl: 0.17240: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 201  train vae loss: 0.45272 perp: 0.27001 kl: 0.18271:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 200: loss 0.50113 perp 0.31785 kl 0.18328


At epoch: 201  train vae loss: 0.59303 perp: 0.40681 kl: 0.18622: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 201  valid vae loss: 0.42524 perp: 0.23863 kl: 0.18661:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 201: loss 0.42967 perp 0.24369 kl 0.18598


At epoch: 201  valid vae loss: 0.31209 perp: 0.13875 kl: 0.17334: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 202  train vae loss: 0.41296 perp: 0.22200 kl: 0.19096:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 201: loss 0.50778 perp 0.32338 kl 0.18440


At epoch: 202  train vae loss: 0.50930 perp: 0.29910 kl: 0.21020: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]  
At epoch: 202  valid vae loss: 0.43427 perp: 0.22730 kl: 0.20697:   6%|▌         | 2/34 [00:00<00:02, 11.86batch/s]

>>>>average [92mtraining[0m of epoch 202: loss 0.65546 perp 0.46390 kl 0.19156


At epoch: 202  valid vae loss: 0.32264 perp: 0.13146 kl: 0.19118: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 203  train vae loss: 0.42170 perp: 0.21335 kl: 0.20835:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 202: loss 0.53006 perp 0.32622 kl 0.20384


At epoch: 203  train vae loss: 0.39489 perp: 0.21404 kl: 0.18084: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 203  valid vae loss: 0.36479 perp: 0.18151 kl: 0.18328:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 203: loss 0.34803 perp 0.15779 kl 0.19024


At epoch: 203  valid vae loss: 0.28165 perp: 0.11160 kl: 0.17005: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 204  train vae loss: 0.37299 perp: 0.19243 kl: 0.18055:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 203: loss 0.44190 perp 0.26128 kl 0.18062
saving to best model since this is the best valid loss so far.----


At epoch: 204  train vae loss: 0.28894 perp: 0.11390 kl: 0.17504: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 204  valid vae loss: 0.36017 perp: 0.18386 kl: 0.17631:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 204: loss 0.33566 perp 0.15733 kl 0.17834


At epoch: 204  valid vae loss: 0.24883 perp: 0.08566 kl: 0.16317: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 205  train vae loss: 0.30058 perp: 0.12576 kl: 0.17482:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 204: loss 0.43285 perp 0.25905 kl 0.17380
saving to best model since this is the best valid loss so far.----


At epoch: 205  train vae loss: 0.33169 perp: 0.15054 kl: 0.18116: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 205  valid vae loss: 0.37845 perp: 0.20154 kl: 0.17690:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 205: loss 0.35781 perp 0.18180 kl 0.17601


At epoch: 205  valid vae loss: 0.32380 perp: 0.16008 kl: 0.16372: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 206  train vae loss: 0.39663 perp: 0.21342 kl: 0.18322:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 205: loss 0.49009 perp 0.31559 kl 0.17451


At epoch: 206  train vae loss: 0.33849 perp: 0.16701 kl: 0.17148: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 206  valid vae loss: 0.37249 perp: 0.19472 kl: 0.17777:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 206: loss 0.36356 perp 0.18795 kl 0.17561


At epoch: 206  valid vae loss: 0.39781 perp: 0.23278 kl: 0.16503: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 207  train vae loss: 0.42750 perp: 0.24366 kl: 0.18384:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 206: loss 0.47173 perp 0.29620 kl 0.17553


At epoch: 207  train vae loss: 0.36047 perp: 0.18069 kl: 0.17978: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 207  valid vae loss: 0.36388 perp: 0.18642 kl: 0.17745:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 207: loss 0.38308 perp 0.20594 kl 0.17714


At epoch: 207  valid vae loss: 0.24961 perp: 0.08464 kl: 0.16497: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 208  train vae loss: 0.37791 perp: 0.19470 kl: 0.18321:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 207: loss 0.45986 perp 0.28459 kl 0.17527


At epoch: 208  train vae loss: 0.37275 perp: 0.19387 kl: 0.17888: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 208  valid vae loss: 0.43410 perp: 0.25343 kl: 0.18066:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 208: loss 0.39990 perp 0.22061 kl 0.17929


At epoch: 208  valid vae loss: 0.28646 perp: 0.11879 kl: 0.16766: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 209  train vae loss: 0.36865 perp: 0.19035 kl: 0.17830:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 208: loss 0.48817 perp 0.30994 kl 0.17823


At epoch: 209  train vae loss: 0.36633 perp: 0.18440 kl: 0.18193: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 209  valid vae loss: 0.38586 perp: 0.20680 kl: 0.17907:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 209: loss 0.39228 perp 0.21325 kl 0.17903


At epoch: 209  valid vae loss: 0.30815 perp: 0.14136 kl: 0.16679: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 210  train vae loss: 0.41321 perp: 0.23389 kl: 0.17932:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 209: loss 0.48685 perp 0.30975 kl 0.17710


At epoch: 210  train vae loss: 0.34876 perp: 0.17469 kl: 0.17407: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 210  valid vae loss: 0.43716 perp: 0.25911 kl: 0.17806:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 210: loss 0.39137 perp 0.21232 kl 0.17905


At epoch: 210  valid vae loss: 0.39186 perp: 0.22616 kl: 0.16570: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 211  train vae loss: 0.36166 perp: 0.18535 kl: 0.17631:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 210: loss 0.49556 perp 0.31958 kl 0.17598


At epoch: 211  train vae loss: 0.39535 perp: 0.21249 kl: 0.18286: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 211  valid vae loss: 0.49745 perp: 0.31301 kl: 0.18444:   6%|▌         | 2/34 [00:00<00:02, 11.60batch/s]

>>>>average [92mtraining[0m of epoch 211: loss 0.41577 perp 0.23539 kl 0.18037


At epoch: 211  valid vae loss: 0.41798 perp: 0.24750 kl: 0.17048: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 212  train vae loss: 0.48251 perp: 0.30053 kl: 0.18197:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 211: loss 0.57611 perp 0.39430 kl 0.18181


At epoch: 212  train vae loss: 0.40570 perp: 0.22428 kl: 0.18142: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 212  valid vae loss: 0.44082 perp: 0.25794 kl: 0.18288:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 212: loss 0.39753 perp 0.21746 kl 0.18007


At epoch: 212  valid vae loss: 0.32357 perp: 0.15288 kl: 0.17069: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 213  train vae loss: 0.40810 perp: 0.22123 kl: 0.18687:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 212: loss 0.48537 perp 0.30436 kl 0.18102


At epoch: 213  train vae loss: 0.30577 perp: 0.12833 kl: 0.17744: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 213  valid vae loss: 0.35745 perp: 0.18071 kl: 0.17674:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 213: loss 0.36536 perp 0.18782 kl 0.17754


At epoch: 213  valid vae loss: 0.28120 perp: 0.11761 kl: 0.16359: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 214  train vae loss: 0.31760 perp: 0.14193 kl: 0.17567:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 213: loss 0.44678 perp 0.27209 kl 0.17469


At epoch: 214  train vae loss: 0.35131 perp: 0.15183 kl: 0.19948: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]  
At epoch: 214  valid vae loss: 0.40523 perp: 0.20973 kl: 0.19550:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 214: loss 0.75895 perp 0.57229 kl 0.18666


At epoch: 214  valid vae loss: 0.27314 perp: 0.09144 kl: 0.18170: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 215  train vae loss: 0.32652 perp: 0.13559 kl: 0.19094:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 214: loss 0.46143 perp 0.26861 kl 0.19281


At epoch: 215  train vae loss: 0.26656 perp: 0.09864 kl: 0.16792: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 215  valid vae loss: 0.33087 perp: 0.15561 kl: 0.17526:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 215: loss 0.31563 perp 0.13336 kl 0.18227


At epoch: 215  valid vae loss: 0.25510 perp: 0.09246 kl: 0.16264: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 216  train vae loss: 0.34292 perp: 0.16668 kl: 0.17624:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 215: loss 0.41930 perp 0.24640 kl 0.17290
saving to best model since this is the best valid loss so far.----


At epoch: 216  train vae loss: 0.28849 perp: 0.12142 kl: 0.16706: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 216  valid vae loss: 0.30640 perp: 0.13847 kl: 0.16794:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 216: loss 0.29696 perp 0.12670 kl 0.17026


At epoch: 216  valid vae loss: 0.25414 perp: 0.09811 kl: 0.15603: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 217  train vae loss: 0.31500 perp: 0.14180 kl: 0.17320:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 216: loss 0.41129 perp 0.24542 kl 0.16587
saving to best model since this is the best valid loss so far.----


At epoch: 217  train vae loss: 0.30525 perp: 0.13935 kl: 0.16590: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 217  valid vae loss: 0.38042 perp: 0.21388 kl: 0.16654:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 217: loss 0.31601 perp 0.14913 kl 0.16688


At epoch: 217  valid vae loss: 0.25380 perp: 0.09944 kl: 0.15436: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 218  train vae loss: 0.35802 perp: 0.19399 kl: 0.16403:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 217: loss 0.43892 perp 0.27440 kl 0.16452


At epoch: 218  train vae loss: 0.30989 perp: 0.14404 kl: 0.16585: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 218  valid vae loss: 0.33662 perp: 0.17060 kl: 0.16602:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 218: loss 0.32390 perp 0.15804 kl 0.16586


At epoch: 218  valid vae loss: 0.23554 perp: 0.08163 kl: 0.15391: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 219  train vae loss: 0.26793 perp: 0.10526 kl: 0.16266:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 218: loss 0.42792 perp 0.26382 kl 0.16410


At epoch: 219  train vae loss: 0.31042 perp: 0.14152 kl: 0.16890: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 219  valid vae loss: 0.36363 perp: 0.19602 kl: 0.16761:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 219: loss 0.36367 perp 0.19388 kl 0.16979


At epoch: 219  valid vae loss: 0.24161 perp: 0.08546 kl: 0.15615: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 220  train vae loss: 0.37967 perp: 0.21020 kl: 0.16948:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 219: loss 0.43028 perp 0.26442 kl 0.16586


At epoch: 220  train vae loss: 0.33491 perp: 0.16381 kl: 0.17110: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 220  valid vae loss: 0.32223 perp: 0.15300 kl: 0.16923:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 220: loss 0.34057 perp 0.17269 kl 0.16788


At epoch: 220  valid vae loss: 0.23299 perp: 0.07564 kl: 0.15735: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 221  train vae loss: 0.31649 perp: 0.14517 kl: 0.17131:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 220: loss 0.42852 perp 0.26111 kl 0.16741


At epoch: 221  train vae loss: 0.31287 perp: 0.15181 kl: 0.16105: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 221  valid vae loss: 0.32817 perp: 0.16097 kl: 0.16720:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 221: loss 0.33809 perp 0.17094 kl 0.16715


At epoch: 221  valid vae loss: 0.24722 perp: 0.09172 kl: 0.15550: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 222  train vae loss: 0.25845 perp: 0.09463 kl: 0.16381:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 221: loss 0.43046 perp 0.26523 kl 0.16523


At epoch: 222  train vae loss: 0.29096 perp: 0.11475 kl: 0.17621: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]  
At epoch: 222  valid vae loss: 0.35259 perp: 0.17179 kl: 0.18080:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 222: loss 0.64411 perp 0.46753 kl 0.17659


At epoch: 222  valid vae loss: 0.25876 perp: 0.09085 kl: 0.16791: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 223  train vae loss: 0.31289 perp: 0.12850 kl: 0.18439:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 222: loss 0.43852 perp 0.25982 kl 0.17870


At epoch: 223  train vae loss: 0.30231 perp: 0.13794 kl: 0.16437: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 223  valid vae loss: 0.32426 perp: 0.15734 kl: 0.16692:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 223: loss 0.30673 perp 0.13419 kl 0.17254


At epoch: 223  valid vae loss: 0.23527 perp: 0.07958 kl: 0.15570: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 224  train vae loss: 0.30765 perp: 0.13974 kl: 0.16791:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 223: loss 0.40044 perp 0.23500 kl 0.16545
saving to best model since this is the best valid loss so far.----


At epoch: 224  train vae loss: 0.28117 perp: 0.11630 kl: 0.16486: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 224  valid vae loss: 0.35580 perp: 0.19579 kl: 0.16001:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 224: loss 0.28723 perp 0.12430 kl 0.16293


At epoch: 224  valid vae loss: 0.21821 perp: 0.06961 kl: 0.14861: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 225  train vae loss: 0.26219 perp: 0.10178 kl: 0.16041:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 224: loss 0.39214 perp 0.23387 kl 0.15827
saving to best model since this is the best valid loss so far.----


At epoch: 225  train vae loss: 0.28304 perp: 0.12879 kl: 0.15425: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 225  valid vae loss: 0.33958 perp: 0.17957 kl: 0.16001:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 225: loss 0.30320 perp 0.14259 kl 0.16061


At epoch: 225  valid vae loss: 0.23274 perp: 0.08377 kl: 0.14897: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 226  train vae loss: 0.31421 perp: 0.15286 kl: 0.16135:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 225: loss 0.42130 perp 0.26325 kl 0.15804


At epoch: 226  train vae loss: 0.27000 perp: 0.10742 kl: 0.16259: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 226  valid vae loss: 0.32111 perp: 0.15549 kl: 0.16562:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 226: loss 0.35780 perp 0.19347 kl 0.16433


At epoch: 226  valid vae loss: 0.24613 perp: 0.09230 kl: 0.15383: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 227  train vae loss: 0.29594 perp: 0.13100 kl: 0.16494:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 226: loss 0.40951 perp 0.24563 kl 0.16388


At epoch: 227  train vae loss: 0.32027 perp: 0.15650 kl: 0.16377: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 227  valid vae loss: 0.38497 perp: 0.22065 kl: 0.16431:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 227: loss 0.33518 perp 0.16989 kl 0.16528


At epoch: 227  valid vae loss: 0.23678 perp: 0.08408 kl: 0.15270: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 228  train vae loss: 0.38739 perp: 0.22023 kl: 0.16716:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 227: loss 0.41313 perp 0.25061 kl 0.16252


At epoch: 228  train vae loss: 0.37813 perp: 0.20391 kl: 0.17422: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 228  valid vae loss: 0.35403 perp: 0.18579 kl: 0.16824:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 228: loss 0.36372 perp 0.19677 kl 0.16695


At epoch: 228  valid vae loss: 0.23410 perp: 0.07700 kl: 0.15710: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 229  train vae loss: 0.33790 perp: 0.16484 kl: 0.17305:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 228: loss 0.42494 perp 0.25819 kl 0.16675


At epoch: 229  train vae loss: 0.33372 perp: 0.17034 kl: 0.16339: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 229  valid vae loss: 0.32487 perp: 0.15534 kl: 0.16953:   6%|▌         | 2/34 [00:00<00:02, 11.58batch/s]

>>>>average [92mtraining[0m of epoch 229: loss 0.34818 perp 0.18114 kl 0.16704


At epoch: 229  valid vae loss: 0.23406 perp: 0.07660 kl: 0.15747: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 230  train vae loss: 0.32202 perp: 0.15037 kl: 0.17165:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 229: loss 0.42577 perp 0.25806 kl 0.16772


At epoch: 230  train vae loss: 0.33289 perp: 0.16725 kl: 0.16564: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 230  valid vae loss: 0.35267 perp: 0.18892 kl: 0.16375:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 230: loss 0.32322 perp 0.15902 kl 0.16420


At epoch: 230  valid vae loss: 0.26965 perp: 0.11728 kl: 0.15237: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 231  train vae loss: 0.28063 perp: 0.11426 kl: 0.16638:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 230: loss 0.40906 perp 0.24697 kl 0.16209


At epoch: 231  train vae loss: 0.34704 perp: 0.18155 kl: 0.16549: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 231  valid vae loss: 0.35217 perp: 0.18642 kl: 0.16575:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 231: loss 0.36118 perp 0.19474 kl 0.16644


At epoch: 231  valid vae loss: 0.21746 perp: 0.06343 kl: 0.15402: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 232  train vae loss: 0.30916 perp: 0.14200 kl: 0.16716:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 231: loss 0.42953 perp 0.26552 kl 0.16400


At epoch: 232  train vae loss: 0.33550 perp: 0.17255 kl: 0.16295: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 232  valid vae loss: 0.32251 perp: 0.15401 kl: 0.16851:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 232: loss 0.35365 perp 0.18700 kl 0.16665


At epoch: 232  valid vae loss: 0.21303 perp: 0.05680 kl: 0.15624: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 233  train vae loss: 0.30467 perp: 0.13382 kl: 0.17085:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 232: loss 0.42984 perp 0.26299 kl 0.16684


At epoch: 233  train vae loss: 0.46694 perp: 0.29464 kl: 0.17231: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 233  valid vae loss: 0.37490 perp: 0.20426 kl: 0.17064:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 233: loss 0.34362 perp 0.17849 kl 0.16513


At epoch: 233  valid vae loss: 0.23800 perp: 0.07882 kl: 0.15917: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 234  train vae loss: 0.30300 perp: 0.13203 kl: 0.17098:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 233: loss 0.46641 perp 0.29738 kl 0.16903


At epoch: 234  train vae loss: 0.34643 perp: 0.18681 kl: 0.15962: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 234  valid vae loss: 0.30862 perp: 0.14812 kl: 0.16051:   6%|▌         | 2/34 [00:00<00:02, 11.59batch/s]

>>>>average [92mtraining[0m of epoch 234: loss 0.32134 perp 0.15697 kl 0.16437


At epoch: 234  valid vae loss: 0.24096 perp: 0.09195 kl: 0.14902: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 235  train vae loss: 0.29845 perp: 0.13835 kl: 0.16010:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 234: loss 0.42649 perp 0.26777 kl 0.15872


At epoch: 235  train vae loss: 0.41135 perp: 0.22263 kl: 0.18872: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 235  valid vae loss: 0.43880 perp: 0.24881 kl: 0.18999:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 235: loss 0.55272 perp 0.38262 kl 0.17010


At epoch: 235  valid vae loss: 0.29634 perp: 0.11851 kl: 0.17784: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 236  train vae loss: 0.38598 perp: 0.20603 kl: 0.17994:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 235: loss 0.53067 perp 0.34308 kl 0.18759


At epoch: 236  train vae loss: 0.26747 perp: 0.10213 kl: 0.16535: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 236  valid vae loss: 0.29649 perp: 0.13551 kl: 0.16097:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 236: loss 0.28867 perp 0.11865 kl 0.17002


At epoch: 236  valid vae loss: 0.26709 perp: 0.11630 kl: 0.15079: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 237  train vae loss: 0.23874 perp: 0.07984 kl: 0.15890:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 236: loss 0.38511 perp 0.22559 kl 0.15952
saving to best model since this is the best valid loss so far.----


At epoch: 237  train vae loss: 0.30593 perp: 0.15154 kl: 0.15439: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 237  valid vae loss: 0.30660 perp: 0.15170 kl: 0.15490:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 237: loss 0.26691 perp 0.10980 kl 0.15711


At epoch: 237  valid vae loss: 0.18761 perp: 0.04320 kl: 0.14440: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 238  train vae loss: 0.24159 perp: 0.08548 kl: 0.15610:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 237: loss 0.37903 perp 0.22568 kl 0.15336
saving to best model since this is the best valid loss so far.----


At epoch: 238  train vae loss: 0.29038 perp: 0.14097 kl: 0.14941: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 238  valid vae loss: 0.30678 perp: 0.15225 kl: 0.15453:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 238: loss 0.28718 perp 0.13249 kl 0.15469


At epoch: 238  valid vae loss: 0.32523 perp: 0.18100 kl: 0.14422: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 239  train vae loss: 0.29922 perp: 0.14663 kl: 0.15258:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 238: loss 0.40824 perp 0.25537 kl 0.15287


At epoch: 239  train vae loss: 0.29279 perp: 0.12096 kl: 0.17182: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 239  valid vae loss: 0.31233 perp: 0.14139 kl: 0.17094:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 239: loss 0.47449 perp 0.31017 kl 0.16432


At epoch: 239  valid vae loss: 0.32449 perp: 0.16432 kl: 0.16017: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 240  train vae loss: 0.30350 perp: 0.12993 kl: 0.17356:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 239: loss 0.39372 perp 0.22401 kl 0.16971


At epoch: 240  train vae loss: 0.25819 perp: 0.10675 kl: 0.15144: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 240  valid vae loss: 0.31911 perp: 0.16534 kl: 0.15378:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 240: loss 0.27254 perp 0.11151 kl 0.16104


At epoch: 240  valid vae loss: 0.28303 perp: 0.13900 kl: 0.14403: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 241  train vae loss: 0.25666 perp: 0.10753 kl: 0.14913:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 240: loss 0.36373 perp 0.21116 kl 0.15257
saving to best model since this is the best valid loss so far.----


At epoch: 241  train vae loss: 0.30601 perp: 0.15504 kl: 0.15097: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 241  valid vae loss: 0.29493 perp: 0.14279 kl: 0.15214:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 241: loss 0.26320 perp 0.11098 kl 0.15222


At epoch: 241  valid vae loss: 0.26856 perp: 0.12710 kl: 0.14147: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 242  train vae loss: 0.30721 perp: 0.15651 kl: 0.15071:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 241: loss 0.38095 perp 0.23028 kl 0.15067


At epoch: 242  train vae loss: 0.37250 perp: 0.21990 kl: 0.15259: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 242  valid vae loss: 0.31673 perp: 0.16452 kl: 0.15221:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 242: loss 0.28779 perp 0.13564 kl 0.15215


At epoch: 242  valid vae loss: 0.33513 perp: 0.19328 kl: 0.14185: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 243  train vae loss: 0.31429 perp: 0.16458 kl: 0.14970:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 242: loss 0.42373 perp 0.27297 kl 0.15076


At epoch: 243  train vae loss: 0.39090 perp: 0.23133 kl: 0.15958: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 243  valid vae loss: 0.30696 perp: 0.15271 kl: 0.15425:   6%|▌         | 2/34 [00:00<00:02, 11.61batch/s]

>>>>average [92mtraining[0m of epoch 243: loss 0.30456 perp 0.15102 kl 0.15354


At epoch: 243  valid vae loss: 0.34893 perp: 0.20519 kl: 0.14374: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 244  train vae loss: 0.30700 perp: 0.15389 kl: 0.15311:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 243: loss 0.40582 perp 0.25319 kl 0.15263


At epoch: 244  train vae loss: 0.36070 perp: 0.20207 kl: 0.15863: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 244  valid vae loss: 0.50860 perp: 0.35150 kl: 0.15711:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 244: loss 0.32009 perp 0.16555 kl 0.15454


At epoch: 244  valid vae loss: 0.31697 perp: 0.17074 kl: 0.14623: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 245  train vae loss: 0.40274 perp: 0.24653 kl: 0.15620:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 244: loss 0.57333 perp 0.41779 kl 0.15554


At epoch: 245  train vae loss: 0.30505 perp: 0.14188 kl: 0.16316: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 245  valid vae loss: 0.35409 perp: 0.19167 kl: 0.16242:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 245: loss 0.34596 perp 0.18879 kl 0.15716


At epoch: 245  valid vae loss: 0.31648 perp: 0.16469 kl: 0.15179: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 246  train vae loss: 0.26582 perp: 0.10966 kl: 0.15616:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 245: loss 0.44225 perp 0.28138 kl 0.16087


At epoch: 246  train vae loss: 0.27707 perp: 0.11583 kl: 0.16124: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 246  valid vae loss: 0.30217 perp: 0.14690 kl: 0.15527:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 246: loss 0.30744 perp 0.14943 kl 0.15801


At epoch: 246  valid vae loss: 0.21607 perp: 0.07170 kl: 0.14437: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 247  train vae loss: 0.23836 perp: 0.08407 kl: 0.15429:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 246: loss 0.37581 perp 0.22230 kl 0.15351


At epoch: 247  train vae loss: 0.27329 perp: 0.12241 kl: 0.15088: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 247  valid vae loss: 0.30279 perp: 0.15021 kl: 0.15258:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 247: loss 0.30452 perp 0.14904 kl 0.15548


At epoch: 247  valid vae loss: 0.19302 perp: 0.05078 kl: 0.14225: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 248  train vae loss: 0.24782 perp: 0.09751 kl: 0.15031:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 247: loss 0.38004 perp 0.22912 kl 0.15092


At epoch: 248  train vae loss: 0.34193 perp: 0.18197 kl: 0.15996: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 248  valid vae loss: 0.40775 perp: 0.24944 kl: 0.15831:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 248: loss 0.32377 perp 0.16846 kl 0.15531


At epoch: 248  valid vae loss: 0.24036 perp: 0.09245 kl: 0.14791: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 249  train vae loss: 0.31153 perp: 0.15351 kl: 0.15803:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 248: loss 0.40744 perp 0.25059 kl 0.15686


At epoch: 249  train vae loss: 0.28259 perp: 0.12904 kl: 0.15355: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 249  valid vae loss: 0.28177 perp: 0.12843 kl: 0.15334:   6%|▌         | 2/34 [00:00<00:02, 11.89batch/s]

>>>>average [92mtraining[0m of epoch 249: loss 0.29482 perp 0.14066 kl 0.15416


At epoch: 249  valid vae loss: 0.23006 perp: 0.08662 kl: 0.14344: 100%|██████████| 34/34 [00:01<00:00, 23.76batch/s]
At epoch: 250  train vae loss: 0.22818 perp: 0.07989 kl: 0.14829:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 249: loss 0.37392 perp 0.22229 kl 0.15163


At epoch: 250  train vae loss: 0.32581 perp: 0.17218 kl: 0.15364: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 250  valid vae loss: 0.37720 perp: 0.21933 kl: 0.15787:   6%|▌         | 2/34 [00:00<00:02, 11.65batch/s]

>>>>average [92mtraining[0m of epoch 250: loss 0.31535 perp 0.16070 kl 0.15465


At epoch: 250  valid vae loss: 0.25513 perp: 0.10839 kl: 0.14674: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 251  train vae loss: 0.36435 perp: 0.20182 kl: 0.16253:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 250: loss 0.47143 perp 0.31560 kl 0.15584


At epoch: 251  train vae loss: 0.30336 perp: 0.15392 kl: 0.14944: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 251  valid vae loss: 0.28807 perp: 0.13380 kl: 0.15427:   6%|▌         | 2/34 [00:00<00:02, 11.89batch/s]

>>>>average [92mtraining[0m of epoch 251: loss 0.32455 perp 0.16810 kl 0.15644


At epoch: 251  valid vae loss: 0.26257 perp: 0.11847 kl: 0.14410: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 252  train vae loss: 0.38424 perp: 0.22911 kl: 0.15513:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 251: loss 0.42545 perp 0.27276 kl 0.15269


At epoch: 252  train vae loss: 0.30564 perp: 0.15654 kl: 0.14910: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 252  valid vae loss: 0.35589 perp: 0.20135 kl: 0.15453:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 252: loss 0.29456 perp 0.14105 kl 0.15352


At epoch: 252  valid vae loss: 0.20368 perp: 0.05971 kl: 0.14397: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 253  train vae loss: 0.30622 perp: 0.15497 kl: 0.15126:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 252: loss 0.41311 perp 0.26022 kl 0.15289


At epoch: 253  train vae loss: 0.29753 perp: 0.13009 kl: 0.16745: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 253  valid vae loss: 0.27033 perp: 0.10649 kl: 0.16383:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 253: loss 0.38595 perp 0.22742 kl 0.15852


At epoch: 253  valid vae loss: 0.19240 perp: 0.03958 kl: 0.15282: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 254  train vae loss: 0.26114 perp: 0.09856 kl: 0.16258:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 253: loss 0.38273 perp 0.22004 kl 0.16269


At epoch: 254  train vae loss: 0.31872 perp: 0.16360 kl: 0.15512: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 254  valid vae loss: 0.28884 perp: 0.13729 kl: 0.15155:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 254: loss 0.26792 perp 0.11396 kl 0.15396


At epoch: 254  valid vae loss: 0.19532 perp: 0.05357 kl: 0.14175: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 255  train vae loss: 0.26449 perp: 0.11048 kl: 0.15401:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 254: loss 0.37226 perp 0.22166 kl 0.15060


At epoch: 255  train vae loss: 0.32838 perp: 0.18064 kl: 0.14774: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 255  valid vae loss: 0.26566 perp: 0.11830 kl: 0.14736:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 255: loss 0.26812 perp 0.11935 kl 0.14877


At epoch: 255  valid vae loss: 0.16160 perp: 0.02455 kl: 0.13705: 100%|██████████| 34/34 [00:01<00:00, 23.74batch/s]
At epoch: 256  train vae loss: 0.22677 perp: 0.07792 kl: 0.14885:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 255: loss 0.36770 perp 0.22169 kl 0.14601


At epoch: 256  train vae loss: 0.25509 perp: 0.10608 kl: 0.14901: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 256  valid vae loss: 0.34670 perp: 0.19658 kl: 0.15012:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 256: loss 0.29426 perp 0.14405 kl 0.15020


At epoch: 256  valid vae loss: 0.21918 perp: 0.07922 kl: 0.13996: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 257  train vae loss: 0.27672 perp: 0.12471 kl: 0.15201:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 256: loss 0.40068 perp 0.25194 kl 0.14874


At epoch: 257  train vae loss: 0.43824 perp: 0.27741 kl: 0.16083: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 257  valid vae loss: 0.47405 perp: 0.31543 kl: 0.15863:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 257: loss 0.32126 perp 0.17011 kl 0.15115


At epoch: 257  valid vae loss: 0.31171 perp: 0.16398 kl: 0.14774: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 258  train vae loss: 0.36586 perp: 0.20800 kl: 0.15787:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 257: loss 0.47139 perp 0.31450 kl 0.15688


At epoch: 258  train vae loss: 0.29972 perp: 0.15105 kl: 0.14867: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 258  valid vae loss: 0.32200 perp: 0.17199 kl: 0.15000:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 258: loss 0.28889 perp 0.13743 kl 0.15147


At epoch: 258  valid vae loss: 0.27038 perp: 0.13027 kl: 0.14010: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 259  train vae loss: 0.29328 perp: 0.14432 kl: 0.14896:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 258: loss 0.43789 perp 0.28924 kl 0.14865


At epoch: 259  train vae loss: 0.31560 perp: 0.16122 kl: 0.15438: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 259  valid vae loss: 0.30918 perp: 0.15948 kl: 0.14969:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 259: loss 0.29597 perp 0.14591 kl 0.15006


At epoch: 259  valid vae loss: 0.21055 perp: 0.07086 kl: 0.13968: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 260  train vae loss: 0.29981 perp: 0.14721 kl: 0.15260:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 259: loss 0.39998 perp 0.25180 kl 0.14818


At epoch: 260  train vae loss: 0.42892 perp: 0.28247 kl: 0.14645: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 260  valid vae loss: 0.30536 perp: 0.15394 kl: 0.15142:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 260: loss 0.28358 perp 0.13519 kl 0.14840


At epoch: 260  valid vae loss: 0.48434 perp: 0.34334 kl: 0.14100: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 261  train vae loss: 0.32170 perp: 0.16922 kl: 0.15248:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 260: loss 0.41864 perp 0.26866 kl 0.14998


At epoch: 261  train vae loss: 0.29993 perp: 0.15687 kl: 0.14307: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 261  valid vae loss: 0.31499 perp: 0.16747 kl: 0.14751:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 261: loss 0.28627 perp 0.13697 kl 0.14930


At epoch: 261  valid vae loss: 0.24311 perp: 0.10549 kl: 0.13761: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 262  train vae loss: 0.33571 perp: 0.18840 kl: 0.14732:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 261: loss 0.40457 perp 0.25864 kl 0.14594


At epoch: 262  train vae loss: 0.26778 perp: 0.11977 kl: 0.14801: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 262  valid vae loss: 0.28729 perp: 0.13713 kl: 0.15016:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 262: loss 0.30877 perp 0.15887 kl 0.14990


At epoch: 262  valid vae loss: 0.18575 perp: 0.04527 kl: 0.14048: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 263  train vae loss: 0.28141 perp: 0.13317 kl: 0.14824:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 262: loss 0.39942 perp 0.25054 kl 0.14888


At epoch: 263  train vae loss: 0.26902 perp: 0.11840 kl: 0.15062: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 263  valid vae loss: 0.28730 perp: 0.13857 kl: 0.14873:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 263: loss 0.27778 perp 0.13105 kl 0.14673


At epoch: 263  valid vae loss: 0.24438 perp: 0.10545 kl: 0.13893: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 264  train vae loss: 0.27784 perp: 0.13097 kl: 0.14687:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 263: loss 0.40224 perp 0.25464 kl 0.14760


At epoch: 264  train vae loss: 0.22045 perp: 0.06895 kl: 0.15150: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 264  valid vae loss: 0.26204 perp: 0.11522 kl: 0.14682:   6%|▌         | 2/34 [00:00<00:02, 11.35batch/s]

>>>>average [92mtraining[0m of epoch 264: loss 0.28159 perp 0.13475 kl 0.14684


At epoch: 264  valid vae loss: 0.17784 perp: 0.04063 kl: 0.13721: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 265  train vae loss: 0.25092 perp: 0.10051 kl: 0.15041:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 264: loss 0.36154 perp 0.21622 kl 0.14532
saving to best model since this is the best valid loss so far.----


At epoch: 265  train vae loss: 0.22700 perp: 0.07723 kl: 0.14977: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 265  valid vae loss: 0.29740 perp: 0.14766 kl: 0.14974:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 265: loss 0.30212 perp 0.15262 kl 0.14950


At epoch: 265  valid vae loss: 0.18881 perp: 0.04875 kl: 0.14006: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 266  train vae loss: 0.34708 perp: 0.20014 kl: 0.14693:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 265: loss 0.39594 perp 0.24736 kl 0.14858


At epoch: 266  train vae loss: 0.26557 perp: 0.11604 kl: 0.14953: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 266  valid vae loss: 0.32140 perp: 0.17142 kl: 0.14997:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 266: loss 0.28006 perp 0.13333 kl 0.14672


At epoch: 266  valid vae loss: 0.19785 perp: 0.05722 kl: 0.14062: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 267  train vae loss: 0.27691 perp: 0.13250 kl: 0.14441:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 266: loss 0.40472 perp 0.25610 kl 0.14862


At epoch: 267  train vae loss: 0.25451 perp: 0.11092 kl: 0.14358: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 267  valid vae loss: 0.24643 perp: 0.10200 kl: 0.14443:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 267: loss 0.28171 perp 0.13403 kl 0.14768


At epoch: 267  valid vae loss: 0.22177 perp: 0.08659 kl: 0.13518: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 268  train vae loss: 0.21620 perp: 0.07464 kl: 0.14156:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 267: loss 0.35996 perp 0.21682 kl 0.14314
saving to best model since this is the best valid loss so far.----


At epoch: 268  train vae loss: 0.28294 perp: 0.14390 kl: 0.13904: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 268  valid vae loss: 0.26394 perp: 0.12082 kl: 0.14312:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 268: loss 0.27270 perp 0.12874 kl 0.14396


At epoch: 268  valid vae loss: 0.22175 perp: 0.08848 kl: 0.13327: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 269  train vae loss: 0.24305 perp: 0.10011 kl: 0.14294:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 268: loss 0.37368 perp 0.23215 kl 0.14153


At epoch: 269  train vae loss: 0.32115 perp: 0.17424 kl: 0.14691: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 269  valid vae loss: 0.32977 perp: 0.18143 kl: 0.14834:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 269: loss 0.29106 perp 0.14641 kl 0.14465


At epoch: 269  valid vae loss: 0.25074 perp: 0.11218 kl: 0.13857: 100%|██████████| 34/34 [00:01<00:00, 23.68batch/s]
At epoch: 270  train vae loss: 0.34660 perp: 0.19451 kl: 0.15210:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 269: loss 0.44627 perp 0.29940 kl 0.14687


At epoch: 270  train vae loss: 0.28457 perp: 0.14606 kl: 0.13851: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 270  valid vae loss: 0.28472 perp: 0.14109 kl: 0.14363:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 270: loss 0.27378 perp 0.12818 kl 0.14560


At epoch: 270  valid vae loss: 0.24431 perp: 0.10967 kl: 0.13465: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 271  train vae loss: 0.29460 perp: 0.14806 kl: 0.14654:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 270: loss 0.40112 perp 0.25848 kl 0.14263


At epoch: 271  train vae loss: 0.26171 perp: 0.11107 kl: 0.15063: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 271  valid vae loss: 0.29175 perp: 0.13696 kl: 0.15479:   6%|▌         | 2/34 [00:00<00:02, 11.58batch/s]

>>>>average [92mtraining[0m of epoch 271: loss 0.41118 perp 0.25913 kl 0.15205


At epoch: 271  valid vae loss: 0.19253 perp: 0.04733 kl: 0.14520: 100%|██████████| 34/34 [00:01<00:00, 23.44batch/s]
At epoch: 272  train vae loss: 0.25768 perp: 0.10260 kl: 0.15508:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 271: loss 0.38078 perp 0.22729 kl 0.15349


At epoch: 272  train vae loss: 0.18706 perp: 0.04678 kl: 0.14027: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 272  valid vae loss: 0.21955 perp: 0.08302 kl: 0.13653:   6%|▌         | 2/34 [00:00<00:02, 11.34batch/s]

>>>>average [92mtraining[0m of epoch 272: loss 0.21712 perp 0.07331 kl 0.14381


At epoch: 272  valid vae loss: 0.16239 perp: 0.03472 kl: 0.12767: 100%|██████████| 34/34 [00:01<00:00, 23.36batch/s]
At epoch: 273  train vae loss: 0.18465 perp: 0.04917 kl: 0.13548:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 272: loss 0.31722 perp 0.18178 kl 0.13545
saving to best model since this is the best valid loss so far.----


At epoch: 273  train vae loss: 0.21522 perp: 0.08158 kl: 0.13364: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 273  valid vae loss: 0.23436 perp: 0.09746 kl: 0.13690:   6%|▌         | 2/34 [00:00<00:02, 11.80batch/s]

>>>>average [92mtraining[0m of epoch 273: loss 0.22303 perp 0.08766 kl 0.13537


At epoch: 273  valid vae loss: 0.25501 perp: 0.12672 kl: 0.12829: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 274  train vae loss: 0.20432 perp: 0.06914 kl: 0.13518:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 273: loss 0.35481 perp 0.21916 kl 0.13565


At epoch: 274  train vae loss: 0.24647 perp: 0.11116 kl: 0.13531: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 274  valid vae loss: 0.23050 perp: 0.09502 kl: 0.13547:   6%|▌         | 2/34 [00:00<00:02, 11.89batch/s]

>>>>average [92mtraining[0m of epoch 274: loss 0.23799 perp 0.10258 kl 0.13541


At epoch: 274  valid vae loss: 0.17527 perp: 0.04828 kl: 0.12699: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 275  train vae loss: 0.21949 perp: 0.08881 kl: 0.13068:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 274: loss 0.33955 perp 0.20501 kl 0.13453


At epoch: 275  train vae loss: 0.49632 perp: 0.33651 kl: 0.15981: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 275  valid vae loss: 0.46306 perp: 0.30936 kl: 0.15369:   6%|▌         | 2/34 [00:00<00:02, 11.86batch/s]

>>>>average [92mtraining[0m of epoch 275: loss 0.29287 perp 0.15340 kl 0.13946


At epoch: 275  valid vae loss: 0.37041 perp: 0.22735 kl: 0.14306: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 276  train vae loss: 0.49788 perp: 0.34492 kl: 0.15296:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 275: loss 0.58659 perp 0.43396 kl 0.15263


At epoch: 276  train vae loss: 0.19956 perp: 0.05729 kl: 0.14227: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 276  valid vae loss: 0.25143 perp: 0.11297 kl: 0.13847:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 276: loss 0.28585 perp 0.13902 kl 0.14683


At epoch: 276  valid vae loss: 0.17674 perp: 0.04689 kl: 0.12985: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 277  train vae loss: 0.20072 perp: 0.06366 kl: 0.13706:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 276: loss 0.33491 perp 0.19744 kl 0.13748


At epoch: 277  train vae loss: 0.27666 perp: 0.13824 kl: 0.13842: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 277  valid vae loss: 0.24579 perp: 0.11096 kl: 0.13483:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 277: loss 0.22929 perp 0.09377 kl 0.13552


At epoch: 277  valid vae loss: 0.18547 perp: 0.05947 kl: 0.12600: 100%|██████████| 34/34 [00:01<00:00, 23.68batch/s]
At epoch: 278  train vae loss: 0.28750 perp: 0.15387 kl: 0.13364:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 277: loss 0.35414 perp 0.22031 kl 0.13383


At epoch: 278  train vae loss: 0.25261 perp: 0.11115 kl: 0.14146: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 278  valid vae loss: 0.26905 perp: 0.13067 kl: 0.13839:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 278: loss 0.26841 perp 0.13005 kl 0.13836


At epoch: 278  valid vae loss: 0.16424 perp: 0.03471 kl: 0.12954: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 279  train vae loss: 0.21343 perp: 0.07352 kl: 0.13991:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 278: loss 0.32833 perp 0.19108 kl 0.13725


At epoch: 279  train vae loss: 0.25082 perp: 0.11037 kl: 0.14046: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 279  valid vae loss: 0.28171 perp: 0.14407 kl: 0.13764:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 279: loss 0.25795 perp 0.11958 kl 0.13837


At epoch: 279  valid vae loss: 0.17552 perp: 0.04683 kl: 0.12869: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 280  train vae loss: 0.22584 perp: 0.08733 kl: 0.13851:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 279: loss 0.36383 perp 0.22741 kl 0.13642


At epoch: 280  train vae loss: 0.23274 perp: 0.09988 kl: 0.13286: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 280  valid vae loss: 0.25479 perp: 0.11666 kl: 0.13813:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 280: loss 0.25725 perp 0.12024 kl 0.13700


At epoch: 280  valid vae loss: 0.16949 perp: 0.04061 kl: 0.12889: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 281  train vae loss: 0.30550 perp: 0.16718 kl: 0.13832:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 280: loss 0.34631 perp 0.20910 kl 0.13721


At epoch: 281  train vae loss: 0.26763 perp: 0.12692 kl: 0.14070: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 281  valid vae loss: 0.27212 perp: 0.13367 kl: 0.13845:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 281: loss 0.26061 perp 0.12304 kl 0.13757


At epoch: 281  valid vae loss: 0.28386 perp: 0.15498 kl: 0.12887: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 282  train vae loss: 0.28221 perp: 0.14685 kl: 0.13536:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 281: loss 0.43234 perp 0.29509 kl 0.13726


At epoch: 282  train vae loss: 0.19678 perp: 0.05723 kl: 0.13955: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 282  valid vae loss: 0.27323 perp: 0.13226 kl: 0.14097:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 282: loss 0.28507 perp 0.14464 kl 0.14043


At epoch: 282  valid vae loss: 0.23977 perp: 0.10746 kl: 0.13231: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 283  train vae loss: 0.20862 perp: 0.07367 kl: 0.13495:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 282: loss 0.36381 perp 0.22353 kl 0.14028


At epoch: 283  train vae loss: 0.21952 perp: 0.07094 kl: 0.14858: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 283  valid vae loss: 0.29786 perp: 0.14855 kl: 0.14930:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 283: loss 0.32621 perp 0.18658 kl 0.13963


At epoch: 283  valid vae loss: 0.24586 perp: 0.10572 kl: 0.14014: 100%|██████████| 34/34 [00:01<00:00, 23.71batch/s]
At epoch: 284  train vae loss: 0.24935 perp: 0.10331 kl: 0.14605:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 283: loss 0.37441 perp 0.22611 kl 0.14830


At epoch: 284  train vae loss: 0.19531 perp: 0.06519 kl: 0.13012: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 284  valid vae loss: 0.20322 perp: 0.07362 kl: 0.12961:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 284: loss 0.21319 perp 0.07553 kl 0.13766


At epoch: 284  valid vae loss: 0.15729 perp: 0.03658 kl: 0.12071: 100%|██████████| 34/34 [00:01<00:00, 23.68batch/s]
At epoch: 285  train vae loss: 0.20576 perp: 0.07613 kl: 0.12963:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 284: loss 0.29253 perp 0.16423 kl 0.12830
saving to best model since this is the best valid loss so far.----


At epoch: 285  train vae loss: 0.22434 perp: 0.09235 kl: 0.13199: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 285  valid vae loss: 0.25792 perp: 0.12491 kl: 0.13301:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 285: loss 0.22922 perp 0.09746 kl 0.13176


At epoch: 285  valid vae loss: 0.16791 perp: 0.04340 kl: 0.12452: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 286  train vae loss: 0.19748 perp: 0.06785 kl: 0.12963:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 285: loss 0.33176 perp 0.19959 kl 0.13217


At epoch: 286  train vae loss: 0.25922 perp: 0.12183 kl: 0.13739: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 286  valid vae loss: 0.30187 perp: 0.16664 kl: 0.13522:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 286: loss 0.23753 perp 0.10603 kl 0.13150


At epoch: 286  valid vae loss: 0.17489 perp: 0.04828 kl: 0.12662: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 287  train vae loss: 0.24451 perp: 0.10926 kl: 0.13526:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 286: loss 0.36704 perp 0.23302 kl 0.13402


At epoch: 287  train vae loss: 0.26774 perp: 0.13357 kl: 0.13417: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 287  valid vae loss: 0.25761 perp: 0.12186 kl: 0.13575:   6%|▌         | 2/34 [00:00<00:02, 11.80batch/s]

>>>>average [92mtraining[0m of epoch 287: loss 0.26115 perp 0.12703 kl 0.13412


At epoch: 287  valid vae loss: 0.19897 perp: 0.07206 kl: 0.12691: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 288  train vae loss: 0.22381 perp: 0.08570 kl: 0.13811:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 287: loss 0.35927 perp 0.22459 kl 0.13468


At epoch: 288  train vae loss: 0.24725 perp: 0.11489 kl: 0.13236: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 288  valid vae loss: 0.24134 perp: 0.11047 kl: 0.13087:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 288: loss 0.23922 perp 0.10527 kl 0.13395


At epoch: 288  valid vae loss: 0.38055 perp: 0.25836 kl: 0.12219: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 289  train vae loss: 0.20394 perp: 0.07795 kl: 0.12599:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 288: loss 0.34049 perp 0.21093 kl 0.12955


At epoch: 289  train vae loss: 0.27427 perp: 0.13953 kl: 0.13474: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 289  valid vae loss: 0.23609 perp: 0.10027 kl: 0.13582:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 289: loss 0.24621 perp 0.11323 kl 0.13298


At epoch: 289  valid vae loss: 0.17439 perp: 0.04769 kl: 0.12670: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 290  train vae loss: 0.26751 perp: 0.12770 kl: 0.13981:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 289: loss 0.32878 perp 0.19402 kl 0.13475


At epoch: 290  train vae loss: 0.27935 perp: 0.14651 kl: 0.13284: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 290  valid vae loss: 0.29027 perp: 0.15898 kl: 0.13129:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 290: loss 0.23743 perp 0.10541 kl 0.13201


At epoch: 290  valid vae loss: 0.35345 perp: 0.23094 kl: 0.12251: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 291  train vae loss: 0.24811 perp: 0.11810 kl: 0.13001:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 290: loss 0.38047 perp 0.24996 kl 0.13051


At epoch: 291  train vae loss: 0.23496 perp: 0.10257 kl: 0.13240: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 291  valid vae loss: 0.22562 perp: 0.09321 kl: 0.13241:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 291: loss 0.26475 perp 0.12950 kl 0.13525


At epoch: 291  valid vae loss: 0.18903 perp: 0.06503 kl: 0.12400: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 292  train vae loss: 0.23087 perp: 0.09621 kl: 0.13466:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 291: loss 0.33263 perp 0.20119 kl 0.13144


At epoch: 292  train vae loss: 0.28821 perp: 0.15482 kl: 0.13339: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 292  valid vae loss: 0.23812 perp: 0.10504 kl: 0.13308:   6%|▌         | 2/34 [00:00<00:02, 11.55batch/s]

>>>>average [92mtraining[0m of epoch 292: loss 0.24970 perp 0.11611 kl 0.13359


At epoch: 292  valid vae loss: 0.15336 perp: 0.02858 kl: 0.12479: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 293  train vae loss: 0.23483 perp: 0.10287 kl: 0.13196:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 292: loss 0.34270 perp 0.21061 kl 0.13209


At epoch: 293  train vae loss: 0.25026 perp: 0.11817 kl: 0.13209: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 293  valid vae loss: 0.24812 perp: 0.11117 kl: 0.13694:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 293: loss 0.27141 perp 0.13639 kl 0.13503


At epoch: 293  valid vae loss: 0.17435 perp: 0.04605 kl: 0.12830: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 294  train vae loss: 0.19913 perp: 0.06125 kl: 0.13787:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 293: loss 0.33735 perp 0.20176 kl 0.13559


At epoch: 294  train vae loss: 0.26612 perp: 0.13689 kl: 0.12923: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 294  valid vae loss: 0.24028 perp: 0.10851 kl: 0.13177:   6%|▌         | 2/34 [00:00<00:02, 11.87batch/s]

>>>>average [92mtraining[0m of epoch 294: loss 0.24171 perp 0.10729 kl 0.13442


At epoch: 294  valid vae loss: 0.17958 perp: 0.05637 kl: 0.12321: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 295  train vae loss: 0.18467 perp: 0.05158 kl: 0.13309:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 294: loss 0.32682 perp 0.19633 kl 0.13049


At epoch: 295  train vae loss: 0.24723 perp: 0.11239 kl: 0.13484: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 295  valid vae loss: 0.27839 perp: 0.14247 kl: 0.13592:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 295: loss 0.23732 perp 0.10632 kl 0.13100


At epoch: 295  valid vae loss: 0.17059 perp: 0.04329 kl: 0.12730: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 296  train vae loss: 0.25255 perp: 0.11347 kl: 0.13909:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 295: loss 0.37152 perp 0.23677 kl 0.13475


At epoch: 296  train vae loss: 0.25154 perp: 0.11251 kl: 0.13902: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 296  valid vae loss: 0.34360 perp: 0.20583 kl: 0.13777:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 296: loss 0.26586 perp 0.13121 kl 0.13465


At epoch: 296  valid vae loss: 0.21300 perp: 0.08368 kl: 0.12932: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 297  train vae loss: 0.20369 perp: 0.07207 kl: 0.13162:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 296: loss 0.37238 perp 0.23560 kl 0.13678


At epoch: 297  train vae loss: 0.23593 perp: 0.10850 kl: 0.12743: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 297  valid vae loss: 0.23948 perp: 0.10916 kl: 0.13032:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 297: loss 0.23615 perp 0.10299 kl 0.13316


At epoch: 297  valid vae loss: 0.26364 perp: 0.14115 kl: 0.12249: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 298  train vae loss: 0.18460 perp: 0.05604 kl: 0.12856:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 297: loss 0.32851 perp 0.19897 kl 0.12954


At epoch: 298  train vae loss: 0.31768 perp: 0.18101 kl: 0.13667: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 298  valid vae loss: 0.32345 perp: 0.18669 kl: 0.13676:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 298: loss 0.26104 perp 0.12722 kl 0.13381


At epoch: 298  valid vae loss: 0.24361 perp: 0.11500 kl: 0.12861: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 299  train vae loss: 0.36567 perp: 0.22615 kl: 0.13951:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 298: loss 0.39095 perp 0.25498 kl 0.13596


At epoch: 299  train vae loss: 0.20661 perp: 0.07489 kl: 0.13172: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 299  valid vae loss: 0.23656 perp: 0.10776 kl: 0.12880:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 299: loss 0.23088 perp 0.09836 kl 0.13252


At epoch: 299  valid vae loss: 0.13810 perp: 0.01706 kl: 0.12104: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 300  train vae loss: 0.20381 perp: 0.07706 kl: 0.12675:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 299: loss 0.32201 perp 0.19413 kl 0.12789


At epoch: 300  train vae loss: 0.24234 perp: 0.11055 kl: 0.13179: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 300  valid vae loss: 0.26060 perp: 0.12995 kl: 0.13065:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 300: loss 0.23037 perp 0.10196 kl 0.12841


At epoch: 300  valid vae loss: 0.15656 perp: 0.03405 kl: 0.12251: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 301  train vae loss: 0.21953 perp: 0.08669 kl: 0.13284:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 300: loss 0.33186 perp 0.20270 kl 0.12915


At epoch: 301  train vae loss: 0.19387 perp: 0.05977 kl: 0.13410: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 301  valid vae loss: 0.22333 perp: 0.09295 kl: 0.13038:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 301: loss 0.32839 perp 0.18849 kl 0.13990


At epoch: 301  valid vae loss: 0.18281 perp: 0.06048 kl: 0.12233: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 302  train vae loss: 0.17765 perp: 0.04898 kl: 0.12867:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 301: loss 0.30165 perp 0.17208 kl 0.12957


At epoch: 302  train vae loss: 0.19281 perp: 0.06922 kl: 0.12359: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 302  valid vae loss: 0.19818 perp: 0.07681 kl: 0.12137:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 302: loss 0.19003 perp 0.06413 kl 0.12590


At epoch: 302  valid vae loss: 0.14019 perp: 0.02636 kl: 0.11383: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 303  train vae loss: 0.17613 perp: 0.05809 kl: 0.11804:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 302: loss 0.29901 perp 0.17853 kl 0.12048


At epoch: 303  train vae loss: 0.22090 perp: 0.09661 kl: 0.12429: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 303  valid vae loss: 0.25290 perp: 0.12884 kl: 0.12406:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 303: loss 0.20326 perp 0.08036 kl 0.12290


At epoch: 303  valid vae loss: 0.14995 perp: 0.03329 kl: 0.11666: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 304  train vae loss: 0.20740 perp: 0.08257 kl: 0.12484:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 303: loss 0.33203 perp 0.20891 kl 0.12312


At epoch: 304  train vae loss: 0.18309 perp: 0.06128 kl: 0.12181: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 304  valid vae loss: 0.21463 perp: 0.09279 kl: 0.12183:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 304: loss 0.20816 perp 0.08589 kl 0.12227


At epoch: 304  valid vae loss: 0.15336 perp: 0.03902 kl: 0.11434: 100%|██████████| 34/34 [00:01<00:00, 23.70batch/s]
At epoch: 305  train vae loss: 0.21114 perp: 0.09262 kl: 0.11851:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 304: loss 0.31756 perp 0.19634 kl 0.12122


At epoch: 305  train vae loss: 0.23277 perp: 0.11185 kl: 0.12092: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 305  valid vae loss: 0.26364 perp: 0.13979 kl: 0.12386:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 305: loss 0.22373 perp 0.09950 kl 0.12423


At epoch: 305  valid vae loss: 0.20176 perp: 0.08541 kl: 0.11635: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 306  train vae loss: 0.21732 perp: 0.09215 kl: 0.12517:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 305: loss 0.33586 perp 0.21288 kl 0.12298


At epoch: 306  train vae loss: 0.34252 perp: 0.21480 kl: 0.12771: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 306  valid vae loss: 0.31474 perp: 0.18855 kl: 0.12619:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 306: loss 0.22388 perp 0.09936 kl 0.12452


At epoch: 306  valid vae loss: 0.49069 perp: 0.37212 kl: 0.11857: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 307  train vae loss: 0.30640 perp: 0.18306 kl: 0.12334:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 306: loss 0.42534 perp 0.29991 kl 0.12544


At epoch: 307  train vae loss: 0.25337 perp: 0.12612 kl: 0.12725: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 307  valid vae loss: 0.21115 perp: 0.08289 kl: 0.12827:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 307: loss 0.23843 perp 0.11109 kl 0.12734


At epoch: 307  valid vae loss: 0.22635 perp: 0.10542 kl: 0.12092: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 308  train vae loss: 0.17775 perp: 0.05085 kl: 0.12690:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 307: loss 0.33641 perp 0.20888 kl 0.12754


At epoch: 308  train vae loss: 0.24258 perp: 0.11037 kl: 0.13220: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 308  valid vae loss: 0.23826 perp: 0.10968 kl: 0.12858:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 308: loss 0.23154 perp 0.10523 kl 0.12631


At epoch: 308  valid vae loss: 0.17685 perp: 0.05572 kl: 0.12113: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 309  train vae loss: 0.25308 perp: 0.12048 kl: 0.13260:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 308: loss 0.34669 perp 0.21908 kl 0.12760


At epoch: 309  train vae loss: 0.24932 perp: 0.12310 kl: 0.12622: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 309  valid vae loss: 0.21763 perp: 0.09456 kl: 0.12306:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 309: loss 0.22936 perp 0.10206 kl 0.12729


At epoch: 309  valid vae loss: 0.14422 perp: 0.02840 kl: 0.11582: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 310  train vae loss: 0.22431 perp: 0.09512 kl: 0.12919:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 309: loss 0.29600 perp 0.17337 kl 0.12263


At epoch: 310  train vae loss: 0.17032 perp: 0.04174 kl: 0.12858: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 310  valid vae loss: 0.24803 perp: 0.12074 kl: 0.12730:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 310: loss 0.24718 perp 0.11976 kl 0.12742


At epoch: 310  valid vae loss: 0.16496 perp: 0.04499 kl: 0.11997: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 311  train vae loss: 0.23836 perp: 0.11364 kl: 0.12472:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 310: loss 0.33059 perp 0.20392 kl 0.12667


At epoch: 311  train vae loss: 0.24372 perp: 0.12277 kl: 0.12094: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 311  valid vae loss: 0.25715 perp: 0.13274 kl: 0.12441:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 311: loss 0.22242 perp 0.09691 kl 0.12552


At epoch: 311  valid vae loss: 0.19158 perp: 0.07368 kl: 0.11790: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 312  train vae loss: 0.29800 perp: 0.17274 kl: 0.12525:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 311: loss 0.37084 perp 0.24698 kl 0.12386


At epoch: 312  train vae loss: 0.23006 perp: 0.10040 kl: 0.12965: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 312  valid vae loss: 0.21999 perp: 0.09103 kl: 0.12896:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 312: loss 0.24630 perp 0.11860 kl 0.12770


At epoch: 312  valid vae loss: 0.17522 perp: 0.05403 kl: 0.12120: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 313  train vae loss: 0.22990 perp: 0.10094 kl: 0.12896:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 312: loss 0.32313 perp 0.19508 kl 0.12806


At epoch: 313  train vae loss: 0.16652 perp: 0.04039 kl: 0.12613: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 313  valid vae loss: 0.32123 perp: 0.19482 kl: 0.12641:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 313: loss 0.22326 perp 0.09740 kl 0.12586


At epoch: 313  valid vae loss: 0.13811 perp: 0.01909 kl: 0.11902: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 314  train vae loss: 0.28092 perp: 0.15395 kl: 0.12698:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 313: loss 0.36062 perp 0.23515 kl 0.12548


At epoch: 314  train vae loss: 0.21922 perp: 0.09831 kl: 0.12092: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 314  valid vae loss: 0.25741 perp: 0.13817 kl: 0.11924:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 314: loss 0.21288 perp 0.08887 kl 0.12402


At epoch: 314  valid vae loss: 0.13934 perp: 0.02714 kl: 0.11220: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 315  train vae loss: 0.21989 perp: 0.09794 kl: 0.12195:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 314: loss 0.32117 perp 0.20269 kl 0.11848


At epoch: 315  train vae loss: 0.32031 perp: 0.19934 kl: 0.12097: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 315  valid vae loss: 0.19765 perp: 0.07676 kl: 0.12088:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 315: loss 0.22061 perp 0.09762 kl 0.12299


At epoch: 315  valid vae loss: 0.13508 perp: 0.02069 kl: 0.11439: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 316  train vae loss: 0.26205 perp: 0.14221 kl: 0.11984:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 315: loss 0.31535 perp 0.19515 kl 0.12020


At epoch: 316  train vae loss: 0.27667 perp: 0.15559 kl: 0.12108: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 316  valid vae loss: 0.20911 perp: 0.08641 kl: 0.12270:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 316: loss 0.23226 perp 0.10763 kl 0.12463


At epoch: 316  valid vae loss: 0.15070 perp: 0.03516 kl: 0.11554: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 317  train vae loss: 0.24068 perp: 0.11616 kl: 0.12452:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 316: loss 0.32981 perp 0.20791 kl 0.12191


At epoch: 317  train vae loss: 0.21779 perp: 0.09175 kl: 0.12605: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 317  valid vae loss: 0.20447 perp: 0.08326 kl: 0.12121:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 317: loss 0.22419 perp 0.10006 kl 0.12413


At epoch: 317  valid vae loss: 0.17952 perp: 0.06519 kl: 0.11433: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 318  train vae loss: 0.22849 perp: 0.10564 kl: 0.12285:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 317: loss 0.31722 perp 0.19681 kl 0.12041


At epoch: 318  train vae loss: 0.18871 perp: 0.05388 kl: 0.13483: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 318  valid vae loss: 0.21336 perp: 0.08396 kl: 0.12941:   6%|▌         | 2/34 [00:00<00:02, 11.48batch/s]

>>>>average [92mtraining[0m of epoch 318: loss 0.32959 perp 0.20062 kl 0.12897


At epoch: 318  valid vae loss: 0.14038 perp: 0.01851 kl: 0.12186: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 319  train vae loss: 0.20582 perp: 0.07896 kl: 0.12686:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 318: loss 0.29404 perp 0.16549 kl 0.12855


At epoch: 319  train vae loss: 0.21731 perp: 0.09536 kl: 0.12196: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 319  valid vae loss: 0.20115 perp: 0.08100 kl: 0.12015:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 319: loss 0.17711 perp 0.05481 kl 0.12230


At epoch: 319  valid vae loss: 0.13357 perp: 0.01996 kl: 0.11361: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 320  train vae loss: 0.16670 perp: 0.04801 kl: 0.11870:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 319: loss 0.28454 perp 0.16523 kl 0.11931
saving to best model since this is the best valid loss so far.----


At epoch: 320  train vae loss: 0.21499 perp: 0.09251 kl: 0.12248: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 320  valid vae loss: 0.22051 perp: 0.10376 kl: 0.11675:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 320: loss 0.18779 perp 0.06878 kl 0.11901


At epoch: 320  valid vae loss: 0.12644 perp: 0.01678 kl: 0.10966: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 321  train vae loss: 0.19338 perp: 0.07961 kl: 0.11377:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 320: loss 0.29330 perp 0.17718 kl 0.11612


At epoch: 321  train vae loss: 0.20376 perp: 0.09046 kl: 0.11330: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 321  valid vae loss: 0.25473 perp: 0.13779 kl: 0.11693:   6%|▌         | 2/34 [00:00<00:02, 11.61batch/s]

>>>>average [92mtraining[0m of epoch 321: loss 0.18875 perp 0.07257 kl 0.11618


At epoch: 321  valid vae loss: 0.14401 perp: 0.03408 kl: 0.10992: 100%|██████████| 34/34 [00:01<00:00, 23.46batch/s]
At epoch: 322  train vae loss: 0.17135 perp: 0.05767 kl: 0.11368:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 321: loss 0.31796 perp 0.20179 kl 0.11617


At epoch: 322  train vae loss: 0.18806 perp: 0.06879 kl: 0.11927: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 322  valid vae loss: 0.21491 perp: 0.09572 kl: 0.11920:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 322: loss 0.20974 perp 0.09263 kl 0.11711


At epoch: 322  valid vae loss: 0.14672 perp: 0.03483 kl: 0.11189: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 323  train vae loss: 0.23067 perp: 0.10743 kl: 0.12324:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 322: loss 0.32138 perp 0.20311 kl 0.11827


At epoch: 323  train vae loss: 0.24742 perp: 0.12323 kl: 0.12418: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 323  valid vae loss: 0.24249 perp: 0.11943 kl: 0.12306:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 323: loss 0.21683 perp 0.09672 kl 0.12011


At epoch: 323  valid vae loss: 0.13569 perp: 0.02086 kl: 0.11483: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 324  train vae loss: 0.23502 perp: 0.11503 kl: 0.11999:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 323: loss 0.32419 perp 0.20245 kl 0.12174


At epoch: 324  train vae loss: 0.16432 perp: 0.04629 kl: 0.11804: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 324  valid vae loss: 0.18496 perp: 0.07167 kl: 0.11329:   6%|▌         | 2/34 [00:00<00:02, 11.58batch/s]

>>>>average [92mtraining[0m of epoch 324: loss 0.19223 perp 0.07434 kl 0.11790


At epoch: 324  valid vae loss: 0.11660 perp: 0.01026 kl: 0.10634: 100%|██████████| 34/34 [00:01<00:00, 23.44batch/s]
At epoch: 325  train vae loss: 0.19853 perp: 0.08587 kl: 0.11266:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 324: loss 0.28606 perp 0.17359 kl 0.11247


At epoch: 325  train vae loss: 0.20567 perp: 0.08340 kl: 0.12226: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 325  valid vae loss: 0.23620 perp: 0.11528 kl: 0.12092:   6%|▌         | 2/34 [00:00<00:02, 11.87batch/s]

>>>>average [92mtraining[0m of epoch 325: loss 0.21704 perp 0.09956 kl 0.11748


At epoch: 325  valid vae loss: 0.14442 perp: 0.03010 kl: 0.11432: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 326  train vae loss: 0.24456 perp: 0.11972 kl: 0.12484:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 325: loss 0.33094 perp 0.21060 kl 0.12034


At epoch: 326  train vae loss: 0.44736 perp: 0.31843 kl: 0.12893: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 326  valid vae loss: 0.38345 perp: 0.25125 kl: 0.13221:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 326: loss 0.23313 perp 0.11214 kl 0.12099


At epoch: 326  valid vae loss: 0.36506 perp: 0.24053 kl: 0.12453: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 327  train vae loss: 0.47063 perp: 0.33992 kl: 0.13070:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 326: loss 0.53320 perp 0.40194 kl 0.13126


At epoch: 327  train vae loss: 0.25936 perp: 0.13283 kl: 0.12653: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 327  valid vae loss: 0.18502 perp: 0.06332 kl: 0.12170:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 327: loss 0.23221 perp 0.10640 kl 0.12581


At epoch: 327  valid vae loss: 0.12997 perp: 0.01586 kl: 0.11411: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 328  train vae loss: 0.17870 perp: 0.05764 kl: 0.12106:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 327: loss 0.30363 perp 0.18287 kl 0.12076


At epoch: 328  train vae loss: 0.34551 perp: 0.22808 kl: 0.11743: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 328  valid vae loss: 0.22566 perp: 0.10707 kl: 0.11859:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 328: loss 0.21508 perp 0.09457 kl 0.12051


At epoch: 328  valid vae loss: 0.14381 perp: 0.03226 kl: 0.11155: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 329  train vae loss: 0.22255 perp: 0.10110 kl: 0.12145:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 328: loss 0.33001 perp 0.21235 kl 0.11766


At epoch: 329  train vae loss: 0.19008 perp: 0.07164 kl: 0.11844: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 329  valid vae loss: 0.20979 perp: 0.09321 kl: 0.11659:   6%|▌         | 2/34 [00:00<00:02, 11.63batch/s]

>>>>average [92mtraining[0m of epoch 329: loss 0.21767 perp 0.09595 kl 0.12171


At epoch: 329  valid vae loss: 0.14293 perp: 0.03304 kl: 0.10990: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 330  train vae loss: 0.18978 perp: 0.06791 kl: 0.12186:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 329: loss 0.30166 perp 0.18572 kl 0.11594


At epoch: 330  train vae loss: 0.18122 perp: 0.06147 kl: 0.11974: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 330  valid vae loss: 0.20341 perp: 0.08697 kl: 0.11644:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 330: loss 0.20483 perp 0.08673 kl 0.11811


At epoch: 330  valid vae loss: 0.13032 perp: 0.02062 kl: 0.10970: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 331  train vae loss: 0.21363 perp: 0.09311 kl: 0.12052:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 330: loss 0.29668 perp 0.18090 kl 0.11579


At epoch: 331  train vae loss: 0.16840 perp: 0.05108 kl: 0.11732: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 331  valid vae loss: 0.21361 perp: 0.09430 kl: 0.11931:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 331: loss 0.21851 perp 0.09899 kl 0.11952


At epoch: 331  valid vae loss: 0.13590 perp: 0.02343 kl: 0.11247: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 332  train vae loss: 0.19912 perp: 0.07845 kl: 0.12067:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 331: loss 0.29204 perp 0.17352 kl 0.11852


At epoch: 332  train vae loss: 0.24336 perp: 0.12102 kl: 0.12234: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 332  valid vae loss: 0.24629 perp: 0.12692 kl: 0.11937:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 332: loss 0.20471 perp 0.08652 kl 0.11819


At epoch: 332  valid vae loss: 0.15106 perp: 0.03845 kl: 0.11260: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 333  train vae loss: 0.31428 perp: 0.19457 kl: 0.11970:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 332: loss 0.38035 perp 0.26156 kl 0.11879


At epoch: 333  train vae loss: 0.31176 perp: 0.19094 kl: 0.12082: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 333  valid vae loss: 0.19631 perp: 0.07563 kl: 0.12068:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 333: loss 0.22816 perp 0.10647 kl 0.12169


At epoch: 333  valid vae loss: 0.14210 perp: 0.02902 kl: 0.11308: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 334  train vae loss: 0.20425 perp: 0.08603 kl: 0.11821:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 333: loss 0.34037 perp 0.22022 kl 0.12015


At epoch: 334  train vae loss: 0.19598 perp: 0.07956 kl: 0.11642: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 334  valid vae loss: 0.19842 perp: 0.08039 kl: 0.11802:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 334: loss 0.21423 perp 0.09328 kl 0.12095


At epoch: 334  valid vae loss: 0.14267 perp: 0.03179 kl: 0.11088: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 335  train vae loss: 0.17598 perp: 0.05904 kl: 0.11694:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 334: loss 0.30625 perp 0.18871 kl 0.11754


At epoch: 335  train vae loss: 0.15098 perp: 0.03259 kl: 0.11839: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 335  valid vae loss: 0.20444 perp: 0.08474 kl: 0.11970:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 335: loss 0.23096 perp 0.10896 kl 0.12200


At epoch: 335  valid vae loss: 0.17074 perp: 0.05875 kl: 0.11200: 100%|██████████| 34/34 [00:01<00:00, 23.37batch/s]
At epoch: 336  train vae loss: 0.23326 perp: 0.11349 kl: 0.11977:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 335: loss 0.31235 perp 0.19313 kl 0.11922


At epoch: 336  train vae loss: 0.20068 perp: 0.08900 kl: 0.11168: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 336  valid vae loss: 0.19085 perp: 0.07975 kl: 0.11110:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 336: loss 0.17514 perp 0.06063 kl 0.11451


At epoch: 336  valid vae loss: 0.12835 perp: 0.02400 kl: 0.10435: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 337  train vae loss: 0.15595 perp: 0.04190 kl: 0.11405:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 336: loss 0.26570 perp 0.15493 kl 0.11077
saving to best model since this is the best valid loss so far.----


At epoch: 337  train vae loss: 0.16602 perp: 0.04772 kl: 0.11831: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 337  valid vae loss: 0.17785 perp: 0.06203 kl: 0.11582:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 337: loss 0.18851 perp 0.07635 kl 0.11216


At epoch: 337  valid vae loss: 0.12025 perp: 0.01149 kl: 0.10876: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 338  train vae loss: 0.16577 perp: 0.05085 kl: 0.11492:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 337: loss 0.28822 perp 0.17288 kl 0.11534


At epoch: 338  train vae loss: 0.21771 perp: 0.09766 kl: 0.12005: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 338  valid vae loss: 0.22130 perp: 0.10249 kl: 0.11881:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 338: loss 0.20128 perp 0.08679 kl 0.11449


At epoch: 338  valid vae loss: 0.13123 perp: 0.01960 kl: 0.11163: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 339  train vae loss: 0.20397 perp: 0.08527 kl: 0.11869:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 338: loss 0.30530 perp 0.18719 kl 0.11811


At epoch: 339  train vae loss: 0.17314 perp: 0.05969 kl: 0.11346: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 339  valid vae loss: 0.21201 perp: 0.09808 kl: 0.11393:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 339: loss 0.22029 perp 0.10090 kl 0.11939


At epoch: 339  valid vae loss: 0.12174 perp: 0.01434 kl: 0.10740: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 340  train vae loss: 0.16335 perp: 0.04530 kl: 0.11805:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 339: loss 0.28864 perp 0.17531 kl 0.11333


At epoch: 340  train vae loss: 0.19614 perp: 0.08571 kl: 0.11043: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 340  valid vae loss: 0.17409 perp: 0.06308 kl: 0.11101:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 340: loss 0.17802 perp 0.06593 kl 0.11209


At epoch: 340  valid vae loss: 0.13102 perp: 0.02657 kl: 0.10445: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 341  train vae loss: 0.16168 perp: 0.05295 kl: 0.10873:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 340: loss 0.29620 perp 0.18570 kl 0.11050


At epoch: 341  train vae loss: 0.16692 perp: 0.05456 kl: 0.11237: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 341  valid vae loss: 0.18861 perp: 0.07461 kl: 0.11399:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 341: loss 0.22870 perp 0.11101 kl 0.11769


At epoch: 341  valid vae loss: 0.12694 perp: 0.01959 kl: 0.10735: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 342  train vae loss: 0.16716 perp: 0.05057 kl: 0.11659:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 341: loss 0.28789 perp 0.17463 kl 0.11326


At epoch: 342  train vae loss: 0.16356 perp: 0.05202 kl: 0.11155: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 342  valid vae loss: 0.20799 perp: 0.09553 kl: 0.11246:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 342: loss 0.21051 perp 0.09345 kl 0.11705


At epoch: 342  valid vae loss: 0.12806 perp: 0.02209 kl: 0.10597: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 343  train vae loss: 0.17534 perp: 0.06147 kl: 0.11386:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 342: loss 0.28859 perp 0.17672 kl 0.11187


At epoch: 343  train vae loss: 0.15880 perp: 0.03494 kl: 0.12387: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 343  valid vae loss: 0.21149 perp: 0.08974 kl: 0.12176:   6%|▌         | 2/34 [00:00<00:02, 11.60batch/s]

>>>>average [92mtraining[0m of epoch 343: loss 0.22689 perp 0.10950 kl 0.11739


At epoch: 343  valid vae loss: 0.17134 perp: 0.05706 kl: 0.11428: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 344  train vae loss: 0.18454 perp: 0.06124 kl: 0.12330:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 343: loss 0.30611 perp 0.18454 kl 0.12157


At epoch: 344  train vae loss: 0.15772 perp: 0.04806 kl: 0.10966: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 344  valid vae loss: 0.15466 perp: 0.04598 kl: 0.10868:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 344: loss 0.17838 perp 0.06320 kl 0.11518


At epoch: 344  valid vae loss: 0.10945 perp: 0.00733 kl: 0.10212: 100%|██████████| 34/34 [00:01<00:00, 23.43batch/s]
At epoch: 345  train vae loss: 0.19950 perp: 0.09158 kl: 0.10792:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 344: loss 0.27209 perp 0.16391 kl 0.10817


At epoch: 345  train vae loss: 0.22891 perp: 0.11360 kl: 0.11531: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 345  valid vae loss: 0.26756 perp: 0.15749 kl: 0.11008:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 345: loss 0.18462 perp 0.07348 kl 0.11114


At epoch: 345  valid vae loss: 0.12069 perp: 0.01674 kl: 0.10394: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 346  train vae loss: 0.13079 perp: 0.02122 kl: 0.10957:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 345: loss 0.29798 perp 0.18836 kl 0.10962


At epoch: 346  train vae loss: 0.18629 perp: 0.06659 kl: 0.11970: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 346  valid vae loss: 0.19264 perp: 0.07893 kl: 0.11371:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 346: loss 0.21528 perp 0.10009 kl 0.11519


At epoch: 346  valid vae loss: 0.12953 perp: 0.02246 kl: 0.10707: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 347  train vae loss: 0.15948 perp: 0.04796 kl: 0.11152:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 346: loss 0.26949 perp 0.15639 kl 0.11310


At epoch: 347  train vae loss: 0.21146 perp: 0.09578 kl: 0.11569: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 347  valid vae loss: 0.23502 perp: 0.11930 kl: 0.11572:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 347: loss 0.19475 perp 0.08127 kl 0.11348


At epoch: 347  valid vae loss: 0.12692 perp: 0.01808 kl: 0.10884: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 348  train vae loss: 0.22216 perp: 0.10327 kl: 0.11889:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 347: loss 0.32355 perp 0.20837 kl 0.11518


At epoch: 348  train vae loss: 0.16385 perp: 0.04947 kl: 0.11438: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 348  valid vae loss: 0.18998 perp: 0.07770 kl: 0.11229:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 348: loss 0.19474 perp 0.08097 kl 0.11377


At epoch: 348  valid vae loss: 0.20364 perp: 0.09754 kl: 0.10610: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 349  train vae loss: 0.19695 perp: 0.08611 kl: 0.11084:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 348: loss 0.29004 perp 0.17810 kl 0.11194


At epoch: 349  train vae loss: 0.15388 perp: 0.04286 kl: 0.11102: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 349  valid vae loss: 0.18556 perp: 0.07411 kl: 0.11145:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 349: loss 0.20465 perp 0.08932 kl 0.11533


At epoch: 349  valid vae loss: 0.12171 perp: 0.01666 kl: 0.10505: 100%|██████████| 34/34 [00:01<00:00, 23.47batch/s]
At epoch: 350  train vae loss: 0.16171 perp: 0.05098 kl: 0.11073:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 349: loss 0.27394 perp 0.16323 kl 0.11072


At epoch: 350  train vae loss: 0.15126 perp: 0.03571 kl: 0.11554: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 350  valid vae loss: 0.19290 perp: 0.07896 kl: 0.11393:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 350: loss 0.20591 perp 0.09215 kl 0.11376


At epoch: 350  valid vae loss: 0.12771 perp: 0.02047 kl: 0.10724: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 351  train vae loss: 0.14969 perp: 0.03269 kl: 0.11701:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 350: loss 0.26903 perp 0.15610 kl 0.11293


At epoch: 351  train vae loss: 0.18585 perp: 0.07245 kl: 0.11340: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 351  valid vae loss: 0.22223 perp: 0.10883 kl: 0.11340:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 351: loss 0.18398 perp 0.07244 kl 0.11154


At epoch: 351  valid vae loss: 0.22647 perp: 0.11981 kl: 0.10666: 100%|██████████| 34/34 [00:01<00:00, 23.41batch/s]
At epoch: 352  train vae loss: 0.25091 perp: 0.13522 kl: 0.11569:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 351: loss 0.32254 perp 0.21005 kl 0.11249


At epoch: 352  train vae loss: 0.23992 perp: 0.12854 kl: 0.11138: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 352  valid vae loss: 0.19593 perp: 0.08545 kl: 0.11047:   6%|▌         | 2/34 [00:00<00:02, 11.88batch/s]

>>>>average [92mtraining[0m of epoch 352: loss 0.19087 perp 0.07919 kl 0.11168


At epoch: 352  valid vae loss: 0.14819 perp: 0.04335 kl: 0.10483: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 353  train vae loss: 0.18266 perp: 0.06857 kl: 0.11410:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 352: loss 0.30738 perp 0.19720 kl 0.11018


At epoch: 353  train vae loss: 0.23030 perp: 0.11952 kl: 0.11078: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 353  valid vae loss: 0.19733 perp: 0.08902 kl: 0.10831:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 353: loss 0.18716 perp 0.07647 kl 0.11069


At epoch: 353  valid vae loss: 0.28412 perp: 0.18151 kl: 0.10260: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 354  train vae loss: 0.20339 perp: 0.09484 kl: 0.10856:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 353: loss 0.27520 perp 0.16739 kl 0.10780


At epoch: 354  train vae loss: 0.17657 perp: 0.06652 kl: 0.11005: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 354  valid vae loss: 0.19443 perp: 0.08334 kl: 0.11109:   6%|▌         | 2/34 [00:00<00:02, 11.59batch/s]

>>>>average [92mtraining[0m of epoch 354: loss 0.20794 perp 0.09457 kl 0.11337


At epoch: 354  valid vae loss: 0.12116 perp: 0.01554 kl: 0.10562: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 355  train vae loss: 0.16238 perp: 0.05043 kl: 0.11195:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 354: loss 0.28964 perp 0.17900 kl 0.11064


At epoch: 355  train vae loss: 0.17345 perp: 0.06166 kl: 0.11179: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 355  valid vae loss: 0.18324 perp: 0.07279 kl: 0.11044:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 355: loss 0.19203 perp 0.08044 kl 0.11158


At epoch: 355  valid vae loss: 0.11339 perp: 0.00864 kl: 0.10475: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 356  train vae loss: 0.18400 perp: 0.07457 kl: 0.10943:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 355: loss 0.29948 perp 0.18939 kl 0.11008


At epoch: 356  train vae loss: 0.23814 perp: 0.12634 kl: 0.11179: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 356  valid vae loss: 0.22504 perp: 0.11265 kl: 0.11239:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 356: loss 0.21429 perp 0.10060 kl 0.11369


At epoch: 356  valid vae loss: 0.12216 perp: 0.01553 kl: 0.10663: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 357  train vae loss: 0.20566 perp: 0.09444 kl: 0.11122:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 356: loss 0.31075 perp 0.19864 kl 0.11211


At epoch: 357  train vae loss: 0.16551 perp: 0.05402 kl: 0.11149: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 357  valid vae loss: 0.17438 perp: 0.06431 kl: 0.11007:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 357: loss 0.19250 perp 0.07955 kl 0.11294


At epoch: 357  valid vae loss: 0.12732 perp: 0.02351 kl: 0.10381: 100%|██████████| 34/34 [00:01<00:00, 23.53batch/s]
At epoch: 358  train vae loss: 0.16869 perp: 0.06048 kl: 0.10821:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 357: loss 0.28654 perp 0.17703 kl 0.10952


At epoch: 358  train vae loss: 0.18084 perp: 0.06625 kl: 0.11459: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 358  valid vae loss: 0.21617 perp: 0.10269 kl: 0.11348:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 358: loss 0.19928 perp 0.08791 kl 0.11138


At epoch: 358  valid vae loss: 0.12791 perp: 0.02049 kl: 0.10742: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 359  train vae loss: 0.19883 perp: 0.08636 kl: 0.11247:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 358: loss 0.29448 perp 0.18141 kl 0.11307


At epoch: 359  train vae loss: 0.17186 perp: 0.05844 kl: 0.11341: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 359  valid vae loss: 0.21048 perp: 0.09713 kl: 0.11335:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 359: loss 0.24378 perp 0.12335 kl 0.12043


At epoch: 359  valid vae loss: 0.16789 perp: 0.06067 kl: 0.10722: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 360  train vae loss: 0.17163 perp: 0.06028 kl: 0.11135:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 359: loss 0.26536 perp 0.15231 kl 0.11305
saving to best model since this is the best valid loss so far.----


At epoch: 360  train vae loss: 0.17142 perp: 0.06905 kl: 0.10237: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 360  valid vae loss: 0.18261 perp: 0.07881 kl: 0.10380:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 360: loss 0.15917 perp 0.05078 kl 0.10839


At epoch: 360  valid vae loss: 0.10677 perp: 0.00817 kl: 0.09860: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 361  train vae loss: 0.16695 perp: 0.06157 kl: 0.10538:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 360: loss 0.29871 perp 0.19504 kl 0.10367


At epoch: 361  train vae loss: 0.17573 perp: 0.07037 kl: 0.10536: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 361  valid vae loss: 0.15406 perp: 0.05085 kl: 0.10321:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 361: loss 0.17384 perp 0.06696 kl 0.10688


At epoch: 361  valid vae loss: 0.11443 perp: 0.01657 kl: 0.09786: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 362  train vae loss: 0.16679 perp: 0.06237 kl: 0.10442:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 361: loss 0.26362 perp 0.16061 kl 0.10301
saving to best model since this is the best valid loss so far.----


At epoch: 362  train vae loss: 0.20934 perp: 0.10417 kl: 0.10516: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 362  valid vae loss: 0.17851 perp: 0.07153 kl: 0.10698:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 362: loss 0.17560 perp 0.06954 kl 0.10606


At epoch: 362  valid vae loss: 0.12492 perp: 0.02329 kl: 0.10163: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 363  train vae loss: 0.15232 perp: 0.04272 kl: 0.10960:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 362: loss 0.28455 perp 0.17762 kl 0.10693


At epoch: 363  train vae loss: 0.13052 perp: 0.02298 kl: 0.10754: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 363  valid vae loss: 0.18367 perp: 0.07760 kl: 0.10607:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 363: loss 0.18289 perp 0.07577 kl 0.10712


At epoch: 363  valid vae loss: 0.10840 perp: 0.00785 kl: 0.10055: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 364  train vae loss: 0.15957 perp: 0.05185 kl: 0.10772:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 363: loss 0.26506 perp 0.15936 kl 0.10570


At epoch: 364  train vae loss: 0.16894 perp: 0.06418 kl: 0.10476: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 364  valid vae loss: 0.17165 perp: 0.06543 kl: 0.10622:   6%|▌         | 2/34 [00:00<00:02, 11.65batch/s]

>>>>average [92mtraining[0m of epoch 364: loss 0.18515 perp 0.07686 kl 0.10829


At epoch: 364  valid vae loss: 0.11656 perp: 0.01605 kl: 0.10051: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 365  train vae loss: 0.18433 perp: 0.07843 kl: 0.10590:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 364: loss 0.28162 perp 0.17589 kl 0.10572


At epoch: 365  train vae loss: 0.17251 perp: 0.06128 kl: 0.11123: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 365  valid vae loss: 0.17301 perp: 0.06382 kl: 0.10919:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 365: loss 0.19323 perp 0.08458 kl 0.10864


At epoch: 365  valid vae loss: 0.13514 perp: 0.03126 kl: 0.10388: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 366  train vae loss: 0.15991 perp: 0.04993 kl: 0.10998:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 365: loss 0.27148 perp 0.16269 kl 0.10878


At epoch: 366  train vae loss: 0.14957 perp: 0.04198 kl: 0.10758: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 366  valid vae loss: 0.18196 perp: 0.07165 kl: 0.11031:   6%|▌         | 2/34 [00:00<00:02, 11.90batch/s]

>>>>average [92mtraining[0m of epoch 366: loss 0.23355 perp 0.11776 kl 0.11578


At epoch: 366  valid vae loss: 0.11628 perp: 0.01180 kl: 0.10448: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 367  train vae loss: 0.17021 perp: 0.05756 kl: 0.11265:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 366: loss 0.27383 perp 0.16386 kl 0.10997


At epoch: 367  train vae loss: 0.13605 perp: 0.03035 kl: 0.10570: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 367  valid vae loss: 0.16751 perp: 0.06281 kl: 0.10470:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 367: loss 0.16004 perp 0.05366 kl 0.10638


At epoch: 367  valid vae loss: 0.15915 perp: 0.05998 kl: 0.09916: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 368  train vae loss: 0.15643 perp: 0.05425 kl: 0.10218:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 367: loss 0.25938 perp 0.15494 kl 0.10444
saving to best model since this is the best valid loss so far.----


At epoch: 368  train vae loss: 0.16098 perp: 0.05873 kl: 0.10225: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 368  valid vae loss: 0.16946 perp: 0.06445 kl: 0.10500:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 368: loss 0.17209 perp 0.06633 kl 0.10575


At epoch: 368  valid vae loss: 0.11134 perp: 0.01176 kl: 0.09958: 100%|██████████| 34/34 [00:01<00:00, 23.66batch/s]
At epoch: 369  train vae loss: 0.19372 perp: 0.08874 kl: 0.10498:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 368: loss 0.28186 perp 0.17717 kl 0.10468


At epoch: 369  train vae loss: 0.14691 perp: 0.04184 kl: 0.10507: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 369  valid vae loss: 0.20842 perp: 0.10126 kl: 0.10715:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 369: loss 0.19202 perp 0.08373 kl 0.10829


At epoch: 369  valid vae loss: 0.11966 perp: 0.01756 kl: 0.10210: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 370  train vae loss: 0.15127 perp: 0.04344 kl: 0.10783:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 369: loss 0.27586 perp 0.16927 kl 0.10660


At epoch: 370  train vae loss: 0.17005 perp: 0.05313 kl: 0.11691: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 370  valid vae loss: 0.19589 perp: 0.08052 kl: 0.11538:   6%|▌         | 2/34 [00:00<00:02, 11.84batch/s]

>>>>average [92mtraining[0m of epoch 370: loss 0.22923 perp 0.11666 kl 0.11257


At epoch: 370  valid vae loss: 0.12597 perp: 0.01637 kl: 0.10960: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 371  train vae loss: 0.18548 perp: 0.06886 kl: 0.11663:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 370: loss 0.28952 perp 0.17437 kl 0.11515


At epoch: 371  train vae loss: 0.12525 perp: 0.01836 kl: 0.10689: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 371  valid vae loss: 0.17788 perp: 0.07319 kl: 0.10470:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 371: loss 0.17519 perp 0.06354 kl 0.11166


At epoch: 371  valid vae loss: 0.14004 perp: 0.04035 kl: 0.09970: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 372  train vae loss: 0.14559 perp: 0.04146 kl: 0.10413:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 371: loss 0.25554 perp 0.15088 kl 0.10466
saving to best model since this is the best valid loss so far.----


At epoch: 372  train vae loss: 0.20178 perp: 0.09616 kl: 0.10561: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 372  valid vae loss: 0.18055 perp: 0.07659 kl: 0.10396:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 372: loss 0.16841 perp 0.06258 kl 0.10583


At epoch: 372  valid vae loss: 0.11621 perp: 0.01808 kl: 0.09814: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 373  train vae loss: 0.18130 perp: 0.07534 kl: 0.10596:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 372: loss 0.28391 perp 0.18017 kl 0.10374


At epoch: 373  train vae loss: 0.33816 perp: 0.23095 kl: 0.10721: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 373  valid vae loss: 0.23164 perp: 0.12374 kl: 0.10790:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 373: loss 0.18459 perp 0.07804 kl 0.10655


At epoch: 373  valid vae loss: 0.11775 perp: 0.01559 kl: 0.10215: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 374  train vae loss: 0.23920 perp: 0.13268 kl: 0.10652:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 373: loss 0.34063 perp 0.23327 kl 0.10737


At epoch: 374  train vae loss: 0.43588 perp: 0.31719 kl: 0.11869: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 374  valid vae loss: 0.52799 perp: 0.40711 kl: 0.12088:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 374: loss 0.21086 perp 0.10428 kl 0.10658


At epoch: 374  valid vae loss: 0.39114 perp: 0.27746 kl: 0.11368: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 375  train vae loss: 0.57393 perp: 0.45362 kl: 0.12031:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 374: loss 0.58085 perp 0.46116 kl 0.11969


At epoch: 375  train vae loss: 0.22046 perp: 0.11137 kl: 0.10910: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 375  valid vae loss: 0.19443 perp: 0.08538 kl: 0.10905:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 375: loss 0.19471 perp 0.08345 kl 0.11126


At epoch: 375  valid vae loss: 0.17840 perp: 0.07543 kl: 0.10297: 100%|██████████| 34/34 [00:01<00:00, 23.65batch/s]
At epoch: 376  train vae loss: 0.15977 perp: 0.04853 kl: 0.11124:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 375: loss 0.27931 perp 0.17095 kl 0.10836


At epoch: 376  train vae loss: 0.14878 perp: 0.04766 kl: 0.10112: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 376  valid vae loss: 0.18242 perp: 0.07989 kl: 0.10252:   6%|▌         | 2/34 [00:00<00:02, 11.76batch/s]

>>>>average [92mtraining[0m of epoch 376: loss 0.16225 perp 0.05656 kl 0.10569


At epoch: 376  valid vae loss: 0.10376 perp: 0.00691 kl: 0.09685: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 377  train vae loss: 0.13831 perp: 0.03558 kl: 0.10273:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 376: loss 0.26443 perp 0.16260 kl 0.10182


At epoch: 377  train vae loss: 0.16791 perp: 0.06252 kl: 0.10539: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 377  valid vae loss: 0.21114 perp: 0.10664 kl: 0.10449:   6%|▌         | 2/34 [00:00<00:02, 11.82batch/s]

>>>>average [92mtraining[0m of epoch 377: loss 0.17509 perp 0.07050 kl 0.10459


At epoch: 377  valid vae loss: 0.10828 perp: 0.00957 kl: 0.09871: 100%|██████████| 34/34 [00:01<00:00, 23.46batch/s]
At epoch: 378  train vae loss: 0.16637 perp: 0.06364 kl: 0.10273:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 377: loss 0.26969 perp 0.16568 kl 0.10402


At epoch: 378  train vae loss: 0.14257 perp: 0.03085 kl: 0.11172: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 378  valid vae loss: 0.17986 perp: 0.07433 kl: 0.10552:   6%|▌         | 2/34 [00:00<00:02, 11.80batch/s]

>>>>average [92mtraining[0m of epoch 378: loss 0.21635 perp 0.10748 kl 0.10886


At epoch: 378  valid vae loss: 0.10477 perp: 0.00496 kl: 0.09981: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 379  train vae loss: 0.13441 perp: 0.02616 kl: 0.10825:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 378: loss 0.26197 perp 0.15683 kl 0.10513


At epoch: 379  train vae loss: 0.16546 perp: 0.06500 kl: 0.10046: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 379  valid vae loss: 0.19386 perp: 0.09420 kl: 0.09966:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 379: loss 0.14560 perp 0.04413 kl 0.10146


At epoch: 379  valid vae loss: 0.21979 perp: 0.12532 kl: 0.09447: 100%|██████████| 34/34 [00:01<00:00, 23.49batch/s]
At epoch: 380  train vae loss: 0.17725 perp: 0.07888 kl: 0.09837:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 379: loss 0.25512 perp 0.15572 kl 0.09940
saving to best model since this is the best valid loss so far.----


At epoch: 380  train vae loss: 0.12959 perp: 0.02748 kl: 0.10210: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 380  valid vae loss: 0.18051 perp: 0.07719 kl: 0.10332:   6%|▌         | 2/34 [00:00<00:02, 11.85batch/s]

>>>>average [92mtraining[0m of epoch 380: loss 0.17412 perp 0.07117 kl 0.10295


At epoch: 380  valid vae loss: 0.11576 perp: 0.01810 kl: 0.09766: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 381  train vae loss: 0.18522 perp: 0.07953 kl: 0.10569:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 380: loss 0.27177 perp 0.16884 kl 0.10293


At epoch: 381  train vae loss: 0.12974 perp: 0.02700 kl: 0.10275: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 381  valid vae loss: 0.18656 perp: 0.08324 kl: 0.10332:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 381: loss 0.16034 perp 0.05905 kl 0.10129


At epoch: 381  valid vae loss: 0.11099 perp: 0.01323 kl: 0.09776: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 382  train vae loss: 0.15945 perp: 0.05901 kl: 0.10044:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 381: loss 0.27565 perp 0.17265 kl 0.10301


At epoch: 382  train vae loss: 0.14449 perp: 0.03950 kl: 0.10498: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 382  valid vae loss: 0.17021 perp: 0.06690 kl: 0.10332:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 382: loss 0.27647 perp 0.16433 kl 0.11214


At epoch: 382  valid vae loss: 0.10768 perp: 0.00933 kl: 0.09835: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 383  train vae loss: 0.15213 perp: 0.04307 kl: 0.10906:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 382: loss 0.25422 perp 0.15083 kl 0.10338
saving to best model since this is the best valid loss so far.----


At epoch: 383  train vae loss: 0.15162 perp: 0.04902 kl: 0.10260: 100%|██████████| 379/379 [00:36<00:00, 10.37batch/s]
At epoch: 383  valid vae loss: 0.19190 perp: 0.09256 kl: 0.09935:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 383: loss 0.15141 perp 0.04777 kl 0.10364


At epoch: 383  valid vae loss: 0.10265 perp: 0.00818 kl: 0.09448: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 384  train vae loss: 0.18140 perp: 0.07695 kl: 0.10445:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 383: loss 0.25495 perp 0.15574 kl 0.09921


At epoch: 384  train vae loss: 0.14294 perp: 0.04732 kl: 0.09562: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 384  valid vae loss: 0.19222 perp: 0.09706 kl: 0.09516:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 384: loss 0.15085 perp 0.05093 kl 0.09992


At epoch: 384  valid vae loss: 0.10203 perp: 0.01146 kl: 0.09057: 100%|██████████| 34/34 [00:01<00:00, 23.64batch/s]
At epoch: 385  train vae loss: 0.14287 perp: 0.04851 kl: 0.09436:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 384: loss 0.24647 perp 0.15125 kl 0.09522
saving to best model since this is the best valid loss so far.----


At epoch: 385  train vae loss: 0.17813 perp: 0.07762 kl: 0.10051: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 385  valid vae loss: 0.18705 perp: 0.08557 kl: 0.10148:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 385: loss 0.16803 perp 0.06895 kl 0.09908


At epoch: 385  valid vae loss: 0.12618 perp: 0.02990 kl: 0.09627: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 386  train vae loss: 0.16006 perp: 0.05724 kl: 0.10282:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 385: loss 0.27437 perp 0.17310 kl 0.10126


At epoch: 386  train vae loss: 0.16905 perp: 0.07025 kl: 0.09880: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 386  valid vae loss: 0.21107 perp: 0.11249 kl: 0.09858:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 386: loss 0.15940 perp 0.05912 kl 0.10028


At epoch: 386  valid vae loss: 0.09705 perp: 0.00360 kl: 0.09345: 100%|██████████| 34/34 [00:01<00:00, 23.62batch/s]
At epoch: 387  train vae loss: 0.13770 perp: 0.03923 kl: 0.09847:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 386: loss 0.26141 perp 0.16291 kl 0.09851


At epoch: 387  train vae loss: 0.12165 perp: 0.02272 kl: 0.09892: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 387  valid vae loss: 0.22809 perp: 0.12691 kl: 0.10118:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 387: loss 0.15632 perp 0.05815 kl 0.09817


At epoch: 387  valid vae loss: 0.27439 perp: 0.17837 kl: 0.09602: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 388  train vae loss: 0.19547 perp: 0.09527 kl: 0.10021:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 387: loss 0.29548 perp 0.19440 kl 0.10108


At epoch: 388  train vae loss: 0.16423 perp: 0.05788 kl: 0.10635: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 388  valid vae loss: 0.20821 perp: 0.10432 kl: 0.10389:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 388: loss 0.21101 perp 0.10611 kl 0.10490


At epoch: 388  valid vae loss: 0.10360 perp: 0.00498 kl: 0.09861: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 389  train vae loss: 0.14411 perp: 0.03485 kl: 0.10926:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 388: loss 0.25398 perp 0.15012 kl 0.10386


At epoch: 389  train vae loss: 0.17300 perp: 0.07022 kl: 0.10277: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 389  valid vae loss: 0.18165 perp: 0.07973 kl: 0.10192:   6%|▌         | 2/34 [00:00<00:02, 11.87batch/s]

>>>>average [92mtraining[0m of epoch 389: loss 0.18582 perp 0.07890 kl 0.10692


At epoch: 389  valid vae loss: 0.12099 perp: 0.02430 kl: 0.09669: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 390  train vae loss: 0.12979 perp: 0.03196 kl: 0.09784:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 389: loss 0.26034 perp 0.15863 kl 0.10171


At epoch: 390  train vae loss: 0.14592 perp: 0.04875 kl: 0.09716: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 390  valid vae loss: 0.19447 perp: 0.09929 kl: 0.09517:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 390: loss 0.14451 perp 0.04569 kl 0.09882


At epoch: 390  valid vae loss: 0.10820 perp: 0.01801 kl: 0.09020: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 391  train vae loss: 0.16948 perp: 0.07094 kl: 0.09854:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 390: loss 0.25883 perp 0.16379 kl 0.09504


At epoch: 391  train vae loss: 0.15668 perp: 0.05920 kl: 0.09748: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 391  valid vae loss: 0.18018 perp: 0.08266 kl: 0.09752:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 391: loss 0.15758 perp 0.05958 kl 0.09800


At epoch: 391  valid vae loss: 0.10695 perp: 0.01404 kl: 0.09291: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 392  train vae loss: 0.20536 perp: 0.11178 kl: 0.09359:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 391: loss 0.25831 perp 0.16112 kl 0.09719


At epoch: 392  train vae loss: 0.13240 perp: 0.03356 kl: 0.09884: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 392  valid vae loss: 0.19959 perp: 0.09839 kl: 0.10121:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 392: loss 0.17231 perp 0.07219 kl 0.10012


At epoch: 392  valid vae loss: 0.12506 perp: 0.02895 kl: 0.09611: 100%|██████████| 34/34 [00:01<00:00, 23.57batch/s]
At epoch: 393  train vae loss: 0.20396 perp: 0.10508 kl: 0.09888:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 392: loss 0.30416 perp 0.20340 kl 0.10076


At epoch: 393  train vae loss: 0.16667 perp: 0.06183 kl: 0.10484: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 393  valid vae loss: 0.21289 perp: 0.11197 kl: 0.10092:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 393: loss 0.18664 perp 0.08259 kl 0.10404


At epoch: 393  valid vae loss: 0.10464 perp: 0.00863 kl: 0.09601: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 394  train vae loss: 0.21334 perp: 0.11065 kl: 0.10269:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 393: loss 0.27125 perp 0.17071 kl 0.10055


At epoch: 394  train vae loss: 0.17702 perp: 0.08011 kl: 0.09691: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 394  valid vae loss: 0.16274 perp: 0.06452 kl: 0.09822:   6%|▌         | 2/34 [00:00<00:02, 11.86batch/s]

>>>>average [92mtraining[0m of epoch 394: loss 0.16450 perp 0.06373 kl 0.10077


At epoch: 394  valid vae loss: 0.09723 perp: 0.00391 kl: 0.09332: 100%|██████████| 34/34 [00:01<00:00, 23.63batch/s]
At epoch: 395  train vae loss: 0.19481 perp: 0.09363 kl: 0.10118:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 394: loss 0.26561 perp 0.16775 kl 0.09786


At epoch: 395  train vae loss: 0.14618 perp: 0.04466 kl: 0.10151: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 395  valid vae loss: 0.18151 perp: 0.07905 kl: 0.10246:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 395: loss 0.17877 perp 0.07674 kl 0.10203


At epoch: 395  valid vae loss: 0.16007 perp: 0.06306 kl: 0.09701: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 396  train vae loss: 0.12432 perp: 0.02199 kl: 0.10232:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 395: loss 0.27427 perp 0.17212 kl 0.10215


At epoch: 396  train vae loss: 0.18565 perp: 0.08158 kl: 0.10407: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 396  valid vae loss: 0.21329 perp: 0.10909 kl: 0.10420:   6%|▌         | 2/34 [00:00<00:02, 11.92batch/s]

>>>>average [92mtraining[0m of epoch 396: loss 0.17532 perp 0.07360 kl 0.10173


At epoch: 396  valid vae loss: 0.11463 perp: 0.01570 kl: 0.09893: 100%|██████████| 34/34 [00:01<00:00, 23.73batch/s]
At epoch: 397  train vae loss: 0.16618 perp: 0.06207 kl: 0.10410:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 396: loss 0.29867 perp 0.19469 kl 0.10398


At epoch: 397  train vae loss: 0.19500 perp: 0.08964 kl: 0.10537: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 397  valid vae loss: 0.17887 perp: 0.07783 kl: 0.10104:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 397: loss 0.18399 perp 0.08001 kl 0.10397


At epoch: 397  valid vae loss: 0.10574 perp: 0.00992 kl: 0.09582: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 398  train vae loss: 0.14174 perp: 0.03997 kl: 0.10177:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 397: loss 0.25166 perp 0.15070 kl 0.10096


At epoch: 398  train vae loss: 0.19243 perp: 0.09346 kl: 0.09896: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 398  valid vae loss: 0.22879 perp: 0.12865 kl: 0.10014:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 398: loss 0.15763 perp 0.05839 kl 0.09924


At epoch: 398  valid vae loss: 0.15170 perp: 0.05687 kl: 0.09483: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 399  train vae loss: 0.23207 perp: 0.13064 kl: 0.10143:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 398: loss 0.31697 perp 0.21717 kl 0.09980


At epoch: 399  train vae loss: 0.15994 perp: 0.05415 kl: 0.10579: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 399  valid vae loss: 0.18832 perp: 0.08538 kl: 0.10295:   6%|▌         | 2/34 [00:00<00:02, 11.81batch/s]

>>>>average [92mtraining[0m of epoch 399: loss 0.28037 perp 0.16693 kl 0.11344


At epoch: 399  valid vae loss: 0.10254 perp: 0.00454 kl: 0.09800: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 400  train vae loss: 0.16112 perp: 0.05837 kl: 0.10274:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 399: loss 0.25435 perp 0.15126 kl 0.10309


At epoch: 400  train vae loss: 0.11266 perp: 0.01787 kl: 0.09479: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 400  valid vae loss: 0.17883 perp: 0.08313 kl: 0.09571:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 400: loss 0.13538 perp 0.03618 kl 0.09919


At epoch: 400  valid vae loss: 0.10439 perp: 0.01337 kl: 0.09102: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 401  train vae loss: 0.14552 perp: 0.04848 kl: 0.09704:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 400: loss 0.23523 perp 0.13950 kl 0.09573
saving to best model since this is the best valid loss so far.----


At epoch: 401  train vae loss: 0.13180 perp: 0.03931 kl: 0.09249: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 401  valid vae loss: 0.19264 perp: 0.09930 kl: 0.09334:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 401: loss 0.13443 perp 0.03936 kl 0.09507


At epoch: 401  valid vae loss: 0.09208 perp: 0.00355 kl: 0.08853: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 402  train vae loss: 0.12548 perp: 0.03346 kl: 0.09202:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 401: loss 0.25527 perp 0.16204 kl 0.09323


At epoch: 402  train vae loss: 0.13497 perp: 0.03731 kl: 0.09766: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 402  valid vae loss: 0.17544 perp: 0.07924 kl: 0.09620:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 402: loss 0.15228 perp 0.05689 kl 0.09538


At epoch: 402  valid vae loss: 0.10036 perp: 0.00898 kl: 0.09139: 100%|██████████| 34/34 [00:01<00:00, 23.58batch/s]
At epoch: 403  train vae loss: 0.15328 perp: 0.05696 kl: 0.09631:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 402: loss 0.25370 perp 0.15775 kl 0.09595


At epoch: 403  train vae loss: 0.14534 perp: 0.04954 kl: 0.09580: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 403  valid vae loss: 0.19496 perp: 0.10083 kl: 0.09413:   6%|▌         | 2/34 [00:00<00:02, 11.91batch/s]

>>>>average [92mtraining[0m of epoch 403: loss 0.14228 perp 0.04756 kl 0.09472


At epoch: 403  valid vae loss: 0.10528 perp: 0.01546 kl: 0.08981: 100%|██████████| 34/34 [00:01<00:00, 23.67batch/s]
At epoch: 404  train vae loss: 0.13330 perp: 0.03595 kl: 0.09735:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 403: loss 0.25224 perp 0.15808 kl 0.09417


At epoch: 404  train vae loss: 0.14935 perp: 0.05515 kl: 0.09421: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 404  valid vae loss: 0.17012 perp: 0.07718 kl: 0.09294:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 404: loss 0.16195 perp 0.06456 kl 0.09739


At epoch: 404  valid vae loss: 0.09434 perp: 0.00577 kl: 0.08857: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 405  train vae loss: 0.13010 perp: 0.03492 kl: 0.09518:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 404: loss 0.24560 perp 0.15274 kl 0.09287


At epoch: 405  train vae loss: 0.15704 perp: 0.06152 kl: 0.09553: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 405  valid vae loss: 0.20863 perp: 0.11285 kl: 0.09578:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 405: loss 0.17252 perp 0.07454 kl 0.09798


At epoch: 405  valid vae loss: 0.11877 perp: 0.02741 kl: 0.09137: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 406  train vae loss: 0.11600 perp: 0.01688 kl: 0.09912:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 405: loss 0.26443 perp 0.16872 kl 0.09572


At epoch: 406  train vae loss: 0.18319 perp: 0.08533 kl: 0.09786: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 406  valid vae loss: 0.20181 perp: 0.10608 kl: 0.09573:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 406: loss 0.15630 perp 0.05890 kl 0.09740


At epoch: 406  valid vae loss: 0.10901 perp: 0.01811 kl: 0.09090: 100%|██████████| 34/34 [00:01<00:00, 23.60batch/s]
At epoch: 407  train vae loss: 0.13797 perp: 0.03979 kl: 0.09818:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 406: loss 0.27287 perp 0.17710 kl 0.09577


At epoch: 407  train vae loss: 0.15443 perp: 0.06017 kl: 0.09426: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 407  valid vae loss: 0.19320 perp: 0.09745 kl: 0.09575:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 407: loss 0.16176 perp 0.06446 kl 0.09729


At epoch: 407  valid vae loss: 0.12073 perp: 0.03000 kl: 0.09073: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 408  train vae loss: 0.17068 perp: 0.07801 kl: 0.09267:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 407: loss 0.27270 perp 0.17715 kl 0.09555


At epoch: 408  train vae loss: 0.12905 perp: 0.03398 kl: 0.09507: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 408  valid vae loss: 0.18447 perp: 0.08793 kl: 0.09654:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 408: loss 0.17154 perp 0.07177 kl 0.09977


At epoch: 408  valid vae loss: 0.12809 perp: 0.03628 kl: 0.09181: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 409  train vae loss: 0.13825 perp: 0.03999 kl: 0.09826:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 408: loss 0.26158 perp 0.16533 kl 0.09625


At epoch: 409  train vae loss: 0.16315 perp: 0.06845 kl: 0.09470: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 409  valid vae loss: 0.17622 perp: 0.08265 kl: 0.09357:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 409: loss 0.15095 perp 0.05480 kl 0.09615


At epoch: 409  valid vae loss: 0.10843 perp: 0.01975 kl: 0.08868: 100%|██████████| 34/34 [00:01<00:00, 23.68batch/s]
At epoch: 410  train vae loss: 0.17274 perp: 0.07745 kl: 0.09529:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 409: loss 0.25673 perp 0.16351 kl 0.09323


At epoch: 410  train vae loss: 0.28524 perp: 0.15898 kl: 0.12627: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 410  valid vae loss: 0.26354 perp: 0.14164 kl: 0.12190:   6%|▌         | 2/34 [00:00<00:02, 11.68batch/s]

>>>>average [92mtraining[0m of epoch 410: loss 0.23019 perp 0.12825 kl 0.10194


At epoch: 410  valid vae loss: 0.47243 perp: 0.35788 kl: 0.11455: 100%|██████████| 34/34 [00:01<00:00, 23.42batch/s]
At epoch: 411  train vae loss: 0.31722 perp: 0.19711 kl: 0.12011:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 410: loss 0.41949 perp 0.29836 kl 0.12113


At epoch: 411  train vae loss: 0.11515 perp: 0.01986 kl: 0.09528: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 411  valid vae loss: 0.16613 perp: 0.06774 kl: 0.09839:   6%|▌         | 2/34 [00:00<00:02, 11.80batch/s]

>>>>average [92mtraining[0m of epoch 411: loss 0.16357 perp 0.05604 kl 0.10753


At epoch: 411  valid vae loss: 0.10143 perp: 0.00793 kl: 0.09350: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 412  train vae loss: 0.12433 perp: 0.02693 kl: 0.09740:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 411: loss 0.24090 perp 0.14279 kl 0.09811


At epoch: 412  train vae loss: 0.14719 perp: 0.05220 kl: 0.09499: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 412  valid vae loss: 0.20139 perp: 0.10785 kl: 0.09354:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 412: loss 0.13826 perp 0.04230 kl 0.09595


At epoch: 412  valid vae loss: 0.09663 perp: 0.00780 kl: 0.08883: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 413  train vae loss: 0.12346 perp: 0.03316 kl: 0.09031:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 412: loss 0.24320 perp 0.14990 kl 0.09330


At epoch: 413  train vae loss: 0.12326 perp: 0.02936 kl: 0.09390: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 413  valid vae loss: 0.18575 perp: 0.09252 kl: 0.09323:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 413: loss 0.15649 perp 0.06116 kl 0.09533


At epoch: 413  valid vae loss: 0.10744 perp: 0.01876 kl: 0.08868: 100%|██████████| 34/34 [00:01<00:00, 23.52batch/s]
At epoch: 414  train vae loss: 0.15522 perp: 0.06331 kl: 0.09191:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 413: loss 0.25285 perp 0.16001 kl 0.09284


At epoch: 414  train vae loss: 0.15961 perp: 0.06414 kl: 0.09547: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 414  valid vae loss: 0.15658 perp: 0.06140 kl: 0.09519:   6%|▌         | 2/34 [00:00<00:02, 11.42batch/s]

>>>>average [92mtraining[0m of epoch 414: loss 0.15587 perp 0.06021 kl 0.09566


At epoch: 414  valid vae loss: 0.10001 perp: 0.00973 kl: 0.09028: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 415  train vae loss: 0.17168 perp: 0.07346 kl: 0.09822:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 414: loss 0.25150 perp 0.15646 kl 0.09503


At epoch: 415  train vae loss: 0.21382 perp: 0.11245 kl: 0.10137: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 415  valid vae loss: 0.27123 perp: 0.17122 kl: 0.10001:   6%|▌         | 2/34 [00:00<00:02, 10.70batch/s]

>>>>average [92mtraining[0m of epoch 415: loss 0.16781 perp 0.07093 kl 0.09688


At epoch: 415  valid vae loss: 0.10953 perp: 0.01507 kl: 0.09446: 100%|██████████| 34/34 [00:01<00:00, 22.01batch/s]
At epoch: 416  train vae loss: 0.16938 perp: 0.07035 kl: 0.09903:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 415: loss 0.27714 perp 0.17758 kl 0.09957


At epoch: 416  train vae loss: 0.23623 perp: 0.13052 kl: 0.10572: 100%|██████████| 379/379 [00:38<00:00,  9.93batch/s]
At epoch: 416  valid vae loss: 0.19870 perp: 0.09497 kl: 0.10373:   6%|▌         | 2/34 [00:00<00:02, 10.99batch/s]

>>>>average [92mtraining[0m of epoch 416: loss 0.16352 perp 0.06676 kl 0.09676


At epoch: 416  valid vae loss: 0.12488 perp: 0.02557 kl: 0.09931: 100%|██████████| 34/34 [00:01<00:00, 22.16batch/s]
At epoch: 417  train vae loss: 0.23913 perp: 0.13480 kl: 0.10433:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 416: loss 0.28617 perp 0.18247 kl 0.10370


At epoch: 417  train vae loss: 0.15923 perp: 0.05861 kl: 0.10061: 100%|██████████| 379/379 [00:37<00:00, 10.10batch/s]
At epoch: 417  valid vae loss: 0.19295 perp: 0.09569 kl: 0.09726:   6%|▌         | 2/34 [00:00<00:02, 11.62batch/s]

>>>>average [92mtraining[0m of epoch 417: loss 0.18614 perp 0.08275 kl 0.10339


At epoch: 417  valid vae loss: 0.10012 perp: 0.00743 kl: 0.09270: 100%|██████████| 34/34 [00:01<00:00, 23.59batch/s]
At epoch: 418  train vae loss: 0.14703 perp: 0.04439 kl: 0.10264:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 417: loss 0.24493 perp 0.14774 kl 0.09719


At epoch: 418  train vae loss: 0.13492 perp: 0.04432 kl: 0.09060: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 418  valid vae loss: 0.16435 perp: 0.07338 kl: 0.09098:   6%|▌         | 2/34 [00:00<00:02, 10.86batch/s]

>>>>average [92mtraining[0m of epoch 418: loss 0.13382 perp 0.04030 kl 0.09352


At epoch: 418  valid vae loss: 0.09025 perp: 0.00362 kl: 0.08663: 100%|██████████| 34/34 [00:01<00:00, 21.75batch/s]
At epoch: 419  train vae loss: 0.12204 perp: 0.02827 kl: 0.09377:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 418: loss 0.23853 perp 0.14785 kl 0.09068


At epoch: 419  train vae loss: 0.16597 perp: 0.06815 kl: 0.09782: 100%|██████████| 379/379 [00:39<00:00,  9.69batch/s]
At epoch: 419  valid vae loss: 0.17066 perp: 0.07552 kl: 0.09515:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 419: loss 0.16348 perp 0.06857 kl 0.09491


At epoch: 419  valid vae loss: 0.11684 perp: 0.02567 kl: 0.09116: 100%|██████████| 34/34 [00:01<00:00, 22.97batch/s]
At epoch: 420  train vae loss: 0.20539 perp: 0.10796 kl: 0.09743:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 419: loss 0.28326 perp 0.18822 kl 0.09504


At epoch: 420  train vae loss: 0.14933 perp: 0.05052 kl: 0.09881: 100%|██████████| 379/379 [00:38<00:00,  9.96batch/s]
At epoch: 420  valid vae loss: 0.19228 perp: 0.09449 kl: 0.09779:   6%|▌         | 2/34 [00:00<00:02, 11.11batch/s]

>>>>average [92mtraining[0m of epoch 420: loss 0.17298 perp 0.07360 kl 0.09938


At epoch: 420  valid vae loss: 0.10047 perp: 0.00762 kl: 0.09285: 100%|██████████| 34/34 [00:01<00:00, 22.22batch/s]
At epoch: 421  train vae loss: 0.12727 perp: 0.02989 kl: 0.09737:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 420: loss 0.24919 perp 0.15177 kl 0.09742


At epoch: 421  train vae loss: 0.14762 perp: 0.03655 kl: 0.11107: 100%|██████████| 379/379 [00:37<00:00, 10.02batch/s]
At epoch: 421  valid vae loss: 0.16429 perp: 0.05629 kl: 0.10800:   6%|▌         | 2/34 [00:00<00:02, 11.21batch/s]

>>>>average [92mtraining[0m of epoch 421: loss 0.25139 perp 0.15048 kl 0.10091


At epoch: 421  valid vae loss: 0.12033 perp: 0.01737 kl: 0.10297: 100%|██████████| 34/34 [00:01<00:00, 22.11batch/s]
At epoch: 422  train vae loss: 0.12978 perp: 0.02526 kl: 0.10452:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 421: loss 0.26112 perp 0.15345 kl 0.10767


At epoch: 422  train vae loss: 0.12737 perp: 0.03856 kl: 0.08882: 100%|██████████| 379/379 [00:38<00:00,  9.79batch/s]
At epoch: 422  valid vae loss: 0.13305 perp: 0.04133 kl: 0.09171:   6%|▌         | 2/34 [00:00<00:02, 10.91batch/s]

>>>>average [92mtraining[0m of epoch 422: loss 0.12688 perp 0.02892 kl 0.09796


At epoch: 422  valid vae loss: 0.11307 perp: 0.02587 kl: 0.08720: 100%|██████████| 34/34 [00:01<00:00, 21.71batch/s]
At epoch: 423  train vae loss: 0.11488 perp: 0.02173 kl: 0.09316:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 422: loss 0.22523 perp 0.13376 kl 0.09147
saving to best model since this is the best valid loss so far.----


At epoch: 423  train vae loss: 0.10417 perp: 0.01702 kl: 0.08714: 100%|██████████| 379/379 [00:38<00:00,  9.93batch/s]
At epoch: 423  valid vae loss: 0.13555 perp: 0.04826 kl: 0.08729:   6%|▌         | 2/34 [00:00<00:02, 11.08batch/s]

>>>>average [92mtraining[0m of epoch 423: loss 0.12198 perp 0.03206 kl 0.08992


At epoch: 423  valid vae loss: 0.09062 perp: 0.00751 kl: 0.08311: 100%|██████████| 34/34 [00:01<00:00, 22.07batch/s]
At epoch: 424  train vae loss: 0.10338 perp: 0.01474 kl: 0.08864:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 423: loss 0.22331 perp 0.13626 kl 0.08705
saving to best model since this is the best valid loss so far.----


At epoch: 424  train vae loss: 0.10825 perp: 0.02050 kl: 0.08775: 100%|██████████| 379/379 [00:38<00:00,  9.94batch/s]
At epoch: 424  valid vae loss: 0.17785 perp: 0.08788 kl: 0.08998:   6%|▌         | 2/34 [00:00<00:02, 11.00batch/s]

>>>>average [92mtraining[0m of epoch 424: loss 0.13067 perp 0.04111 kl 0.08956


At epoch: 424  valid vae loss: 0.11165 perp: 0.02629 kl: 0.08536: 100%|██████████| 34/34 [00:01<00:00, 22.06batch/s]
At epoch: 425  train vae loss: 0.10849 perp: 0.01995 kl: 0.08854:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 424: loss 0.25053 perp 0.16082 kl 0.08971


At epoch: 425  train vae loss: 0.14711 perp: 0.05800 kl: 0.08911: 100%|██████████| 379/379 [00:37<00:00, 10.01batch/s]
At epoch: 425  valid vae loss: 0.20909 perp: 0.11759 kl: 0.09150:   6%|▌         | 2/34 [00:00<00:02, 11.22batch/s]

>>>>average [92mtraining[0m of epoch 425: loss 0.15154 perp 0.05832 kl 0.09322


At epoch: 425  valid vae loss: 0.09649 perp: 0.00923 kl: 0.08726: 100%|██████████| 34/34 [00:01<00:00, 22.09batch/s]
At epoch: 426  train vae loss: 0.13428 perp: 0.04196 kl: 0.09231:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 425: loss 0.25944 perp 0.16808 kl 0.09136


At epoch: 426  train vae loss: 0.13699 perp: 0.04617 kl: 0.09082: 100%|██████████| 379/379 [00:38<00:00,  9.95batch/s]
At epoch: 426  valid vae loss: 0.19358 perp: 0.10359 kl: 0.08998:   6%|▌         | 2/34 [00:00<00:02, 11.02batch/s]

>>>>average [92mtraining[0m of epoch 426: loss 0.14422 perp 0.05258 kl 0.09163


At epoch: 426  valid vae loss: 0.09753 perp: 0.01135 kl: 0.08618: 100%|██████████| 34/34 [00:01<00:00, 22.13batch/s]
At epoch: 427  train vae loss: 0.15179 perp: 0.05883 kl: 0.09296:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 426: loss 0.24583 perp 0.15582 kl 0.09002


At epoch: 427  train vae loss: 0.24182 perp: 0.15371 kl: 0.08811: 100%|██████████| 379/379 [00:38<00:00,  9.97batch/s]
At epoch: 427  valid vae loss: 0.15405 perp: 0.06218 kl: 0.09187:   6%|▌         | 2/34 [00:00<00:02, 11.16batch/s]

>>>>average [92mtraining[0m of epoch 427: loss 0.15259 perp 0.05972 kl 0.09287


At epoch: 427  valid vae loss: 0.09999 perp: 0.01192 kl: 0.08806: 100%|██████████| 34/34 [00:01<00:00, 22.08batch/s]
At epoch: 428  train vae loss: 0.13659 perp: 0.04473 kl: 0.09187:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 427: loss 0.26498 perp 0.17316 kl 0.09182


At epoch: 428  train vae loss: 0.11101 perp: 0.02123 kl: 0.08979: 100%|██████████| 379/379 [00:38<00:00,  9.92batch/s]
At epoch: 428  valid vae loss: 0.17485 perp: 0.08575 kl: 0.08910:   6%|▌         | 2/34 [00:00<00:02, 10.99batch/s]

>>>>average [92mtraining[0m of epoch 428: loss 0.14496 perp 0.05252 kl 0.09245


At epoch: 428  valid vae loss: 0.09865 perp: 0.01370 kl: 0.08496: 100%|██████████| 34/34 [00:01<00:00, 21.85batch/s]
At epoch: 429  train vae loss: 0.13994 perp: 0.04942 kl: 0.09053:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 428: loss 0.23744 perp 0.14846 kl 0.08899


At epoch: 429  train vae loss: 0.16628 perp: 0.07309 kl: 0.09318: 100%|██████████| 379/379 [00:38<00:00,  9.92batch/s]
At epoch: 429  valid vae loss: 0.15691 perp: 0.06400 kl: 0.09290:   6%|▌         | 2/34 [00:00<00:02, 11.13batch/s]

>>>>average [92mtraining[0m of epoch 429: loss 0.15222 perp 0.06077 kl 0.09145


At epoch: 429  valid vae loss: 0.12140 perp: 0.03281 kl: 0.08859: 100%|██████████| 34/34 [00:01<00:00, 22.05batch/s]
At epoch: 430  train vae loss: 0.22258 perp: 0.12878 kl: 0.09380:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 429: loss 0.27391 perp 0.18067 kl 0.09324


At epoch: 430  train vae loss: 0.19941 perp: 0.09365 kl: 0.10576: 100%|██████████| 379/379 [00:38<00:00,  9.90batch/s]
At epoch: 430  valid vae loss: 0.17488 perp: 0.07315 kl: 0.10173:   6%|▌         | 2/34 [00:00<00:02, 11.10batch/s]

>>>>average [92mtraining[0m of epoch 430: loss 0.18259 perp 0.08612 kl 0.09647


At epoch: 430  valid vae loss: 0.11305 perp: 0.01591 kl: 0.09714: 100%|██████████| 34/34 [00:01<00:00, 22.18batch/s]
At epoch: 431  train vae loss: 0.18721 perp: 0.08363 kl: 0.10359:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 430: loss 0.27307 perp 0.17178 kl 0.10128


At epoch: 431  train vae loss: 0.15957 perp: 0.06116 kl: 0.09841: 100%|██████████| 379/379 [00:38<00:00,  9.95batch/s]
At epoch: 431  valid vae loss: 0.22222 perp: 0.12409 kl: 0.09813:   6%|▌         | 2/34 [00:00<00:02, 11.09batch/s]

>>>>average [92mtraining[0m of epoch 431: loss 0.15430 perp 0.05838 kl 0.09592


At epoch: 431  valid vae loss: 0.18831 perp: 0.09467 kl: 0.09364: 100%|██████████| 34/34 [00:01<00:00, 22.15batch/s]
At epoch: 432  train vae loss: 0.16383 perp: 0.06561 kl: 0.09822:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 431: loss 0.29974 perp 0.20188 kl 0.09785


At epoch: 432  train vae loss: 0.20839 perp: 0.11317 kl: 0.09522: 100%|██████████| 379/379 [00:38<00:00,  9.85batch/s]
At epoch: 432  valid vae loss: 0.19967 perp: 0.10256 kl: 0.09712:   6%|▌         | 2/34 [00:00<00:02, 11.14batch/s]

>>>>average [92mtraining[0m of epoch 432: loss 0.16034 perp 0.06573 kl 0.09461


At epoch: 432  valid vae loss: 0.11175 perp: 0.01943 kl: 0.09232: 100%|██████████| 34/34 [00:01<00:00, 22.20batch/s]
At epoch: 433  train vae loss: 0.15382 perp: 0.05662 kl: 0.09720:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 432: loss 0.29386 perp 0.19690 kl 0.09695


At epoch: 433  train vae loss: 0.15123 perp: 0.05560 kl: 0.09563: 100%|██████████| 379/379 [00:37<00:00, 10.04batch/s]
At epoch: 433  valid vae loss: 0.17776 perp: 0.08378 kl: 0.09398:   6%|▌         | 2/34 [00:00<00:02, 11.12batch/s]

>>>>average [92mtraining[0m of epoch 433: loss 0.15183 perp 0.05721 kl 0.09462


At epoch: 433  valid vae loss: 0.11811 perp: 0.02800 kl: 0.09010: 100%|██████████| 34/34 [00:01<00:00, 22.15batch/s]
At epoch: 434  train vae loss: 0.17818 perp: 0.08350 kl: 0.09468:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 433: loss 0.26733 perp 0.17333 kl 0.09400


At epoch: 434  train vae loss: 0.14027 perp: 0.04556 kl: 0.09470: 100%|██████████| 379/379 [00:37<00:00, 10.03batch/s]
At epoch: 434  valid vae loss: 0.17893 perp: 0.08519 kl: 0.09374:   6%|▌         | 2/34 [00:00<00:02, 11.02batch/s]

>>>>average [92mtraining[0m of epoch 434: loss 0.17289 perp 0.07534 kl 0.09755


At epoch: 434  valid vae loss: 0.15378 perp: 0.06398 kl: 0.08979: 100%|██████████| 34/34 [00:01<00:00, 22.18batch/s]
At epoch: 435  train vae loss: 0.14998 perp: 0.05407 kl: 0.09591:   1%|          | 2/379 [00:00<00:28, 13.14batch/s]

>>>>average [93mvalid[0m of epoch 434: loss 0.25203 perp 0.15827 kl 0.09376


At epoch: 435  train vae loss: 0.15584 perp: 0.06447 kl: 0.09136: 100%|██████████| 379/379 [00:37<00:00, 10.02batch/s]
At epoch: 435  valid vae loss: 0.15073 perp: 0.05956 kl: 0.09117:   6%|▌         | 2/34 [00:00<00:02, 11.17batch/s]

>>>>average [92mtraining[0m of epoch 435: loss 0.13962 perp 0.04784 kl 0.09178


At epoch: 435  valid vae loss: 0.09813 perp: 0.01029 kl: 0.08784: 100%|██████████| 34/34 [00:01<00:00, 22.18batch/s]
At epoch: 436  train vae loss: 0.11968 perp: 0.02660 kl: 0.09308:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 435: loss 0.25380 perp 0.16236 kl 0.09143


At epoch: 436  train vae loss: 0.15573 perp: 0.05921 kl: 0.09652: 100%|██████████| 379/379 [00:37<00:00, 10.02batch/s]
At epoch: 436  valid vae loss: 0.18311 perp: 0.08883 kl: 0.09428:   6%|▌         | 2/34 [00:00<00:02, 11.13batch/s]

>>>>average [92mtraining[0m of epoch 436: loss 0.18383 perp 0.08528 kl 0.09855


At epoch: 436  valid vae loss: 0.11725 perp: 0.02713 kl: 0.09012: 100%|██████████| 34/34 [00:01<00:00, 22.16batch/s]
At epoch: 437  train vae loss: 0.14414 perp: 0.04551 kl: 0.09863:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 436: loss 0.24265 perp 0.14852 kl 0.09413


At epoch: 437  train vae loss: 0.14323 perp: 0.04890 kl: 0.09433: 100%|██████████| 379/379 [00:38<00:00,  9.85batch/s]
At epoch: 437  valid vae loss: 0.30715 perp: 0.21251 kl: 0.09464:   6%|▌         | 2/34 [00:00<00:02, 10.98batch/s]

>>>>average [92mtraining[0m of epoch 437: loss 0.13893 perp 0.04696 kl 0.09196


At epoch: 437  valid vae loss: 0.10450 perp: 0.01443 kl: 0.09008: 100%|██████████| 34/34 [00:01<00:00, 22.01batch/s]
At epoch: 438  train vae loss: 0.14237 perp: 0.04778 kl: 0.09460:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 437: loss 0.28404 perp 0.18959 kl 0.09445


At epoch: 438  train vae loss: 0.17042 perp: 0.07341 kl: 0.09702: 100%|██████████| 379/379 [00:38<00:00,  9.88batch/s]
At epoch: 438  valid vae loss: 0.21017 perp: 0.11554 kl: 0.09463:   6%|▌         | 2/34 [00:00<00:02, 11.14batch/s]

>>>>average [92mtraining[0m of epoch 438: loss 0.14895 perp 0.05541 kl 0.09354


At epoch: 438  valid vae loss: 0.14105 perp: 0.05072 kl: 0.09033: 100%|██████████| 34/34 [00:01<00:00, 23.11batch/s]
At epoch: 439  train vae loss: 0.14166 perp: 0.04066 kl: 0.10099:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 438: loss 0.26532 perp 0.17089 kl 0.09442


At epoch: 439  train vae loss: 0.21648 perp: 0.12586 kl: 0.09062: 100%|██████████| 379/379 [00:38<00:00,  9.79batch/s]
At epoch: 439  valid vae loss: 0.27995 perp: 0.18756 kl: 0.09240:   6%|▌         | 2/34 [00:00<00:02, 11.06batch/s]

>>>>average [92mtraining[0m of epoch 439: loss 0.14936 perp 0.05689 kl 0.09247


At epoch: 439  valid vae loss: 0.12040 perp: 0.03208 kl: 0.08832: 100%|██████████| 34/34 [00:01<00:00, 22.38batch/s]
At epoch: 440  train vae loss: 0.54649 perp: 0.45185 kl: 0.09464:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 439: loss 0.32182 perp 0.22951 kl 0.09231


At epoch: 440  train vae loss: 0.12242 perp: 0.03239 kl: 0.09003: 100%|██████████| 379/379 [00:39<00:00,  9.63batch/s]
At epoch: 440  valid vae loss: 0.13321 perp: 0.04431 kl: 0.08890:   6%|▌         | 2/34 [00:00<00:03, 10.44batch/s]

>>>>average [92mtraining[0m of epoch 440: loss 0.14646 perp 0.05431 kl 0.09215


At epoch: 440  valid vae loss: 0.09594 perp: 0.01097 kl: 0.08497: 100%|██████████| 34/34 [00:01<00:00, 21.53batch/s]
At epoch: 441  train vae loss: 0.29412 perp: 0.20232 kl: 0.09180:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 440: loss 0.24453 perp 0.15581 kl 0.08871


At epoch: 441  train vae loss: 0.12080 perp: 0.02784 kl: 0.09297: 100%|██████████| 379/379 [00:40<00:00,  9.43batch/s]
At epoch: 441  valid vae loss: 0.18136 perp: 0.08825 kl: 0.09311:   6%|▌         | 2/34 [00:00<00:02, 10.83batch/s]

>>>>average [92mtraining[0m of epoch 441: loss 0.16732 perp 0.07200 kl 0.09532


At epoch: 441  valid vae loss: 0.09925 perp: 0.00996 kl: 0.08928: 100%|██████████| 34/34 [00:01<00:00, 21.81batch/s]
At epoch: 442  train vae loss: 0.16301 perp: 0.06853 kl: 0.09447:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 441: loss 0.25642 perp 0.16355 kl 0.09287


At epoch: 442  train vae loss: 0.26098 perp: 0.16392 kl: 0.09706: 100%|██████████| 379/379 [00:39<00:00,  9.55batch/s]
At epoch: 442  valid vae loss: 0.19321 perp: 0.09350 kl: 0.09971:   6%|▌         | 2/34 [00:00<00:03, 10.62batch/s]

>>>>average [92mtraining[0m of epoch 442: loss 0.16902 perp 0.07684 kl 0.09218


At epoch: 442  valid vae loss: 0.13218 perp: 0.03667 kl: 0.09551: 100%|██████████| 34/34 [00:01<00:00, 21.94batch/s]
At epoch: 443  train vae loss: 0.22761 perp: 0.12589 kl: 0.10172:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 442: loss 0.30609 perp 0.20639 kl 0.09970


At epoch: 443  train vae loss: 0.12260 perp: 0.03305 kl: 0.08955: 100%|██████████| 379/379 [00:38<00:00,  9.81batch/s]
At epoch: 443  valid vae loss: 0.13890 perp: 0.04820 kl: 0.09070:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 443: loss 0.14942 perp 0.05418 kl 0.09524


At epoch: 443  valid vae loss: 0.10425 perp: 0.01742 kl: 0.08683: 100%|██████████| 34/34 [00:01<00:00, 23.38batch/s]
At epoch: 444  train vae loss: 0.12392 perp: 0.03334 kl: 0.09058:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 443: loss 0.24897 perp 0.15816 kl 0.09081


At epoch: 444  train vae loss: 0.15730 perp: 0.06587 kl: 0.09143: 100%|██████████| 379/379 [00:39<00:00,  9.58batch/s]
At epoch: 444  valid vae loss: 0.13183 perp: 0.04264 kl: 0.08919:   6%|▌         | 2/34 [00:00<00:03, 10.49batch/s]

>>>>average [92mtraining[0m of epoch 444: loss 0.13953 perp 0.04874 kl 0.09078


At epoch: 444  valid vae loss: 0.10188 perp: 0.01629 kl: 0.08559: 100%|██████████| 34/34 [00:01<00:00, 20.90batch/s]
At epoch: 445  train vae loss: 0.11972 perp: 0.02471 kl: 0.09501:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 444: loss 0.23544 perp 0.14617 kl 0.08927


At epoch: 445  train vae loss: 0.14899 perp: 0.05856 kl: 0.09044: 100%|██████████| 379/379 [00:40<00:00,  9.41batch/s]
At epoch: 445  valid vae loss: 0.15719 perp: 0.06803 kl: 0.08916:   6%|▌         | 2/34 [00:00<00:02, 10.73batch/s]

>>>>average [92mtraining[0m of epoch 445: loss 0.14951 perp 0.05828 kl 0.09123


At epoch: 445  valid vae loss: 0.09066 perp: 0.00530 kl: 0.08536: 100%|██████████| 34/34 [00:01<00:00, 22.05batch/s]
At epoch: 446  train vae loss: 0.14636 perp: 0.05518 kl: 0.09118:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 445: loss 0.25416 perp 0.16509 kl 0.08907


At epoch: 446  train vae loss: 0.12053 perp: 0.03076 kl: 0.08977: 100%|██████████| 379/379 [00:39<00:00,  9.66batch/s]
At epoch: 446  valid vae loss: 0.15393 perp: 0.06269 kl: 0.09123:   6%|▌         | 2/34 [00:00<00:02, 11.22batch/s]

>>>>average [92mtraining[0m of epoch 446: loss 0.15213 perp 0.06080 kl 0.09134


At epoch: 446  valid vae loss: 0.10094 perp: 0.01389 kl: 0.08704: 100%|██████████| 34/34 [00:01<00:00, 22.32batch/s]
At epoch: 447  train vae loss: 0.10788 perp: 0.01639 kl: 0.09149:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 446: loss 0.24022 perp 0.14893 kl 0.09129


At epoch: 447  train vae loss: 0.15470 perp: 0.05420 kl: 0.10050: 100%|██████████| 379/379 [00:36<00:00, 10.35batch/s]
At epoch: 447  valid vae loss: 0.16781 perp: 0.06905 kl: 0.09877:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 447: loss 0.20968 perp 0.11147 kl 0.09821


At epoch: 447  valid vae loss: 0.14253 perp: 0.04819 kl: 0.09434: 100%|██████████| 34/34 [00:01<00:00, 23.47batch/s]
At epoch: 448  train vae loss: 0.12679 perp: 0.02669 kl: 0.10010:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 447: loss 0.26528 perp 0.16654 kl 0.09874


At epoch: 448  train vae loss: 0.13388 perp: 0.04196 kl: 0.09192: 100%|██████████| 379/379 [00:37<00:00, 10.13batch/s]
At epoch: 448  valid vae loss: 0.15650 perp: 0.06866 kl: 0.08784:   6%|▌         | 2/34 [00:00<00:02, 11.65batch/s]

>>>>average [92mtraining[0m of epoch 448: loss 0.13241 perp 0.03909 kl 0.09332


At epoch: 448  valid vae loss: 0.08994 perp: 0.00583 kl: 0.08411: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 449  train vae loss: 0.11590 perp: 0.02971 kl: 0.08619:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 448: loss 0.24403 perp 0.15599 kl 0.08804


At epoch: 449  train vae loss: 0.14186 perp: 0.04946 kl: 0.09240: 100%|██████████| 379/379 [00:39<00:00,  9.66batch/s]
At epoch: 449  valid vae loss: 0.13527 perp: 0.04521 kl: 0.09006:   6%|▌         | 2/34 [00:00<00:02, 11.77batch/s]

>>>>average [92mtraining[0m of epoch 449: loss 0.13972 perp 0.04965 kl 0.09007


At epoch: 449  valid vae loss: 0.11451 perp: 0.02874 kl: 0.08577: 100%|██████████| 34/34 [00:01<00:00, 22.89batch/s]
At epoch: 450  train vae loss: 0.17363 perp: 0.08102 kl: 0.09261:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 449: loss 0.23947 perp 0.14938 kl 0.09009


At epoch: 450  train vae loss: 0.11507 perp: 0.02293 kl: 0.09214: 100%|██████████| 379/379 [00:37<00:00, 10.08batch/s]
At epoch: 450  valid vae loss: 0.15626 perp: 0.06518 kl: 0.09108:   6%|▌         | 2/34 [00:00<00:02, 11.83batch/s]

>>>>average [92mtraining[0m of epoch 450: loss 0.17308 perp 0.07854 kl 0.09454


At epoch: 450  valid vae loss: 0.09729 perp: 0.01024 kl: 0.08706: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 451  train vae loss: 0.15026 perp: 0.05633 kl: 0.09392:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 450: loss 0.25478 perp 0.16356 kl 0.09121


At epoch: 451  train vae loss: 0.15239 perp: 0.05803 kl: 0.09436: 100%|██████████| 379/379 [00:37<00:00,  9.98batch/s]
At epoch: 451  valid vae loss: 0.16887 perp: 0.07881 kl: 0.09006:   6%|▌         | 2/34 [00:00<00:02, 11.15batch/s]

>>>>average [92mtraining[0m of epoch 451: loss 0.13366 perp 0.04419 kl 0.08947


At epoch: 451  valid vae loss: 0.11637 perp: 0.03014 kl: 0.08623: 100%|██████████| 34/34 [00:01<00:00, 21.95batch/s]
At epoch: 452  train vae loss: 0.14170 perp: 0.05131 kl: 0.09039:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 451: loss 0.24820 perp 0.15789 kl 0.09031


At epoch: 452  train vae loss: 0.13261 perp: 0.03797 kl: 0.09464: 100%|██████████| 379/379 [00:38<00:00,  9.93batch/s]
At epoch: 452  valid vae loss: 0.19573 perp: 0.10345 kl: 0.09228:   6%|▌         | 2/34 [00:00<00:02, 11.04batch/s]

>>>>average [92mtraining[0m of epoch 452: loss 0.14723 perp 0.05689 kl 0.09033


At epoch: 452  valid vae loss: 0.13083 perp: 0.04250 kl: 0.08833: 100%|██████████| 34/34 [00:01<00:00, 22.84batch/s]
At epoch: 453  train vae loss: 0.14197 perp: 0.04753 kl: 0.09444:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 452: loss 0.28495 perp 0.19266 kl 0.09229


At epoch: 453  train vae loss: 0.14356 perp: 0.04956 kl: 0.09400: 100%|██████████| 379/379 [00:38<00:00,  9.95batch/s]
At epoch: 453  valid vae loss: 0.14568 perp: 0.05656 kl: 0.08912:   6%|▌         | 2/34 [00:00<00:02, 11.18batch/s]

>>>>average [92mtraining[0m of epoch 453: loss 0.14401 perp 0.05193 kl 0.09208


At epoch: 453  valid vae loss: 0.08765 perp: 0.00256 kl: 0.08509: 100%|██████████| 34/34 [00:01<00:00, 22.16batch/s]
At epoch: 454  train vae loss: 0.13406 perp: 0.04228 kl: 0.09178:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 453: loss 0.23517 perp 0.14581 kl 0.08935


At epoch: 454  train vae loss: 0.10722 perp: 0.01642 kl: 0.09080: 100%|██████████| 379/379 [00:38<00:00,  9.89batch/s]
At epoch: 454  valid vae loss: 0.15163 perp: 0.06533 kl: 0.08630:   6%|▌         | 2/34 [00:00<00:02, 10.86batch/s]

>>>>average [92mtraining[0m of epoch 454: loss 0.13396 perp 0.04523 kl 0.08873


At epoch: 454  valid vae loss: 0.08760 perp: 0.00478 kl: 0.08281: 100%|██████████| 34/34 [00:01<00:00, 22.10batch/s]
At epoch: 455  train vae loss: 0.11877 perp: 0.03051 kl: 0.08825:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 454: loss 0.24330 perp 0.15656 kl 0.08674


At epoch: 455  train vae loss: 0.11571 perp: 0.02260 kl: 0.09311: 100%|██████████| 379/379 [00:38<00:00,  9.89batch/s]
At epoch: 455  valid vae loss: 0.18097 perp: 0.09044 kl: 0.09053:   6%|▌         | 2/34 [00:00<00:03, 10.64batch/s]

>>>>average [92mtraining[0m of epoch 455: loss 0.15121 perp 0.06087 kl 0.09035


At epoch: 455  valid vae loss: 0.10434 perp: 0.01748 kl: 0.08686: 100%|██████████| 34/34 [00:01<00:00, 21.95batch/s]
At epoch: 456  train vae loss: 0.12139 perp: 0.02614 kl: 0.09525:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 455: loss 0.28782 perp 0.19700 kl 0.09081


At epoch: 456  train vae loss: 0.13604 perp: 0.04744 kl: 0.08859: 100%|██████████| 379/379 [00:38<00:00,  9.90batch/s]
At epoch: 456  valid vae loss: 0.15893 perp: 0.07153 kl: 0.08740:   6%|▌         | 2/34 [00:00<00:03, 10.56batch/s]

>>>>average [92mtraining[0m of epoch 456: loss 0.13914 perp 0.04938 kl 0.08975


At epoch: 456  valid vae loss: 0.09445 perp: 0.01112 kl: 0.08333: 100%|██████████| 34/34 [00:01<00:00, 21.84batch/s]
At epoch: 457  train vae loss: 0.13313 perp: 0.04595 kl: 0.08718:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 456: loss 0.23872 perp 0.15116 kl 0.08756


At epoch: 457  train vae loss: 0.19106 perp: 0.09106 kl: 0.10000: 100%|██████████| 379/379 [00:38<00:00,  9.83batch/s]
At epoch: 457  valid vae loss: 0.16802 perp: 0.07078 kl: 0.09725:   6%|▌         | 2/34 [00:00<00:02, 11.03batch/s]

>>>>average [92mtraining[0m of epoch 457: loss 0.16777 perp 0.07626 kl 0.09150


At epoch: 457  valid vae loss: 0.11054 perp: 0.01740 kl: 0.09314: 100%|██████████| 34/34 [00:01<00:00, 22.18batch/s]
At epoch: 458  train vae loss: 0.12306 perp: 0.02365 kl: 0.09942:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 457: loss 0.25660 perp 0.15924 kl 0.09736


At epoch: 458  train vae loss: 0.13079 perp: 0.03176 kl: 0.09903: 100%|██████████| 379/379 [00:38<00:00,  9.97batch/s]
At epoch: 458  valid vae loss: 0.14182 perp: 0.04712 kl: 0.09470:   6%|▌         | 2/34 [00:00<00:02, 11.29batch/s]

>>>>average [92mtraining[0m of epoch 458: loss 0.15130 perp 0.05811 kl 0.09319


At epoch: 458  valid vae loss: 0.10063 perp: 0.01011 kl: 0.09052: 100%|██████████| 34/34 [00:01<00:00, 21.83batch/s]
At epoch: 459  train vae loss: 0.13791 perp: 0.04373 kl: 0.09418:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 458: loss 0.25620 perp 0.16180 kl 0.09440


At epoch: 459  train vae loss: 0.10491 perp: 0.01510 kl: 0.08981: 100%|██████████| 379/379 [00:37<00:00,  9.97batch/s]
At epoch: 459  valid vae loss: 0.14924 perp: 0.06196 kl: 0.08728:   6%|▌         | 2/34 [00:00<00:02, 11.19batch/s]

>>>>average [92mtraining[0m of epoch 459: loss 0.13365 perp 0.04363 kl 0.09003


At epoch: 459  valid vae loss: 0.09310 perp: 0.00989 kl: 0.08321: 100%|██████████| 34/34 [00:01<00:00, 22.05batch/s]
At epoch: 460  train vae loss: 0.11350 perp: 0.02293 kl: 0.09057:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 459: loss 0.24015 perp 0.15293 kl 0.08722


At epoch: 460  train vae loss: 0.12418 perp: 0.03492 kl: 0.08926: 100%|██████████| 379/379 [00:38<00:00,  9.80batch/s]
At epoch: 460  valid vae loss: 0.15565 perp: 0.06793 kl: 0.08772:   6%|▌         | 2/34 [00:00<00:02, 10.99batch/s]

>>>>average [92mtraining[0m of epoch 460: loss 0.13209 perp 0.04498 kl 0.08710


At epoch: 460  valid vae loss: 0.09207 perp: 0.00837 kl: 0.08370: 100%|██████████| 34/34 [00:01<00:00, 21.93batch/s]
At epoch: 461  train vae loss: 0.17954 perp: 0.09010 kl: 0.08944:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 460: loss 0.25435 perp 0.16671 kl 0.08765


At epoch: 461  train vae loss: 0.13272 perp: 0.04079 kl: 0.09193: 100%|██████████| 379/379 [00:37<00:00,  9.98batch/s]
At epoch: 461  valid vae loss: 0.15241 perp: 0.06198 kl: 0.09043:   6%|▌         | 2/34 [00:00<00:02, 10.95batch/s]

>>>>average [92mtraining[0m of epoch 461: loss 0.15433 perp 0.06479 kl 0.08955


At epoch: 461  valid vae loss: 0.13654 perp: 0.04995 kl: 0.08659: 100%|██████████| 34/34 [00:01<00:00, 22.08batch/s]
At epoch: 462  train vae loss: 0.16162 perp: 0.07060 kl: 0.09102:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 461: loss 0.27016 perp 0.17980 kl 0.09036


At epoch: 462  train vae loss: 0.13606 perp: 0.04526 kl: 0.09080: 100%|██████████| 379/379 [00:37<00:00,  9.98batch/s]
At epoch: 462  valid vae loss: 0.14027 perp: 0.05032 kl: 0.08995:   6%|▌         | 2/34 [00:00<00:02, 11.01batch/s]

>>>>average [92mtraining[0m of epoch 462: loss 0.15339 perp 0.06207 kl 0.09132


At epoch: 462  valid vae loss: 0.09153 perp: 0.00581 kl: 0.08572: 100%|██████████| 34/34 [00:01<00:00, 22.07batch/s]
At epoch: 463  train vae loss: 0.12093 perp: 0.03263 kl: 0.08831:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 462: loss 0.24363 perp 0.15362 kl 0.09002


At epoch: 463  train vae loss: 0.12898 perp: 0.04204 kl: 0.08694: 100%|██████████| 379/379 [00:37<00:00, 10.00batch/s]
At epoch: 463  valid vae loss: 0.16480 perp: 0.07574 kl: 0.08906:   6%|▌         | 2/34 [00:00<00:02, 11.09batch/s]

>>>>average [92mtraining[0m of epoch 463: loss 0.13741 perp 0.04877 kl 0.08864


At epoch: 463  valid vae loss: 0.10132 perp: 0.01619 kl: 0.08513: 100%|██████████| 34/34 [00:01<00:00, 22.14batch/s]
At epoch: 464  train vae loss: 0.15651 perp: 0.06468 kl: 0.09183:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 463: loss 0.25361 perp 0.16473 kl 0.08888


At epoch: 464  train vae loss: 0.17806 perp: 0.08701 kl: 0.09105: 100%|██████████| 379/379 [00:38<00:00,  9.92batch/s]
At epoch: 464  valid vae loss: 0.17625 perp: 0.08532 kl: 0.09093:   6%|▌         | 2/34 [00:00<00:02, 11.14batch/s]

>>>>average [92mtraining[0m of epoch 464: loss 0.14773 perp 0.05859 kl 0.08914


At epoch: 464  valid vae loss: 0.09375 perp: 0.00726 kl: 0.08649: 100%|██████████| 34/34 [00:01<00:00, 21.79batch/s]
At epoch: 465  train vae loss: 0.12689 perp: 0.03651 kl: 0.09038:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 464: loss 0.26661 perp 0.17582 kl 0.09079


At epoch: 465  train vae loss: 0.20449 perp: 0.11394 kl: 0.09055: 100%|██████████| 379/379 [00:38<00:00,  9.89batch/s]
At epoch: 465  valid vae loss: 0.15620 perp: 0.06736 kl: 0.08885:   6%|▌         | 2/34 [00:00<00:02, 11.12batch/s]

>>>>average [92mtraining[0m of epoch 465: loss 0.14299 perp 0.05335 kl 0.08963


At epoch: 465  valid vae loss: 0.09675 perp: 0.01165 kl: 0.08510: 100%|██████████| 34/34 [00:01<00:00, 21.68batch/s]
At epoch: 466  train vae loss: 0.13310 perp: 0.04658 kl: 0.08652:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 465: loss 0.25930 perp 0.17051 kl 0.08879


At epoch: 466  train vae loss: 0.17093 perp: 0.07626 kl: 0.09467: 100%|██████████| 379/379 [00:37<00:00, 10.06batch/s]
At epoch: 466  valid vae loss: 0.18860 perp: 0.09522 kl: 0.09339:   6%|▌         | 2/34 [00:00<00:02, 11.72batch/s]

>>>>average [92mtraining[0m of epoch 466: loss 0.14860 perp 0.05973 kl 0.08886


At epoch: 466  valid vae loss: 0.13306 perp: 0.04296 kl: 0.09010: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 467  train vae loss: 0.17690 perp: 0.08364 kl: 0.09326:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 466: loss 0.26959 perp 0.17616 kl 0.09343


At epoch: 467  train vae loss: 0.11049 perp: 0.02487 kl: 0.08562: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 467  valid vae loss: 0.14798 perp: 0.06409 kl: 0.08389:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 467: loss 0.13827 perp 0.04815 kl 0.09012


At epoch: 467  valid vae loss: 0.09097 perp: 0.01066 kl: 0.08031: 100%|██████████| 34/34 [00:01<00:00, 23.46batch/s]
At epoch: 468  train vae loss: 0.10293 perp: 0.01707 kl: 0.08586:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 467: loss 0.22188 perp 0.13793 kl 0.08395
saving to best model since this is the best valid loss so far.----


At epoch: 468  train vae loss: 0.13489 perp: 0.04625 kl: 0.08864: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 468  valid vae loss: 0.17381 perp: 0.08492 kl: 0.08889:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 468: loss 0.17005 perp 0.07739 kl 0.09266


At epoch: 468  valid vae loss: 0.09605 perp: 0.01090 kl: 0.08515: 100%|██████████| 34/34 [00:01<00:00, 23.43batch/s]
At epoch: 469  train vae loss: 0.11697 perp: 0.02714 kl: 0.08983:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 468: loss 0.23516 perp 0.14649 kl 0.08867


At epoch: 469  train vae loss: 0.12254 perp: 0.03265 kl: 0.08989: 100%|██████████| 379/379 [00:36<00:00, 10.29batch/s]
At epoch: 469  valid vae loss: 0.16035 perp: 0.07296 kl: 0.08739:   6%|▌         | 2/34 [00:00<00:02, 11.59batch/s]

>>>>average [92mtraining[0m of epoch 469: loss 0.12804 perp 0.04111 kl 0.08693


At epoch: 469  valid vae loss: 0.08915 perp: 0.00556 kl: 0.08359: 100%|██████████| 34/34 [00:01<00:00, 23.29batch/s]
At epoch: 470  train vae loss: 0.15423 perp: 0.06575 kl: 0.08848:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 469: loss 0.24622 perp 0.15882 kl 0.08740


At epoch: 470  train vae loss: 0.18194 perp: 0.09487 kl: 0.08706: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 470  valid vae loss: 0.15511 perp: 0.07129 kl: 0.08382:   6%|▌         | 2/34 [00:00<00:02, 11.79batch/s]

>>>>average [92mtraining[0m of epoch 470: loss 0.12997 perp 0.04442 kl 0.08555


At epoch: 470  valid vae loss: 0.14966 perp: 0.06911 kl: 0.08055: 100%|██████████| 34/34 [00:01<00:00, 23.34batch/s]
At epoch: 471  train vae loss: 0.14664 perp: 0.06152 kl: 0.08512:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 470: loss 0.24534 perp 0.16147 kl 0.08387


At epoch: 471  train vae loss: 0.10628 perp: 0.02297 kl: 0.08331: 100%|██████████| 379/379 [00:36<00:00, 10.38batch/s]
At epoch: 471  valid vae loss: 0.14642 perp: 0.06360 kl: 0.08281:   6%|▌         | 2/34 [00:00<00:02, 11.75batch/s]

>>>>average [92mtraining[0m of epoch 471: loss 0.13529 perp 0.04918 kl 0.08611


At epoch: 471  valid vae loss: 0.08623 perp: 0.00692 kl: 0.07932: 100%|██████████| 34/34 [00:01<00:00, 23.45batch/s]
At epoch: 472  train vae loss: 0.10970 perp: 0.02549 kl: 0.08421:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 471: loss 0.21930 perp 0.13668 kl 0.08261
saving to best model since this is the best valid loss so far.----


At epoch: 472  train vae loss: 0.17158 perp: 0.07153 kl: 0.10005: 100%|██████████| 379/379 [00:36<00:00, 10.41batch/s]
At epoch: 472  valid vae loss: 0.17959 perp: 0.08482 kl: 0.09478:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 472: loss 0.17832 perp 0.08830 kl 0.09001


At epoch: 472  valid vae loss: 0.10382 perp: 0.01332 kl: 0.09049: 100%|██████████| 34/34 [00:01<00:00, 23.61batch/s]
At epoch: 473  train vae loss: 0.19608 perp: 0.10035 kl: 0.09574:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 472: loss 0.27144 perp 0.17653 kl 0.09492


At epoch: 473  train vae loss: 0.14765 perp: 0.04503 kl: 0.10262: 100%|██████████| 379/379 [00:36<00:00, 10.40batch/s]
At epoch: 473  valid vae loss: 0.18811 perp: 0.08801 kl: 0.10009:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 473: loss 0.18293 perp 0.08985 kl 0.09308


At epoch: 473  valid vae loss: 0.10914 perp: 0.01319 kl: 0.09596: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 474  train vae loss: 0.12135 perp: 0.02177 kl: 0.09958:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 473: loss 0.25551 perp 0.15577 kl 0.09974


At epoch: 474  train vae loss: 0.10498 perp: 0.01914 kl: 0.08584: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 474  valid vae loss: 0.15869 perp: 0.07454 kl: 0.08415:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 474: loss 0.12216 perp 0.03226 kl 0.08990


At epoch: 474  valid vae loss: 0.09374 perp: 0.01265 kl: 0.08109: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 475  train vae loss: 0.10687 perp: 0.02208 kl: 0.08479:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 474: loss 0.22868 perp 0.14469 kl 0.08399


At epoch: 475  train vae loss: 0.12910 perp: 0.04359 kl: 0.08552: 100%|██████████| 379/379 [00:36<00:00, 10.39batch/s]
At epoch: 475  valid vae loss: 0.14946 perp: 0.06325 kl: 0.08621:   6%|▌         | 2/34 [00:00<00:02, 11.66batch/s]

>>>>average [92mtraining[0m of epoch 475: loss 0.13445 perp 0.04852 kl 0.08593


At epoch: 475  valid vae loss: 0.09213 perp: 0.00909 kl: 0.08304: 100%|██████████| 34/34 [00:01<00:00, 23.51batch/s]
At epoch: 476  train vae loss: 0.12498 perp: 0.03619 kl: 0.08879:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 475: loss 0.22643 perp 0.14032 kl 0.08611


At epoch: 476  train vae loss: 0.15424 perp: 0.06588 kl: 0.08836: 100%|██████████| 379/379 [00:37<00:00, 10.17batch/s]
At epoch: 476  valid vae loss: 0.13247 perp: 0.04592 kl: 0.08655:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 476: loss 0.12943 perp 0.04448 kl 0.08495


At epoch: 476  valid vae loss: 0.08878 perp: 0.00627 kl: 0.08252: 100%|██████████| 34/34 [00:01<00:00, 23.33batch/s]
At epoch: 477  train vae loss: 0.10256 perp: 0.01835 kl: 0.08421:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 476: loss 0.22996 perp 0.14377 kl 0.08619


At epoch: 477  train vae loss: 0.10879 perp: 0.02907 kl: 0.07971: 100%|██████████| 379/379 [00:37<00:00, 10.19batch/s]
At epoch: 477  valid vae loss: 0.16266 perp: 0.08026 kl: 0.08240:   6%|▌         | 2/34 [00:00<00:02, 11.64batch/s]

>>>>average [92mtraining[0m of epoch 477: loss 0.12389 perp 0.03919 kl 0.08471


At epoch: 477  valid vae loss: 0.14558 perp: 0.06676 kl: 0.07881: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 478  train vae loss: 0.09673 perp: 0.01561 kl: 0.08113:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 477: loss 0.22510 perp 0.14292 kl 0.08219


At epoch: 478  train vae loss: 0.12819 perp: 0.03657 kl: 0.09162: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 478  valid vae loss: 0.16078 perp: 0.07006 kl: 0.09072:   6%|▌         | 2/34 [00:00<00:02, 11.78batch/s]

>>>>average [92mtraining[0m of epoch 478: loss 0.24070 perp 0.14461 kl 0.09609


At epoch: 478  valid vae loss: 0.09030 perp: 0.00332 kl: 0.08698: 100%|██████████| 34/34 [00:01<00:00, 23.54batch/s]
At epoch: 479  train vae loss: 0.11841 perp: 0.02508 kl: 0.09332:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 478: loss 0.23348 perp 0.14265 kl 0.09083


At epoch: 479  train vae loss: 0.16598 perp: 0.08258 kl: 0.08340: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 479  valid vae loss: 0.14842 perp: 0.06514 kl: 0.08329:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 479: loss 0.12295 perp 0.03447 kl 0.08848


At epoch: 479  valid vae loss: 0.08389 perp: 0.00405 kl: 0.07984: 100%|██████████| 34/34 [00:01<00:00, 23.55batch/s]
At epoch: 480  train vae loss: 0.10878 perp: 0.02527 kl: 0.08352:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 479: loss 0.22026 perp 0.13688 kl 0.08338


At epoch: 480  train vae loss: 0.08703 perp: 0.00746 kl: 0.07957: 100%|██████████| 379/379 [00:37<00:00, 10.16batch/s]
At epoch: 480  valid vae loss: 0.12280 perp: 0.04231 kl: 0.08050:   6%|▌         | 2/34 [00:00<00:02, 11.67batch/s]

>>>>average [92mtraining[0m of epoch 480: loss 0.11750 perp 0.03379 kl 0.08371


At epoch: 480  valid vae loss: 0.08295 perp: 0.00581 kl: 0.07714: 100%|██████████| 34/34 [00:01<00:00, 23.56batch/s]
At epoch: 481  train vae loss: 0.10651 perp: 0.01939 kl: 0.08711:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 480: loss 0.22024 perp 0.13968 kl 0.08056


At epoch: 481  train vae loss: 0.10051 perp: 0.01902 kl: 0.08149: 100%|██████████| 379/379 [00:36<00:00, 10.24batch/s]
At epoch: 481  valid vae loss: 0.15387 perp: 0.07085 kl: 0.08302:   6%|▌         | 2/34 [00:00<00:02, 11.42batch/s]

>>>>average [92mtraining[0m of epoch 481: loss 0.13364 perp 0.04951 kl 0.08413


At epoch: 481  valid vae loss: 0.11024 perp: 0.03023 kl: 0.08001: 100%|██████████| 34/34 [00:01<00:00, 23.08batch/s]
  0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 481: loss 0.21354 perp 0.13030 kl 0.08324
saving to best model since this is the best valid loss so far.----


At epoch: 482  train vae loss: 0.13843 perp: 0.05269 kl: 0.08574: 100%|██████████| 379/379 [00:36<00:00, 10.27batch/s]
At epoch: 482  valid vae loss: 0.12022 perp: 0.03799 kl: 0.08223:   6%|▌         | 2/34 [00:00<00:02, 10.77batch/s]

>>>>average [92mtraining[0m of epoch 482: loss 0.11888 perp 0.03614 kl 0.08274


At epoch: 482  valid vae loss: 0.08281 perp: 0.00344 kl: 0.07938: 100%|██████████| 34/34 [00:01<00:00, 22.19batch/s]
At epoch: 483  train vae loss: 0.12230 perp: 0.04003 kl: 0.08227:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 482: loss 0.21761 perp 0.13530 kl 0.08231


At epoch: 483  train vae loss: 0.11781 perp: 0.03317 kl: 0.08463: 100%|██████████| 379/379 [00:36<00:00, 10.28batch/s]
At epoch: 483  valid vae loss: 0.14859 perp: 0.06535 kl: 0.08324:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 483: loss 0.13715 perp 0.05163 kl 0.08552


At epoch: 483  valid vae loss: 0.08819 perp: 0.00821 kl: 0.07998: 100%|██████████| 34/34 [00:01<00:00, 23.31batch/s]
At epoch: 484  train vae loss: 0.11303 perp: 0.03129 kl: 0.08175:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 483: loss 0.22327 perp 0.13996 kl 0.08331


At epoch: 484  train vae loss: 0.15782 perp: 0.07449 kl: 0.08333: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 484  valid vae loss: 0.14603 perp: 0.06371 kl: 0.08233:   6%|▌         | 2/34 [00:00<00:02, 11.60batch/s]

>>>>average [92mtraining[0m of epoch 484: loss 0.12980 perp 0.04593 kl 0.08388


At epoch: 484  valid vae loss: 0.10318 perp: 0.02388 kl: 0.07930: 100%|██████████| 34/34 [00:01<00:00, 23.47batch/s]
At epoch: 485  train vae loss: 0.11484 perp: 0.03318 kl: 0.08166:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 484: loss 0.23423 perp 0.15181 kl 0.08242


At epoch: 485  train vae loss: 0.11611 perp: 0.02563 kl: 0.09048: 100%|██████████| 379/379 [00:36<00:00, 10.31batch/s]
At epoch: 485  valid vae loss: 0.12942 perp: 0.04222 kl: 0.08720:   6%|▌         | 2/34 [00:00<00:02, 11.55batch/s]

>>>>average [92mtraining[0m of epoch 485: loss 0.18409 perp 0.09323 kl 0.09086


At epoch: 485  valid vae loss: 0.09899 perp: 0.01508 kl: 0.08391: 100%|██████████| 34/34 [00:01<00:00, 23.16batch/s]
At epoch: 486  train vae loss: 0.11596 perp: 0.02942 kl: 0.08655:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 485: loss 0.23963 perp 0.15209 kl 0.08754


At epoch: 486  train vae loss: 0.11307 perp: 0.02952 kl: 0.08354: 100%|██████████| 379/379 [00:36<00:00, 10.25batch/s]
At epoch: 486  valid vae loss: 0.13371 perp: 0.05241 kl: 0.08130:   6%|▌         | 2/34 [00:00<00:02, 11.15batch/s]

>>>>average [92mtraining[0m of epoch 486: loss 0.11753 perp 0.03273 kl 0.08480


At epoch: 486  valid vae loss: 0.08307 perp: 0.00466 kl: 0.07841: 100%|██████████| 34/34 [00:01<00:00, 22.11batch/s]
At epoch: 487  train vae loss: 0.11791 perp: 0.03486 kl: 0.08305:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 486: loss 0.23709 perp 0.15558 kl 0.08151


At epoch: 487  train vae loss: 0.12257 perp: 0.03961 kl: 0.08296: 100%|██████████| 379/379 [00:37<00:00, 10.21batch/s]
At epoch: 487  valid vae loss: 0.12509 perp: 0.04395 kl: 0.08114:   6%|▌         | 2/34 [00:00<00:02, 11.52batch/s]

>>>>average [92mtraining[0m of epoch 487: loss 0.12122 perp 0.03812 kl 0.08309


At epoch: 487  valid vae loss: 0.09408 perp: 0.01612 kl: 0.07796: 100%|██████████| 34/34 [00:01<00:00, 23.08batch/s]
At epoch: 488  train vae loss: 0.11439 perp: 0.03388 kl: 0.08050:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 487: loss 0.21828 perp 0.13674 kl 0.08154


At epoch: 488  train vae loss: 0.10409 perp: 0.02285 kl: 0.08125: 100%|██████████| 379/379 [00:37<00:00, 10.12batch/s]
At epoch: 488  valid vae loss: 0.14147 perp: 0.06101 kl: 0.08046:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 488: loss 0.12664 perp 0.04412 kl 0.08251


At epoch: 488  valid vae loss: 0.08653 perp: 0.00888 kl: 0.07765: 100%|██████████| 34/34 [00:01<00:00, 23.24batch/s]
At epoch: 489  train vae loss: 0.15674 perp: 0.07485 kl: 0.08189:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 488: loss 0.23418 perp 0.15340 kl 0.08078


At epoch: 489  train vae loss: 0.11726 perp: 0.02858 kl: 0.08868: 100%|██████████| 379/379 [00:37<00:00, 10.02batch/s]
At epoch: 489  valid vae loss: 0.15418 perp: 0.06814 kl: 0.08604:   6%|▌         | 2/34 [00:00<00:02, 11.13batch/s]

>>>>average [92mtraining[0m of epoch 489: loss 0.15354 perp 0.06678 kl 0.08675


At epoch: 489  valid vae loss: 0.08765 perp: 0.00489 kl: 0.08275: 100%|██████████| 34/34 [00:01<00:00, 21.97batch/s]
At epoch: 490  train vae loss: 0.13827 perp: 0.05189 kl: 0.08638:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 489: loss 0.23023 perp 0.14404 kl 0.08619


At epoch: 490  train vae loss: 0.11657 perp: 0.02930 kl: 0.08727: 100%|██████████| 379/379 [00:37<00:00, 10.09batch/s]
At epoch: 490  valid vae loss: 0.12972 perp: 0.04551 kl: 0.08421:   6%|▌         | 2/34 [00:00<00:02, 10.82batch/s]

>>>>average [92mtraining[0m of epoch 490: loss 0.13732 perp 0.05155 kl 0.08577


At epoch: 490  valid vae loss: 0.09813 perp: 0.01718 kl: 0.08095: 100%|██████████| 34/34 [00:01<00:00, 23.03batch/s]
At epoch: 491  train vae loss: 0.11601 perp: 0.03002 kl: 0.08599:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 490: loss 0.24302 perp 0.15878 kl 0.08424


At epoch: 491  train vae loss: 0.18669 perp: 0.10638 kl: 0.08031: 100%|██████████| 379/379 [00:36<00:00, 10.27batch/s]
At epoch: 491  valid vae loss: 0.23751 perp: 0.15485 kl: 0.08265:   6%|▌         | 2/34 [00:00<00:02, 11.73batch/s]

>>>>average [92mtraining[0m of epoch 491: loss 0.13172 perp 0.04746 kl 0.08426


At epoch: 491  valid vae loss: 0.14083 perp: 0.06177 kl: 0.07907: 100%|██████████| 34/34 [00:01<00:00, 23.42batch/s]
At epoch: 492  train vae loss: 0.16380 perp: 0.08038 kl: 0.08342:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 491: loss 0.29370 perp 0.21110 kl 0.08260


At epoch: 492  train vae loss: 0.10572 perp: 0.01497 kl: 0.09075: 100%|██████████| 379/379 [00:36<00:00, 10.31batch/s]
At epoch: 492  valid vae loss: 0.14345 perp: 0.05531 kl: 0.08814:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 492: loss 0.16785 perp 0.07898 kl 0.08887


At epoch: 492  valid vae loss: 0.09158 perp: 0.00663 kl: 0.08494: 100%|██████████| 34/34 [00:01<00:00, 23.47batch/s]
At epoch: 493  train vae loss: 0.13142 perp: 0.04096 kl: 0.09046:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 492: loss 0.22938 perp 0.14082 kl 0.08856


At epoch: 493  train vae loss: 0.10485 perp: 0.02335 kl: 0.08150: 100%|██████████| 379/379 [00:36<00:00, 10.32batch/s]
At epoch: 493  valid vae loss: 0.14241 perp: 0.06150 kl: 0.08091:   6%|▌         | 2/34 [00:00<00:02, 11.71batch/s]

>>>>average [92mtraining[0m of epoch 493: loss 0.11564 perp 0.03136 kl 0.08428


At epoch: 493  valid vae loss: 0.07965 perp: 0.00190 kl: 0.07775: 100%|██████████| 34/34 [00:01<00:00, 23.41batch/s]
At epoch: 494  train vae loss: 0.10442 perp: 0.02287 kl: 0.08155:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 493: loss 0.22327 perp 0.14207 kl 0.08120


At epoch: 494  train vae loss: 0.11745 perp: 0.03554 kl: 0.08191: 100%|██████████| 379/379 [00:36<00:00, 10.30batch/s]
At epoch: 494  valid vae loss: 0.13585 perp: 0.05519 kl: 0.08065:   6%|▌         | 2/34 [00:00<00:02, 11.44batch/s]

>>>>average [92mtraining[0m of epoch 494: loss 0.12069 perp 0.03890 kl 0.08179


At epoch: 494  valid vae loss: 0.13090 perp: 0.05300 kl: 0.07790: 100%|██████████| 34/34 [00:01<00:00, 23.47batch/s]
At epoch: 495  train vae loss: 0.11449 perp: 0.03395 kl: 0.08054:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 494: loss 0.23311 perp 0.15200 kl 0.08111


At epoch: 495  train vae loss: 0.18612 perp: 0.09763 kl: 0.08849: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 495  valid vae loss: 0.20068 perp: 0.11417 kl: 0.08651:   6%|▌         | 2/34 [00:00<00:02, 11.57batch/s]

>>>>average [92mtraining[0m of epoch 495: loss 0.14418 perp 0.05996 kl 0.08422


At epoch: 495  valid vae loss: 0.08918 perp: 0.00580 kl: 0.08338: 100%|██████████| 34/34 [00:01<00:00, 23.50batch/s]
At epoch: 496  train vae loss: 0.11989 perp: 0.03389 kl: 0.08600:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 495: loss 0.26078 perp 0.17417 kl 0.08661


At epoch: 496  train vae loss: 0.11748 perp: 0.03330 kl: 0.08418: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 496  valid vae loss: 0.13138 perp: 0.04740 kl: 0.08397:   6%|▌         | 2/34 [00:00<00:02, 11.54batch/s]

>>>>average [92mtraining[0m of epoch 496: loss 0.13258 perp 0.04744 kl 0.08513


At epoch: 496  valid vae loss: 0.08595 perp: 0.00523 kl: 0.08072: 100%|██████████| 34/34 [00:01<00:00, 23.35batch/s]
At epoch: 497  train vae loss: 0.11136 perp: 0.02617 kl: 0.08518:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 496: loss 0.23638 perp 0.15228 kl 0.08410


At epoch: 497  train vae loss: 0.12456 perp: 0.02553 kl: 0.09903: 100%|██████████| 379/379 [00:36<00:00, 10.33batch/s]
At epoch: 497  valid vae loss: 0.14158 perp: 0.04705 kl: 0.09454:   6%|▌         | 2/34 [00:00<00:02, 11.70batch/s]

>>>>average [92mtraining[0m of epoch 497: loss 0.17735 perp 0.08917 kl 0.08819


At epoch: 497  valid vae loss: 0.11439 perp: 0.02351 kl: 0.09089: 100%|██████████| 34/34 [00:01<00:00, 23.44batch/s]
At epoch: 498  train vae loss: 0.12143 perp: 0.02371 kl: 0.09773:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 497: loss 0.25130 perp 0.15620 kl 0.09510


At epoch: 498  train vae loss: 0.10178 perp: 0.02172 kl: 0.08006: 100%|██████████| 379/379 [00:36<00:00, 10.34batch/s]
At epoch: 498  valid vae loss: 0.15337 perp: 0.07401 kl: 0.07936:   6%|▌         | 2/34 [00:00<00:02, 11.74batch/s]

>>>>average [92mtraining[0m of epoch 498: loss 0.11255 perp 0.02684 kl 0.08571


At epoch: 498  valid vae loss: 0.08470 perp: 0.00839 kl: 0.07632: 100%|██████████| 34/34 [00:01<00:00, 23.48batch/s]
At epoch: 499  train vae loss: 0.09770 perp: 0.01978 kl: 0.07793:   0%|          | 0/379 [00:00<?, ?batch/s]

>>>>average [93mvalid[0m of epoch 498: loss 0.22854 perp 0.14885 kl 0.07970


At epoch: 499  train vae loss: 0.13328 perp: 0.05022 kl: 0.08306: 100%|██████████| 379/379 [00:36<00:00, 10.36batch/s]
At epoch: 499  valid vae loss: 0.16384 perp: 0.08282 kl: 0.08102:   6%|▌         | 2/34 [00:00<00:02, 11.69batch/s]

>>>>average [92mtraining[0m of epoch 499: loss 0.12806 perp 0.04556 kl 0.08250


At epoch: 499  valid vae loss: 0.08659 perp: 0.00873 kl: 0.07786: 100%|██████████| 34/34 [00:01<00:00, 23.27batch/s]


>>>>average [93mvalid[0m of epoch 499: loss 0.23835 perp 0.15691 kl 0.08144
