In [1]:

import torch
import numpy as np
import matplotlib.pyplot as plt
import scipy
from torchdiffeq import odeint_adjoint as torchodeint
from tqdm import tqdm
import os
import gc
import torch.nn as nn
import sys
sys.path.append("efficient_kan/")
import efficientkan #from efficient kan


In [2]:
def pred_prey_deriv(X, t, alpha, beta, delta, gamma):
    x=X[0]
    y=X[1]
    dxdt = alpha*x-beta*x*y
    dydt = delta*x*y-gamma*y
    dXdt=[dxdt, dydt]
    return dXdt

def calDeriv(t, X):
    dXdt=model(X)
    return dXdt

def plotter(pred, soln_arr, epoch, loss_train, loss_test):
    #callback plotter during training, plots current solution
    plt.figure()
    plt.plot(t, soln_arr[:, 0].detach(), color='g')
    plt.plot(t, soln_arr[:, 1].detach(), color='b')
    plt.plot(t, pred[:, 0].detach(), linestyle='dashed', color='g')
    plt.plot(t, pred[:, 1].detach(), linestyle='dashed', color='b')

    plt.legend(['x_data', 'y_data', 'x_KAN-ODE', 'y_KAN-ODE'])
    plt.ylabel('concentration')
    plt.xlabel('time')
    plt.ylim([0, 8])
    plt.vlines(tf_learn, 0, 8)
    plt.savefig("plots/pred_prey/training_updates/train_epoch_"+str(epoch) +".png", dpi=200, facecolor="w", edgecolor="w", orientation="portrait")
    plt.close('all')
    
    plt.figure()
    plt.semilogy(torch.Tensor(loss_train), label='train')
    plt.semilogy(torch.Tensor(loss_test), label='test')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.savefig("plots/pred_prey/loss.png", dpi=200, facecolor="w", edgecolor="w", orientation="portrait")
    plt.close('all')

    
def plotter_opt(pred, soln_arr, epoch, loss_train, loss_test):
    #plots the optimal solution 
    plt.figure()
    plt.plot(t, soln_arr[:, 0].detach(), color='g')
    plt.plot(t, soln_arr[:, 1].detach(), color='b')
    plt.plot(t, pred[:, 0].detach(), linestyle='dashed', color='g')
    plt.plot(t, pred[:, 1].detach(), linestyle='dashed', color='b')

    plt.legend(['x_data', 'y_data', 'x_KAN-ODE', 'y_KAN-ODE'])
    plt.ylabel('concentration')
    plt.xlabel('time')
    plt.ylim([0, 8])
    plt.vlines(tf_learn, 0, 8)
    plt.savefig("plots/pred_prey/optimal/train_trial_.png", dpi=200, facecolor="w", edgecolor="w", orientation="portrait")
    plt.close('all')


In [3]:
#Generate LV predator-prey data
#dx/dt=alpha*x-beta*x*y
#dy/dt=delta*x*y-gamma*y

tf=14
tf_learn=3.5
N_t_train=35
N_t=int((35*tf/tf_learn))
lr=2e-3
num_epochs=10000
plot_freq=100
is_restart=False


##coefficients from https://arxiv.org/pdf/2012.07244
alpha=1.5
beta=1
gamma=3
delta=1


x0=1 
y0=1 


In [4]:

X0=np.array([x0, y0])
t=np.linspace(0, tf, N_t)

soln_arr=scipy.integrate.odeint(pred_prey_deriv, X0, t, args=(alpha, beta, delta, gamma))


In [5]:

# initialize KAN with grid=5
model = efficientkan.KAN(layers_hidden=[2,10,2], grid_size=5) #k is order of piecewise polynomial
#convery numpy training data to torch tensors: 
X0=torch.unsqueeze((torch.Tensor(np.transpose(X0))), 0)
X0.requires_grad=True
soln_arr=torch.Tensor(soln_arr)
soln_arr.requires_grad=True
soln_arr_train=soln_arr[:N_t_train, :]
t=torch.Tensor(t)
t_learn=torch.tensor(np.linspace(0, tf_learn, N_t_train))


In [6]:
model

KAN(
  (layers): ModuleList(
    (0-1): 2 x KANLinear(
      (base_activation): SiLU()
    )
  )
)

In [7]:

if is_restart==True:
    model.load_ckpt('ckpt_predprey')

loss_min=1e10 #arbitrarily large to overwrite later
opt_plot_counter=0

epoch_cutoff=10 #start at smaller lr to initialize, then bump it up

p1=model.layers[0].spline_weight
p2=model.layers[0].base_weight
p3=model.layers[1].spline_weight
p4=model.layers[1].base_weight


loss_list_train=[]
loss_list_test=[]
#initialize ADAM optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


In [None]:

for epoch in tqdm(range(num_epochs)):
    opt_plot_counter+=1
    #if epoch==epoch_cutoffs[2]:
    #    model = kan.KAN(width=[2,3,2], grid=grids[1], k=3).initialize_from_another_model(model, X0_train)
    optimizer.zero_grad()

    pred=torchodeint(calDeriv, X0, t_learn, adjoint_params=[p1, p2, p3, p4])
    loss_train=torch.mean(torch.square(pred[:, 0, :]-soln_arr_train))
    loss_train.retain_grad()
    loss_train.backward()
    optimizer.step()
    loss_list_train.append(loss_train.detach().cpu())
    pred_test=torchodeint(calDeriv, X0, t, adjoint_params=[])
    loss_list_test.append(torch.mean(torch.square(pred_test[N_t_train:,0, :]-soln_arr[N_t_train:, :])).detach().cpu())
    #if epoch ==5:
    #    model.update_grid_from_samples(X0)
    if loss_train<loss_min:
        loss_min=loss_train
        #model.save_ckpt('ckpt_predprey_opt')
        if opt_plot_counter>=200:
            print('plotting optimal model')
            plotter_opt( pred_test[:, 0, :], soln_arr, epoch, loss_list_train, loss_list_test)
            opt_plot_counter=0

    print('Iter {:04d} | Train Loss {:.5f}'.format(epoch, loss_train.item()))
    ##########
    #########################make a checker that deepcopys the best loss into, like, model_optimal
    #########
    ######################and then save that one into the file, not just whatever the current one is
    if epoch % plot_freq ==0:
        #model.save_ckpt('ckpt_predprey')
        plotter(pred_test[:, 0, :], soln_arr, epoch, loss_list_train, loss_list_test)

breakpoint()


  0%|                                                                                         | 0/10000 [00:00<?, ?it/s]

Iter 0000 | Train Loss 4.56047


  0%|                                                                               | 2/10000 [00:05<7:18:49,  2.63s/it]

Iter 0001 | Train Loss 4.51633


  0%|                                                                               | 3/10000 [00:06<5:31:55,  1.99s/it]

Iter 0002 | Train Loss 4.47252


  0%|                                                                               | 4/10000 [00:08<4:41:14,  1.69s/it]

Iter 0003 | Train Loss 4.42909


  0%|                                                                               | 5/10000 [00:09<4:13:51,  1.52s/it]

Iter 0004 | Train Loss 4.38607


  0%|                                                                               | 6/10000 [00:10<3:57:24,  1.43s/it]

Iter 0005 | Train Loss 4.34353


  0%|                                                                               | 7/10000 [00:11<3:47:15,  1.36s/it]

Iter 0006 | Train Loss 4.30150


  0%|                                                                               | 8/10000 [00:13<3:40:30,  1.32s/it]

Iter 0007 | Train Loss 4.26005


  0%|                                                                               | 9/10000 [00:14<3:37:05,  1.30s/it]

Iter 0008 | Train Loss 4.21922


  0%|                                                                              | 10/10000 [00:15<3:34:16,  1.29s/it]

Iter 0009 | Train Loss 4.17906


  0%|                                                                              | 11/10000 [00:16<3:34:41,  1.29s/it]

Iter 0010 | Train Loss 4.13963


  0%|                                                                              | 12/10000 [00:18<3:35:53,  1.30s/it]

Iter 0011 | Train Loss 4.10097


  0%|                                                                              | 13/10000 [00:19<3:34:12,  1.29s/it]

Iter 0012 | Train Loss 4.06315


  0%|                                                                              | 14/10000 [00:20<3:32:53,  1.28s/it]

Iter 0013 | Train Loss 4.02620


  0%|                                                                              | 15/10000 [00:22<3:34:28,  1.29s/it]

Iter 0014 | Train Loss 3.99019


  0%|                                                                              | 16/10000 [00:23<3:55:59,  1.42s/it]

Iter 0015 | Train Loss 3.95515


  0%|▏                                                                             | 17/10000 [00:25<3:51:04,  1.39s/it]

Iter 0016 | Train Loss 3.92113


  0%|▏                                                                             | 18/10000 [00:26<3:45:43,  1.36s/it]

Iter 0017 | Train Loss 3.88818


  0%|▏                                                                             | 19/10000 [00:27<3:41:54,  1.33s/it]

Iter 0018 | Train Loss 3.85633


  0%|▏                                                                             | 20/10000 [00:29<3:39:14,  1.32s/it]

Iter 0019 | Train Loss 3.82563


  0%|▏                                                                             | 21/10000 [00:30<3:36:40,  1.30s/it]

Iter 0020 | Train Loss 3.79611


  0%|▏                                                                             | 22/10000 [00:31<3:36:33,  1.30s/it]

Iter 0021 | Train Loss 3.76782


  0%|▏                                                                             | 23/10000 [00:32<3:35:17,  1.29s/it]

Iter 0022 | Train Loss 3.74079


  0%|▏                                                                             | 24/10000 [00:34<3:34:37,  1.29s/it]

Iter 0023 | Train Loss 3.71504


  0%|▏                                                                             | 25/10000 [00:35<3:34:47,  1.29s/it]

Iter 0024 | Train Loss 3.69063


  0%|▏                                                                             | 26/10000 [00:36<3:34:02,  1.29s/it]

Iter 0025 | Train Loss 3.66757


  0%|▏                                                                             | 27/10000 [00:38<3:35:20,  1.30s/it]

Iter 0026 | Train Loss 3.64589


  0%|▏                                                                             | 28/10000 [00:39<3:41:07,  1.33s/it]

Iter 0027 | Train Loss 3.62561


  0%|▏                                                                             | 29/10000 [00:41<4:00:35,  1.45s/it]

Iter 0028 | Train Loss 3.60672


  0%|▏                                                                             | 30/10000 [00:42<3:52:15,  1.40s/it]

Iter 0029 | Train Loss 3.58925


  0%|▏                                                                             | 31/10000 [00:44<4:02:25,  1.46s/it]

Iter 0030 | Train Loss 3.57318


  0%|▏                                                                             | 32/10000 [00:45<4:08:14,  1.49s/it]

Iter 0031 | Train Loss 3.55848


  0%|▎                                                                             | 33/10000 [00:47<4:03:30,  1.47s/it]

Iter 0032 | Train Loss 3.54513


  0%|▎                                                                             | 34/10000 [00:48<3:59:02,  1.44s/it]

Iter 0033 | Train Loss 3.53308


  0%|▎                                                                             | 35/10000 [00:49<3:55:10,  1.42s/it]

Iter 0034 | Train Loss 3.52226


  0%|▎                                                                             | 36/10000 [00:51<3:53:30,  1.41s/it]

Iter 0035 | Train Loss 3.51263


  0%|▎                                                                             | 37/10000 [00:52<3:53:50,  1.41s/it]

Iter 0036 | Train Loss 3.50406


  0%|▎                                                                             | 38/10000 [00:53<3:52:11,  1.40s/it]

Iter 0037 | Train Loss 3.49650


  0%|▎                                                                             | 39/10000 [00:55<3:58:43,  1.44s/it]

Iter 0038 | Train Loss 3.48983


  0%|▎                                                                             | 40/10000 [00:57<4:23:05,  1.58s/it]

Iter 0039 | Train Loss 3.48394


  0%|▎                                                                             | 41/10000 [00:58<4:16:07,  1.54s/it]

Iter 0040 | Train Loss 3.47872


  0%|▎                                                                             | 42/10000 [01:00<4:06:23,  1.48s/it]

Iter 0041 | Train Loss 3.47405


  0%|▎                                                                             | 43/10000 [01:01<4:14:13,  1.53s/it]

Iter 0042 | Train Loss 3.46982


  0%|▎                                                                             | 44/10000 [01:03<4:07:58,  1.49s/it]

Iter 0043 | Train Loss 3.46592


  0%|▎                                                                             | 45/10000 [01:04<4:02:57,  1.46s/it]

Iter 0044 | Train Loss 3.46225


  0%|▎                                                                             | 46/10000 [01:05<3:58:46,  1.44s/it]

Iter 0045 | Train Loss 3.45872


  0%|▎                                                                             | 47/10000 [01:07<4:06:48,  1.49s/it]

Iter 0046 | Train Loss 3.45525


  0%|▎                                                                             | 48/10000 [01:09<4:13:23,  1.53s/it]

Iter 0047 | Train Loss 3.45177


  0%|▍                                                                             | 49/10000 [01:10<4:05:25,  1.48s/it]

Iter 0048 | Train Loss 3.44824


  0%|▍                                                                             | 50/10000 [01:11<3:59:59,  1.45s/it]

Iter 0049 | Train Loss 3.44463


  1%|▍                                                                             | 51/10000 [01:13<3:57:44,  1.43s/it]

Iter 0050 | Train Loss 3.44091


  1%|▍                                                                             | 52/10000 [01:14<3:54:51,  1.42s/it]

Iter 0051 | Train Loss 3.43707


  1%|▍                                                                             | 53/10000 [01:16<3:55:14,  1.42s/it]

Iter 0052 | Train Loss 3.43313


  1%|▍                                                                             | 54/10000 [01:17<3:53:04,  1.41s/it]

Iter 0053 | Train Loss 3.42908


  1%|▍                                                                             | 55/10000 [01:18<3:51:35,  1.40s/it]

Iter 0054 | Train Loss 3.42494


  1%|▍                                                                             | 56/10000 [01:20<3:52:44,  1.40s/it]

Iter 0055 | Train Loss 3.42074


  1%|▍                                                                             | 57/10000 [01:21<3:49:16,  1.38s/it]

Iter 0056 | Train Loss 3.41649


  1%|▍                                                                             | 58/10000 [01:23<3:49:45,  1.39s/it]

Iter 0057 | Train Loss 3.41221


  1%|▍                                                                             | 59/10000 [01:24<3:49:21,  1.38s/it]

Iter 0058 | Train Loss 3.40793


  1%|▍                                                                             | 60/10000 [01:25<3:52:37,  1.40s/it]

Iter 0059 | Train Loss 3.40366


  1%|▍                                                                             | 61/10000 [01:27<3:52:21,  1.40s/it]

Iter 0060 | Train Loss 3.39943


  1%|▍                                                                             | 62/10000 [01:28<3:52:22,  1.40s/it]

Iter 0061 | Train Loss 3.39524


  1%|▍                                                                             | 63/10000 [01:30<3:51:36,  1.40s/it]

Iter 0062 | Train Loss 3.39110


  1%|▍                                                                             | 64/10000 [01:31<3:52:17,  1.40s/it]

Iter 0063 | Train Loss 3.38703


  1%|▌                                                                             | 65/10000 [01:32<3:53:32,  1.41s/it]

Iter 0064 | Train Loss 3.38303


  1%|▌                                                                             | 66/10000 [01:34<3:56:59,  1.43s/it]

Iter 0065 | Train Loss 3.37909


  1%|▌                                                                             | 67/10000 [01:35<3:54:21,  1.42s/it]

Iter 0066 | Train Loss 3.37523


  1%|▌                                                                             | 68/10000 [01:37<3:54:10,  1.41s/it]

Iter 0067 | Train Loss 3.37142


  1%|▌                                                                             | 69/10000 [01:38<3:53:56,  1.41s/it]

Iter 0068 | Train Loss 3.36768


  1%|▌                                                                             | 70/10000 [01:39<3:53:02,  1.41s/it]

Iter 0069 | Train Loss 3.36400


  1%|▌                                                                             | 71/10000 [01:41<3:54:20,  1.42s/it]

Iter 0070 | Train Loss 3.36037


  1%|▌                                                                             | 72/10000 [01:42<3:51:20,  1.40s/it]

Iter 0071 | Train Loss 3.35679


  1%|▌                                                                             | 73/10000 [01:44<3:51:07,  1.40s/it]

Iter 0072 | Train Loss 3.35325


  1%|▌                                                                             | 74/10000 [01:45<3:53:24,  1.41s/it]

Iter 0073 | Train Loss 3.34974


  1%|▌                                                                             | 75/10000 [01:47<3:54:11,  1.42s/it]

Iter 0074 | Train Loss 3.34627


  1%|▌                                                                             | 76/10000 [01:48<3:52:10,  1.40s/it]

Iter 0075 | Train Loss 3.34281


  1%|▌                                                                             | 77/10000 [01:49<3:52:07,  1.40s/it]

Iter 0076 | Train Loss 3.33937


  1%|▌                                                                             | 78/10000 [01:51<3:51:41,  1.40s/it]

Iter 0077 | Train Loss 3.33596


  1%|▌                                                                             | 79/10000 [01:53<4:15:44,  1.55s/it]

Iter 0078 | Train Loss 3.33254


  1%|▌                                                                             | 80/10000 [01:54<4:15:32,  1.55s/it]

Iter 0079 | Train Loss 3.32914


  1%|▋                                                                             | 81/10000 [01:56<4:08:38,  1.50s/it]

Iter 0080 | Train Loss 3.32575


  1%|▋                                                                             | 82/10000 [01:57<4:00:53,  1.46s/it]

Iter 0081 | Train Loss 3.32237


  1%|▋                                                                             | 83/10000 [01:58<3:55:09,  1.42s/it]

Iter 0082 | Train Loss 3.31899


  1%|▋                                                                             | 84/10000 [02:00<3:53:17,  1.41s/it]

Iter 0083 | Train Loss 3.31561


  1%|▋                                                                             | 85/10000 [02:01<3:50:37,  1.40s/it]

Iter 0084 | Train Loss 3.31223


  1%|▋                                                                             | 86/10000 [02:02<3:48:08,  1.38s/it]

Iter 0085 | Train Loss 3.30886


  1%|▋                                                                             | 87/10000 [02:04<3:49:40,  1.39s/it]

Iter 0086 | Train Loss 3.30549


  1%|▋                                                                             | 88/10000 [02:05<3:50:12,  1.39s/it]

Iter 0087 | Train Loss 3.30212


  1%|▋                                                                             | 89/10000 [02:07<3:53:04,  1.41s/it]

Iter 0088 | Train Loss 3.29876


  1%|▋                                                                             | 90/10000 [02:08<3:53:11,  1.41s/it]

Iter 0089 | Train Loss 3.29539


  1%|▋                                                                             | 91/10000 [02:09<3:51:31,  1.40s/it]

Iter 0090 | Train Loss 3.29203


  1%|▋                                                                             | 92/10000 [02:11<3:53:15,  1.41s/it]

Iter 0091 | Train Loss 3.28867


  1%|▋                                                                             | 93/10000 [02:12<3:50:35,  1.40s/it]

Iter 0092 | Train Loss 3.28532


  1%|▋                                                                             | 94/10000 [02:14<3:48:38,  1.38s/it]

Iter 0093 | Train Loss 3.28196


  1%|▋                                                                             | 95/10000 [02:15<3:48:21,  1.38s/it]

Iter 0094 | Train Loss 3.27861


  1%|▋                                                                             | 96/10000 [02:16<3:48:20,  1.38s/it]

Iter 0095 | Train Loss 3.27526


  1%|▊                                                                             | 97/10000 [02:18<3:48:14,  1.38s/it]

Iter 0096 | Train Loss 3.27191


  1%|▊                                                                             | 98/10000 [02:19<3:47:50,  1.38s/it]

Iter 0097 | Train Loss 3.26856


  1%|▊                                                                             | 99/10000 [02:20<3:48:28,  1.38s/it]

Iter 0098 | Train Loss 3.26521


  1%|▊                                                                            | 100/10000 [02:22<3:48:42,  1.39s/it]

Iter 0099 | Train Loss 3.26186
Iter 0100 | Train Loss 3.25851


  1%|▊                                                                            | 102/10000 [02:26<4:36:32,  1.68s/it]

Iter 0101 | Train Loss 3.25516


  1%|▊                                                                            | 103/10000 [02:27<4:25:18,  1.61s/it]

Iter 0102 | Train Loss 3.25180


  1%|▊                                                                            | 104/10000 [02:29<4:13:49,  1.54s/it]

Iter 0103 | Train Loss 3.24844


  1%|▊                                                                            | 105/10000 [02:30<4:07:20,  1.50s/it]

Iter 0104 | Train Loss 3.24508


  1%|▊                                                                            | 106/10000 [02:32<4:03:55,  1.48s/it]

Iter 0105 | Train Loss 3.24171


  1%|▊                                                                            | 107/10000 [02:33<4:00:19,  1.46s/it]

Iter 0106 | Train Loss 3.23834


  1%|▊                                                                            | 108/10000 [02:34<3:58:08,  1.44s/it]

Iter 0107 | Train Loss 3.23496


  1%|▊                                                                            | 109/10000 [02:36<3:55:11,  1.43s/it]

Iter 0108 | Train Loss 3.23157


  1%|▊                                                                            | 110/10000 [02:37<3:53:45,  1.42s/it]

Iter 0109 | Train Loss 3.22818


  1%|▊                                                                            | 111/10000 [02:39<3:51:31,  1.40s/it]

Iter 0110 | Train Loss 3.22478


  1%|▊                                                                            | 112/10000 [02:40<3:54:19,  1.42s/it]

Iter 0111 | Train Loss 3.22138


  1%|▊                                                                            | 113/10000 [02:41<3:51:50,  1.41s/it]

Iter 0112 | Train Loss 3.21796


  1%|▉                                                                            | 114/10000 [02:43<3:50:18,  1.40s/it]

Iter 0113 | Train Loss 3.21454


  1%|▉                                                                            | 115/10000 [02:44<3:52:05,  1.41s/it]

Iter 0114 | Train Loss 3.21111


  1%|▉                                                                            | 116/10000 [02:46<3:49:45,  1.39s/it]

Iter 0115 | Train Loss 3.20767


  1%|▉                                                                            | 117/10000 [02:47<3:50:55,  1.40s/it]

Iter 0116 | Train Loss 3.20422


  1%|▉                                                                            | 118/10000 [02:49<3:52:05,  1.41s/it]

Iter 0117 | Train Loss 3.20077


  1%|▉                                                                            | 119/10000 [02:50<3:50:27,  1.40s/it]

Iter 0118 | Train Loss 3.19730


  1%|▉                                                                            | 120/10000 [02:51<3:48:49,  1.39s/it]

Iter 0119 | Train Loss 3.19383


  1%|▉                                                                            | 121/10000 [02:53<3:48:59,  1.39s/it]

Iter 0120 | Train Loss 3.19035


  1%|▉                                                                            | 122/10000 [02:54<3:50:55,  1.40s/it]

Iter 0121 | Train Loss 3.18685


  1%|▉                                                                            | 123/10000 [02:55<3:51:36,  1.41s/it]

Iter 0122 | Train Loss 3.18335


  1%|▉                                                                            | 124/10000 [02:57<3:49:33,  1.39s/it]

Iter 0123 | Train Loss 3.17984


  1%|▉                                                                            | 125/10000 [02:58<3:51:11,  1.40s/it]

Iter 0124 | Train Loss 3.17632


  1%|▉                                                                            | 126/10000 [03:00<3:56:38,  1.44s/it]

Iter 0125 | Train Loss 3.17279


  1%|▉                                                                            | 127/10000 [03:01<3:56:55,  1.44s/it]

Iter 0126 | Train Loss 3.16925


  1%|▉                                                                            | 128/10000 [03:03<3:56:31,  1.44s/it]

Iter 0127 | Train Loss 3.16570


  1%|▉                                                                            | 129/10000 [03:04<3:54:32,  1.43s/it]

Iter 0128 | Train Loss 3.16214


  1%|█                                                                            | 130/10000 [03:06<3:55:16,  1.43s/it]

Iter 0129 | Train Loss 3.15857


  1%|█                                                                            | 131/10000 [03:07<3:52:48,  1.42s/it]

Iter 0130 | Train Loss 3.15499


  1%|█                                                                            | 132/10000 [03:08<3:52:19,  1.41s/it]

Iter 0131 | Train Loss 3.15140


  1%|█                                                                            | 133/10000 [03:10<3:51:36,  1.41s/it]

Iter 0132 | Train Loss 3.14779


  1%|█                                                                            | 134/10000 [03:11<3:53:26,  1.42s/it]

Iter 0133 | Train Loss 3.14418


  1%|█                                                                            | 135/10000 [03:13<3:51:24,  1.41s/it]

Iter 0134 | Train Loss 3.14056


  1%|█                                                                            | 136/10000 [03:14<3:50:32,  1.40s/it]

Iter 0135 | Train Loss 3.13693


  1%|█                                                                            | 137/10000 [03:15<3:50:12,  1.40s/it]

Iter 0136 | Train Loss 3.13329


  1%|█                                                                            | 138/10000 [03:17<3:49:35,  1.40s/it]

Iter 0137 | Train Loss 3.12963


  1%|█                                                                            | 139/10000 [03:18<3:49:14,  1.39s/it]

Iter 0138 | Train Loss 3.12597


  1%|█                                                                            | 140/10000 [03:20<3:51:22,  1.41s/it]

Iter 0139 | Train Loss 3.12229


  1%|█                                                                            | 141/10000 [03:21<3:49:10,  1.39s/it]

Iter 0140 | Train Loss 3.11861


  1%|█                                                                            | 142/10000 [03:22<3:48:53,  1.39s/it]

Iter 0141 | Train Loss 3.11492


  1%|█                                                                            | 143/10000 [03:24<3:47:03,  1.38s/it]

Iter 0142 | Train Loss 3.11121


  1%|█                                                                            | 144/10000 [03:25<3:47:40,  1.39s/it]

Iter 0143 | Train Loss 3.10750


  1%|█                                                                            | 145/10000 [03:26<3:49:26,  1.40s/it]

Iter 0144 | Train Loss 3.10377


  1%|█                                                                            | 146/10000 [03:28<3:51:13,  1.41s/it]

Iter 0145 | Train Loss 3.10004


  1%|█▏                                                                           | 147/10000 [03:29<3:50:34,  1.40s/it]

Iter 0146 | Train Loss 3.09629


  1%|█▏                                                                           | 148/10000 [03:31<3:53:51,  1.42s/it]

Iter 0147 | Train Loss 3.09254


  1%|█▏                                                                           | 149/10000 [03:32<3:51:10,  1.41s/it]

Iter 0148 | Train Loss 3.08878


  2%|█▏                                                                           | 150/10000 [03:34<3:52:34,  1.42s/it]

Iter 0149 | Train Loss 3.08501


  2%|█▏                                                                           | 151/10000 [03:35<3:56:04,  1.44s/it]

Iter 0150 | Train Loss 3.08123


  2%|█▏                                                                           | 152/10000 [03:37<3:57:45,  1.45s/it]

Iter 0151 | Train Loss 3.07744


  2%|█▏                                                                           | 153/10000 [03:38<3:56:25,  1.44s/it]

Iter 0152 | Train Loss 3.07364


  2%|█▏                                                                           | 154/10000 [03:39<3:56:14,  1.44s/it]

Iter 0153 | Train Loss 3.06984


  2%|█▏                                                                           | 155/10000 [03:41<3:56:19,  1.44s/it]

Iter 0154 | Train Loss 3.06603


  2%|█▏                                                                           | 156/10000 [03:42<3:55:31,  1.44s/it]

Iter 0155 | Train Loss 3.06221


  2%|█▏                                                                           | 157/10000 [03:44<3:54:56,  1.43s/it]

Iter 0156 | Train Loss 3.05838


  2%|█▏                                                                           | 158/10000 [03:45<3:55:16,  1.43s/it]

Iter 0157 | Train Loss 3.05454


  2%|█▏                                                                           | 159/10000 [03:47<3:54:24,  1.43s/it]

Iter 0158 | Train Loss 3.05070


  2%|█▏                                                                           | 160/10000 [03:48<3:53:13,  1.42s/it]

Iter 0159 | Train Loss 3.04685


  2%|█▏                                                                           | 161/10000 [03:49<3:52:21,  1.42s/it]

Iter 0160 | Train Loss 3.04300


  2%|█▏                                                                           | 162/10000 [03:51<3:52:34,  1.42s/it]

Iter 0161 | Train Loss 3.03913


  2%|█▎                                                                           | 163/10000 [03:52<3:53:40,  1.43s/it]

Iter 0162 | Train Loss 3.03527


  2%|█▎                                                                           | 164/10000 [03:54<3:55:26,  1.44s/it]

Iter 0163 | Train Loss 3.03139


  2%|█▎                                                                           | 165/10000 [03:55<3:54:18,  1.43s/it]

Iter 0164 | Train Loss 3.02751


  2%|█▎                                                                           | 166/10000 [03:57<3:55:44,  1.44s/it]

Iter 0165 | Train Loss 3.02363


  2%|█▎                                                                           | 167/10000 [03:58<3:54:37,  1.43s/it]

Iter 0166 | Train Loss 3.01974


  2%|█▎                                                                           | 168/10000 [03:59<3:54:17,  1.43s/it]

Iter 0167 | Train Loss 3.01584


  2%|█▎                                                                           | 169/10000 [04:01<3:55:56,  1.44s/it]

Iter 0168 | Train Loss 3.01193


  2%|█▎                                                                           | 170/10000 [04:02<3:55:52,  1.44s/it]

Iter 0169 | Train Loss 3.00803


  2%|█▎                                                                           | 171/10000 [04:04<3:53:49,  1.43s/it]

Iter 0170 | Train Loss 3.00411


  2%|█▎                                                                           | 172/10000 [04:05<3:53:19,  1.42s/it]

Iter 0171 | Train Loss 3.00019


  2%|█▎                                                                           | 173/10000 [04:07<3:52:46,  1.42s/it]

Iter 0172 | Train Loss 2.99627


  2%|█▎                                                                           | 174/10000 [04:08<3:53:16,  1.42s/it]

Iter 0173 | Train Loss 2.99234


  2%|█▎                                                                           | 175/10000 [04:09<3:52:52,  1.42s/it]

Iter 0174 | Train Loss 2.98840


  2%|█▎                                                                           | 176/10000 [04:11<3:53:12,  1.42s/it]

Iter 0175 | Train Loss 2.98446


  2%|█▎                                                                           | 177/10000 [04:12<3:53:55,  1.43s/it]

Iter 0176 | Train Loss 2.98051


  2%|█▎                                                                           | 178/10000 [04:14<3:52:10,  1.42s/it]

Iter 0177 | Train Loss 2.97655


  2%|█▍                                                                           | 179/10000 [04:15<3:56:31,  1.45s/it]

Iter 0178 | Train Loss 2.97259


  2%|█▍                                                                           | 180/10000 [04:17<3:55:06,  1.44s/it]

Iter 0179 | Train Loss 2.96862


  2%|█▍                                                                           | 181/10000 [04:18<3:57:30,  1.45s/it]

Iter 0180 | Train Loss 2.96465


  2%|█▍                                                                           | 182/10000 [04:19<3:57:58,  1.45s/it]

Iter 0181 | Train Loss 2.96067


  2%|█▍                                                                           | 183/10000 [04:21<3:55:59,  1.44s/it]

Iter 0182 | Train Loss 2.95668


  2%|█▍                                                                           | 184/10000 [04:22<3:58:38,  1.46s/it]

Iter 0183 | Train Loss 2.95269


  2%|█▍                                                                           | 185/10000 [04:24<3:55:07,  1.44s/it]

Iter 0184 | Train Loss 2.94869


  2%|█▍                                                                           | 186/10000 [04:25<3:53:56,  1.43s/it]

Iter 0185 | Train Loss 2.94468


  2%|█▍                                                                           | 187/10000 [04:27<3:55:18,  1.44s/it]

Iter 0186 | Train Loss 2.94066


  2%|█▍                                                                           | 188/10000 [04:28<3:57:11,  1.45s/it]

Iter 0187 | Train Loss 2.93663


  2%|█▍                                                                           | 189/10000 [04:30<3:55:14,  1.44s/it]

Iter 0188 | Train Loss 2.93260


  2%|█▍                                                                           | 190/10000 [04:31<3:54:14,  1.43s/it]

Iter 0189 | Train Loss 2.92856


  2%|█▍                                                                           | 191/10000 [04:32<3:52:51,  1.42s/it]

Iter 0190 | Train Loss 2.92451


  2%|█▍                                                                           | 192/10000 [04:34<3:52:35,  1.42s/it]

Iter 0191 | Train Loss 2.92045


  2%|█▍                                                                           | 193/10000 [04:35<3:56:46,  1.45s/it]

Iter 0192 | Train Loss 2.91638


  2%|█▍                                                                           | 194/10000 [04:37<3:54:31,  1.44s/it]

Iter 0193 | Train Loss 2.91230


  2%|█▌                                                                           | 195/10000 [04:38<3:53:27,  1.43s/it]

Iter 0194 | Train Loss 2.90821


  2%|█▌                                                                           | 196/10000 [04:40<3:54:55,  1.44s/it]

Iter 0195 | Train Loss 2.90412


  2%|█▌                                                                           | 197/10000 [04:41<3:53:59,  1.43s/it]

Iter 0196 | Train Loss 2.90001


  2%|█▌                                                                           | 198/10000 [04:42<3:53:59,  1.43s/it]

Iter 0197 | Train Loss 2.89589


  2%|█▌                                                                           | 199/10000 [04:44<3:53:24,  1.43s/it]

Iter 0198 | Train Loss 2.89176
plotting optimal model


  2%|█▌                                                                           | 200/10000 [04:46<4:31:57,  1.67s/it]

Iter 0199 | Train Loss 2.88762
Iter 0200 | Train Loss 2.88347


  2%|█▌                                                                           | 202/10000 [04:51<5:14:46,  1.93s/it]

Iter 0201 | Train Loss 2.87930


  2%|█▌                                                                           | 203/10000 [04:52<4:53:14,  1.80s/it]

Iter 0202 | Train Loss 2.87512


  2%|█▌                                                                           | 204/10000 [04:54<4:39:59,  1.71s/it]

Iter 0203 | Train Loss 2.87093


  2%|█▌                                                                           | 205/10000 [04:55<4:28:22,  1.64s/it]

Iter 0204 | Train Loss 2.86673


  2%|█▌                                                                           | 206/10000 [04:57<4:18:46,  1.59s/it]

Iter 0205 | Train Loss 2.86252


  2%|█▌                                                                           | 207/10000 [04:58<4:13:59,  1.56s/it]

Iter 0206 | Train Loss 2.85829


  2%|█▌                                                                           | 208/10000 [05:00<4:10:08,  1.53s/it]

Iter 0207 | Train Loss 2.85404


  2%|█▌                                                                           | 209/10000 [05:01<4:06:05,  1.51s/it]

Iter 0208 | Train Loss 2.84978


  2%|█▌                                                                           | 210/10000 [05:02<4:05:36,  1.51s/it]

Iter 0209 | Train Loss 2.84551


  2%|█▌                                                                           | 211/10000 [05:04<4:05:48,  1.51s/it]

Iter 0210 | Train Loss 2.84122


  2%|█▋                                                                           | 212/10000 [05:06<4:07:17,  1.52s/it]

Iter 0211 | Train Loss 2.83692


  2%|█▋                                                                           | 213/10000 [05:07<4:05:40,  1.51s/it]

Iter 0212 | Train Loss 2.83260


  2%|█▋                                                                           | 214/10000 [05:09<4:05:50,  1.51s/it]

Iter 0213 | Train Loss 2.82827


  2%|█▋                                                                           | 215/10000 [05:10<4:06:20,  1.51s/it]

Iter 0214 | Train Loss 2.82392


  2%|█▋                                                                           | 216/10000 [05:11<4:01:58,  1.48s/it]

Iter 0215 | Train Loss 2.81955


  2%|█▋                                                                           | 217/10000 [05:13<4:02:32,  1.49s/it]

Iter 0216 | Train Loss 2.81517


  2%|█▋                                                                           | 218/10000 [05:14<4:04:35,  1.50s/it]

Iter 0217 | Train Loss 2.81077


  2%|█▋                                                                           | 219/10000 [05:16<4:10:22,  1.54s/it]

Iter 0218 | Train Loss 2.80635


  2%|█▋                                                                           | 220/10000 [05:18<4:11:23,  1.54s/it]

Iter 0219 | Train Loss 2.80192


  2%|█▋                                                                           | 221/10000 [05:19<4:08:35,  1.53s/it]

Iter 0220 | Train Loss 2.79747


  2%|█▋                                                                           | 222/10000 [05:21<4:08:06,  1.52s/it]

Iter 0221 | Train Loss 2.79300


  2%|█▋                                                                           | 223/10000 [05:22<4:07:36,  1.52s/it]

Iter 0222 | Train Loss 2.78851


  2%|█▋                                                                           | 224/10000 [05:24<4:07:14,  1.52s/it]

Iter 0223 | Train Loss 2.78401


  2%|█▋                                                                           | 225/10000 [05:25<4:07:41,  1.52s/it]

Iter 0224 | Train Loss 2.77949


  2%|█▋                                                                           | 226/10000 [05:27<4:08:31,  1.53s/it]

Iter 0225 | Train Loss 2.77495


  2%|█▋                                                                           | 227/10000 [05:28<4:07:42,  1.52s/it]

Iter 0226 | Train Loss 2.77039


  2%|█▊                                                                           | 228/10000 [05:30<4:07:32,  1.52s/it]

Iter 0227 | Train Loss 2.76581


  2%|█▊                                                                           | 229/10000 [05:31<4:05:35,  1.51s/it]

Iter 0228 | Train Loss 2.76122


  2%|█▊                                                                           | 230/10000 [05:33<4:06:28,  1.51s/it]

Iter 0229 | Train Loss 2.75660


  2%|█▊                                                                           | 231/10000 [05:34<4:07:28,  1.52s/it]

Iter 0230 | Train Loss 2.75197


  2%|█▊                                                                           | 232/10000 [05:36<4:10:02,  1.54s/it]

Iter 0231 | Train Loss 2.74732


  2%|█▊                                                                           | 233/10000 [05:37<4:08:35,  1.53s/it]

Iter 0232 | Train Loss 2.74266


  2%|█▊                                                                           | 234/10000 [05:39<4:09:32,  1.53s/it]

Iter 0233 | Train Loss 2.73797


  2%|█▊                                                                           | 235/10000 [05:40<4:08:18,  1.53s/it]

Iter 0234 | Train Loss 2.73328


  2%|█▊                                                                           | 236/10000 [05:42<4:07:27,  1.52s/it]

Iter 0235 | Train Loss 2.72856


  2%|█▊                                                                           | 237/10000 [05:44<4:09:42,  1.53s/it]

Iter 0236 | Train Loss 2.72383


  2%|█▊                                                                           | 238/10000 [05:45<4:08:57,  1.53s/it]

Iter 0237 | Train Loss 2.71909


  2%|█▊                                                                           | 239/10000 [05:47<4:05:59,  1.51s/it]

Iter 0238 | Train Loss 2.71433


  2%|█▊                                                                           | 240/10000 [05:48<4:04:37,  1.50s/it]

Iter 0239 | Train Loss 2.70957


  2%|█▊                                                                           | 241/10000 [05:50<4:07:06,  1.52s/it]

Iter 0240 | Train Loss 2.70479


  2%|█▊                                                                           | 242/10000 [05:51<4:06:15,  1.51s/it]

Iter 0241 | Train Loss 2.70000


  2%|█▊                                                                           | 243/10000 [05:53<4:07:34,  1.52s/it]

Iter 0242 | Train Loss 2.69520


  2%|█▉                                                                           | 244/10000 [05:54<4:08:55,  1.53s/it]

Iter 0243 | Train Loss 2.69040


  2%|█▉                                                                           | 245/10000 [05:56<4:09:26,  1.53s/it]

Iter 0244 | Train Loss 2.68559


  2%|█▉                                                                           | 246/10000 [05:57<4:08:00,  1.53s/it]

Iter 0245 | Train Loss 2.68077


  2%|█▉                                                                           | 247/10000 [05:59<4:04:13,  1.50s/it]

Iter 0246 | Train Loss 2.67595


  2%|█▉                                                                           | 248/10000 [06:00<4:05:13,  1.51s/it]

Iter 0247 | Train Loss 2.67112


  2%|█▉                                                                           | 249/10000 [06:02<4:05:33,  1.51s/it]

Iter 0248 | Train Loss 2.66628


  2%|█▉                                                                           | 250/10000 [06:03<4:06:03,  1.51s/it]

Iter 0249 | Train Loss 2.66144


  3%|█▉                                                                           | 251/10000 [06:05<4:03:52,  1.50s/it]

Iter 0250 | Train Loss 2.65659


  3%|█▉                                                                           | 252/10000 [06:06<4:09:07,  1.53s/it]

Iter 0251 | Train Loss 2.65174


  3%|█▉                                                                           | 253/10000 [06:08<4:12:00,  1.55s/it]

Iter 0252 | Train Loss 2.64688


  3%|█▉                                                                           | 254/10000 [06:09<4:10:45,  1.54s/it]

Iter 0253 | Train Loss 2.64201


  3%|█▉                                                                           | 255/10000 [06:11<4:13:14,  1.56s/it]

Iter 0254 | Train Loss 2.63713


  3%|█▉                                                                           | 256/10000 [06:13<4:37:43,  1.71s/it]

Iter 0255 | Train Loss 2.63224


  3%|█▉                                                                           | 257/10000 [06:15<5:06:17,  1.89s/it]

Iter 0256 | Train Loss 2.62734


  3%|█▉                                                                           | 258/10000 [06:18<5:22:52,  1.99s/it]

Iter 0257 | Train Loss 2.62243


  3%|█▉                                                                           | 259/10000 [06:20<5:31:31,  2.04s/it]

Iter 0258 | Train Loss 2.61750


  3%|██                                                                           | 260/10000 [06:22<5:27:23,  2.02s/it]

Iter 0259 | Train Loss 2.61256


  3%|██                                                                           | 261/10000 [06:23<5:05:34,  1.88s/it]

Iter 0260 | Train Loss 2.60761


  3%|██                                                                           | 262/10000 [06:25<4:50:33,  1.79s/it]

Iter 0261 | Train Loss 2.60264


  3%|██                                                                           | 263/10000 [06:26<4:38:29,  1.72s/it]

Iter 0262 | Train Loss 2.59765


  3%|██                                                                           | 264/10000 [06:28<4:28:27,  1.65s/it]

Iter 0263 | Train Loss 2.59264


  3%|██                                                                           | 265/10000 [06:29<4:22:15,  1.62s/it]

Iter 0264 | Train Loss 2.58762


  3%|██                                                                           | 266/10000 [06:31<4:17:07,  1.58s/it]

Iter 0265 | Train Loss 2.58257


  3%|██                                                                           | 267/10000 [06:32<4:14:09,  1.57s/it]

Iter 0266 | Train Loss 2.57749


  3%|██                                                                           | 268/10000 [06:34<4:13:38,  1.56s/it]

Iter 0267 | Train Loss 2.57240


  3%|██                                                                           | 269/10000 [06:36<4:12:01,  1.55s/it]

Iter 0268 | Train Loss 2.56728


  3%|██                                                                           | 270/10000 [06:37<4:10:18,  1.54s/it]

Iter 0269 | Train Loss 2.56213


  3%|██                                                                           | 271/10000 [06:39<4:09:32,  1.54s/it]

Iter 0270 | Train Loss 2.55695


  3%|██                                                                           | 272/10000 [06:40<4:07:19,  1.53s/it]

Iter 0271 | Train Loss 2.55175


  3%|██                                                                           | 273/10000 [06:42<4:06:29,  1.52s/it]

Iter 0272 | Train Loss 2.54651


  3%|██                                                                           | 274/10000 [06:43<4:06:07,  1.52s/it]

Iter 0273 | Train Loss 2.54124


  3%|██                                                                           | 275/10000 [06:45<4:21:06,  1.61s/it]

Iter 0274 | Train Loss 2.53593


  3%|██▏                                                                          | 276/10000 [06:47<4:48:50,  1.78s/it]

Iter 0275 | Train Loss 2.53059


  3%|██▏                                                                          | 277/10000 [06:49<5:06:39,  1.89s/it]

Iter 0276 | Train Loss 2.52522


  3%|██▏                                                                          | 278/10000 [06:51<4:50:44,  1.79s/it]

Iter 0277 | Train Loss 2.51980


  3%|██▏                                                                          | 279/10000 [06:53<4:44:54,  1.76s/it]

Iter 0278 | Train Loss 2.51434


  3%|██▏                                                                          | 280/10000 [06:54<4:40:18,  1.73s/it]

Iter 0279 | Train Loss 2.50884


  3%|██▏                                                                          | 281/10000 [06:56<4:36:18,  1.71s/it]

Iter 0280 | Train Loss 2.50330


  3%|██▏                                                                          | 282/10000 [06:58<4:34:23,  1.69s/it]

Iter 0281 | Train Loss 2.49771


  3%|██▏                                                                          | 283/10000 [06:59<4:33:19,  1.69s/it]

Iter 0282 | Train Loss 2.49208


  3%|██▏                                                                          | 284/10000 [07:01<4:33:44,  1.69s/it]

Iter 0283 | Train Loss 2.48639


  3%|██▏                                                                          | 285/10000 [07:03<4:37:32,  1.71s/it]

Iter 0284 | Train Loss 2.48065


  3%|██▏                                                                          | 286/10000 [07:04<4:34:48,  1.70s/it]

Iter 0285 | Train Loss 2.47487


  3%|██▏                                                                          | 287/10000 [07:06<4:30:30,  1.67s/it]

Iter 0286 | Train Loss 2.46902


  3%|██▏                                                                          | 288/10000 [07:07<4:23:54,  1.63s/it]

Iter 0287 | Train Loss 2.46312


  3%|██▏                                                                          | 289/10000 [07:09<4:26:23,  1.65s/it]

Iter 0288 | Train Loss 2.45716


  3%|██▏                                                                          | 290/10000 [07:11<4:29:31,  1.67s/it]

Iter 0289 | Train Loss 2.45114


  3%|██▏                                                                          | 291/10000 [07:12<4:25:50,  1.64s/it]

Iter 0290 | Train Loss 2.44506


  3%|██▏                                                                          | 292/10000 [07:14<4:24:04,  1.63s/it]

Iter 0291 | Train Loss 2.43891


  3%|██▎                                                                          | 293/10000 [07:16<4:23:46,  1.63s/it]

Iter 0292 | Train Loss 2.43270


  3%|██▎                                                                          | 294/10000 [07:17<4:23:28,  1.63s/it]

Iter 0293 | Train Loss 2.42642


  3%|██▎                                                                          | 295/10000 [07:19<4:24:51,  1.64s/it]

Iter 0294 | Train Loss 2.42007


  3%|██▎                                                                          | 296/10000 [07:21<4:25:28,  1.64s/it]

Iter 0295 | Train Loss 2.41364


  3%|██▎                                                                          | 297/10000 [07:22<4:29:12,  1.66s/it]

Iter 0296 | Train Loss 2.40714


  3%|██▎                                                                          | 298/10000 [07:24<4:34:11,  1.70s/it]

Iter 0297 | Train Loss 2.40056


  3%|██▎                                                                          | 299/10000 [07:26<4:33:46,  1.69s/it]

Iter 0298 | Train Loss 2.39390


  3%|██▎                                                                          | 300/10000 [07:27<4:33:09,  1.69s/it]

Iter 0299 | Train Loss 2.38716
Iter 0300 | Train Loss 2.38034


  3%|██▎                                                                          | 302/10000 [07:33<5:38:16,  2.09s/it]

Iter 0301 | Train Loss 2.37343


  3%|██▎                                                                          | 303/10000 [07:34<5:20:17,  1.98s/it]

Iter 0302 | Train Loss 2.36643


  3%|██▎                                                                          | 304/10000 [07:36<5:08:26,  1.91s/it]

Iter 0303 | Train Loss 2.35934


  3%|██▎                                                                          | 305/10000 [07:38<5:00:32,  1.86s/it]

Iter 0304 | Train Loss 2.35216


  3%|██▎                                                                          | 306/10000 [07:40<4:52:42,  1.81s/it]

Iter 0305 | Train Loss 2.34488


  3%|██▎                                                                          | 307/10000 [07:41<4:50:46,  1.80s/it]

Iter 0306 | Train Loss 2.33751


  3%|██▎                                                                          | 308/10000 [07:43<4:48:22,  1.79s/it]

Iter 0307 | Train Loss 2.33004


  3%|██▍                                                                          | 309/10000 [07:45<4:42:32,  1.75s/it]

Iter 0308 | Train Loss 2.32247


  3%|██▍                                                                          | 310/10000 [07:47<4:45:32,  1.77s/it]

Iter 0309 | Train Loss 2.31480


  3%|██▍                                                                          | 311/10000 [07:48<4:44:34,  1.76s/it]

Iter 0310 | Train Loss 2.30702


  3%|██▍                                                                          | 312/10000 [07:50<4:47:57,  1.78s/it]

Iter 0311 | Train Loss 2.29914


  3%|██▍                                                                          | 313/10000 [07:52<4:45:54,  1.77s/it]

Iter 0312 | Train Loss 2.29115


  3%|██▍                                                                          | 314/10000 [07:54<4:48:05,  1.78s/it]

Iter 0313 | Train Loss 2.28305


  3%|██▍                                                                          | 315/10000 [07:56<4:50:34,  1.80s/it]

Iter 0314 | Train Loss 2.27484


  3%|██▍                                                                          | 316/10000 [07:58<5:33:27,  2.07s/it]

Iter 0315 | Train Loss 2.26652


  3%|██▍                                                                          | 317/10000 [08:01<6:12:53,  2.31s/it]

Iter 0316 | Train Loss 2.25808


  3%|██▍                                                                          | 318/10000 [08:04<6:25:52,  2.39s/it]

Iter 0317 | Train Loss 2.24953


  3%|██▍                                                                          | 319/10000 [08:06<6:05:41,  2.27s/it]

Iter 0318 | Train Loss 2.24087


  3%|██▍                                                                          | 320/10000 [08:08<5:50:05,  2.17s/it]

Iter 0319 | Train Loss 2.23208


  3%|██▍                                                                          | 321/10000 [08:10<5:39:22,  2.10s/it]

Iter 0320 | Train Loss 2.22318


  3%|██▍                                                                          | 322/10000 [08:12<5:33:06,  2.07s/it]

Iter 0321 | Train Loss 2.21417


  3%|██▍                                                                          | 323/10000 [08:14<5:31:06,  2.05s/it]

Iter 0322 | Train Loss 2.20504


  3%|██▍                                                                          | 324/10000 [08:16<5:27:34,  2.03s/it]

Iter 0323 | Train Loss 2.19579


  3%|██▌                                                                          | 325/10000 [08:18<5:27:41,  2.03s/it]

Iter 0324 | Train Loss 2.18642


  3%|██▌                                                                          | 326/10000 [08:20<5:27:17,  2.03s/it]

Iter 0325 | Train Loss 2.17693


  3%|██▌                                                                          | 327/10000 [08:21<5:20:21,  1.99s/it]

Iter 0326 | Train Loss 2.16733


  3%|██▌                                                                          | 328/10000 [08:23<5:19:25,  1.98s/it]

Iter 0327 | Train Loss 2.15761


  3%|██▌                                                                          | 329/10000 [08:25<5:18:20,  1.97s/it]

Iter 0328 | Train Loss 2.14778


  3%|██▌                                                                          | 330/10000 [08:27<5:13:20,  1.94s/it]

Iter 0329 | Train Loss 2.13784


  3%|██▌                                                                          | 331/10000 [08:29<5:13:03,  1.94s/it]

Iter 0330 | Train Loss 2.12779


  3%|██▌                                                                          | 332/10000 [08:31<5:11:46,  1.93s/it]

Iter 0331 | Train Loss 2.11762


  3%|██▌                                                                          | 333/10000 [08:33<5:13:45,  1.95s/it]

Iter 0332 | Train Loss 2.10735


  3%|██▌                                                                          | 334/10000 [08:35<5:17:21,  1.97s/it]

Iter 0333 | Train Loss 2.09697


  3%|██▌                                                                          | 335/10000 [08:37<5:16:59,  1.97s/it]

Iter 0334 | Train Loss 2.08649


  3%|██▌                                                                          | 336/10000 [08:39<5:14:55,  1.96s/it]

Iter 0335 | Train Loss 2.07590


  3%|██▌                                                                          | 337/10000 [08:41<5:18:34,  1.98s/it]

Iter 0336 | Train Loss 2.06522


  3%|██▌                                                                          | 338/10000 [08:43<5:19:02,  1.98s/it]

Iter 0337 | Train Loss 2.05444


  3%|██▌                                                                          | 339/10000 [08:45<5:18:28,  1.98s/it]

Iter 0338 | Train Loss 2.04356


  3%|██▌                                                                          | 340/10000 [08:47<5:14:10,  1.95s/it]

Iter 0339 | Train Loss 2.03259


  3%|██▋                                                                          | 341/10000 [08:49<5:18:14,  1.98s/it]

Iter 0340 | Train Loss 2.02152


  3%|██▋                                                                          | 342/10000 [08:51<5:23:04,  2.01s/it]

Iter 0341 | Train Loss 2.01037


  3%|██▋                                                                          | 343/10000 [08:53<5:24:20,  2.02s/it]

Iter 0342 | Train Loss 1.99912


  3%|██▋                                                                          | 344/10000 [08:55<5:22:32,  2.00s/it]

Iter 0343 | Train Loss 1.98779


  3%|██▋                                                                          | 345/10000 [08:57<5:22:26,  2.00s/it]

Iter 0344 | Train Loss 1.97637


  3%|██▋                                                                          | 346/10000 [08:59<5:20:57,  1.99s/it]

Iter 0345 | Train Loss 1.96486


  3%|██▋                                                                          | 347/10000 [09:01<5:21:23,  2.00s/it]

Iter 0346 | Train Loss 1.95327


  3%|██▋                                                                          | 348/10000 [09:03<5:24:34,  2.02s/it]

Iter 0347 | Train Loss 1.94158


  3%|██▋                                                                          | 349/10000 [09:05<5:23:47,  2.01s/it]

Iter 0348 | Train Loss 1.92981


  4%|██▋                                                                          | 350/10000 [09:07<5:20:57,  2.00s/it]

Iter 0349 | Train Loss 1.91795


  4%|██▋                                                                          | 351/10000 [09:09<5:19:57,  1.99s/it]

Iter 0350 | Train Loss 1.90600


  4%|██▋                                                                          | 352/10000 [09:11<5:16:25,  1.97s/it]

Iter 0351 | Train Loss 1.89395


  4%|██▋                                                                          | 353/10000 [09:13<5:15:23,  1.96s/it]

Iter 0352 | Train Loss 1.88181


  4%|██▋                                                                          | 354/10000 [09:15<5:18:21,  1.98s/it]

Iter 0353 | Train Loss 1.86958


  4%|██▋                                                                          | 355/10000 [09:17<5:16:21,  1.97s/it]

Iter 0354 | Train Loss 1.85724


  4%|██▋                                                                          | 356/10000 [09:19<5:15:29,  1.96s/it]

Iter 0355 | Train Loss 1.84479


  4%|██▋                                                                          | 357/10000 [09:21<5:18:01,  1.98s/it]

Iter 0356 | Train Loss 1.83223


  4%|██▊                                                                          | 358/10000 [09:23<5:18:32,  1.98s/it]

Iter 0357 | Train Loss 1.81956


  4%|██▊                                                                          | 359/10000 [09:25<5:16:31,  1.97s/it]

Iter 0358 | Train Loss 1.80677


  4%|██▊                                                                          | 360/10000 [09:27<5:15:33,  1.96s/it]

Iter 0359 | Train Loss 1.79386


  4%|██▊                                                                          | 361/10000 [09:29<5:11:17,  1.94s/it]

Iter 0360 | Train Loss 1.78082


  4%|██▊                                                                          | 362/10000 [09:30<5:09:10,  1.92s/it]

Iter 0361 | Train Loss 1.76764


  4%|██▊                                                                          | 363/10000 [09:32<5:10:54,  1.94s/it]

Iter 0362 | Train Loss 1.75432


  4%|██▊                                                                          | 364/10000 [09:34<5:11:44,  1.94s/it]

Iter 0363 | Train Loss 1.74086


  4%|██▊                                                                          | 365/10000 [09:36<5:14:59,  1.96s/it]

Iter 0364 | Train Loss 1.72724


  4%|██▊                                                                          | 366/10000 [09:38<5:16:34,  1.97s/it]

Iter 0365 | Train Loss 1.71347


  4%|██▊                                                                          | 367/10000 [09:40<5:19:06,  1.99s/it]

Iter 0366 | Train Loss 1.69954


  4%|██▊                                                                          | 368/10000 [09:42<5:21:09,  2.00s/it]

Iter 0367 | Train Loss 1.68544


  4%|██▊                                                                          | 369/10000 [09:44<5:19:51,  1.99s/it]

Iter 0368 | Train Loss 1.67118


  4%|██▊                                                                          | 370/10000 [09:46<5:20:47,  2.00s/it]

Iter 0369 | Train Loss 1.65676


  4%|██▊                                                                          | 371/10000 [09:48<5:19:52,  1.99s/it]

Iter 0370 | Train Loss 1.64217


  4%|██▊                                                                          | 372/10000 [09:50<5:24:15,  2.02s/it]

Iter 0371 | Train Loss 1.62742


  4%|██▊                                                                          | 373/10000 [09:53<5:26:45,  2.04s/it]

Iter 0372 | Train Loss 1.61252


  4%|██▉                                                                          | 374/10000 [09:55<5:31:43,  2.07s/it]

Iter 0373 | Train Loss 1.59746


  4%|██▉                                                                          | 375/10000 [09:57<5:33:08,  2.08s/it]

Iter 0374 | Train Loss 1.58225


  4%|██▉                                                                          | 376/10000 [09:59<5:35:37,  2.09s/it]

Iter 0375 | Train Loss 1.56691


  4%|██▉                                                                          | 377/10000 [10:01<5:42:34,  2.14s/it]

Iter 0376 | Train Loss 1.55145


  4%|██▉                                                                          | 378/10000 [10:03<5:44:04,  2.15s/it]

Iter 0377 | Train Loss 1.53587


  4%|██▉                                                                          | 379/10000 [10:06<5:48:42,  2.17s/it]

Iter 0378 | Train Loss 1.52018


  4%|██▉                                                                          | 380/10000 [10:08<5:49:49,  2.18s/it]

Iter 0379 | Train Loss 1.50440


  4%|██▉                                                                          | 381/10000 [10:10<5:50:50,  2.19s/it]

Iter 0380 | Train Loss 1.48855


  4%|██▉                                                                          | 382/10000 [10:12<5:50:19,  2.19s/it]

Iter 0381 | Train Loss 1.47263


  4%|██▉                                                                          | 383/10000 [10:14<5:53:04,  2.20s/it]

Iter 0382 | Train Loss 1.45665


  4%|██▉                                                                          | 384/10000 [10:17<5:57:38,  2.23s/it]

Iter 0383 | Train Loss 1.44063


  4%|██▉                                                                          | 385/10000 [10:19<6:02:17,  2.26s/it]

Iter 0384 | Train Loss 1.42459


  4%|██▉                                                                          | 386/10000 [10:21<6:05:06,  2.28s/it]

Iter 0385 | Train Loss 1.40853


  4%|██▉                                                                          | 387/10000 [10:24<6:07:38,  2.29s/it]

Iter 0386 | Train Loss 1.39247


  4%|██▉                                                                          | 388/10000 [10:26<6:06:54,  2.29s/it]

Iter 0387 | Train Loss 1.37643


  4%|██▉                                                                          | 389/10000 [10:28<5:59:15,  2.24s/it]

Iter 0388 | Train Loss 1.36042


  4%|███                                                                          | 390/10000 [10:30<5:58:49,  2.24s/it]

Iter 0389 | Train Loss 1.34445


  4%|███                                                                          | 391/10000 [10:33<6:01:33,  2.26s/it]

Iter 0390 | Train Loss 1.32853


  4%|███                                                                          | 392/10000 [10:35<6:02:13,  2.26s/it]

Iter 0391 | Train Loss 1.31269


  4%|███                                                                          | 393/10000 [10:37<6:05:59,  2.29s/it]

Iter 0392 | Train Loss 1.29692


  4%|███                                                                          | 394/10000 [10:40<6:15:41,  2.35s/it]

Iter 0393 | Train Loss 1.28124


  4%|███                                                                          | 395/10000 [10:42<6:19:42,  2.37s/it]

Iter 0394 | Train Loss 1.26567


  4%|███                                                                          | 396/10000 [10:45<6:22:51,  2.39s/it]

Iter 0395 | Train Loss 1.25022


  4%|███                                                                          | 397/10000 [10:47<6:29:46,  2.44s/it]

Iter 0396 | Train Loss 1.23488


  4%|███                                                                          | 398/10000 [10:50<6:37:38,  2.48s/it]

Iter 0397 | Train Loss 1.21967


  4%|███                                                                          | 399/10000 [10:52<6:35:20,  2.47s/it]

Iter 0398 | Train Loss 1.20461
plotting optimal model


  4%|███                                                                          | 400/10000 [10:56<7:17:43,  2.74s/it]

Iter 0399 | Train Loss 1.18968
Iter 0400 | Train Loss 1.17491


  4%|███                                                                          | 402/10000 [11:02<7:59:26,  3.00s/it]

Iter 0401 | Train Loss 1.16030


  4%|███                                                                          | 403/10000 [11:05<7:42:42,  2.89s/it]

Iter 0402 | Train Loss 1.14585


  4%|███                                                                          | 404/10000 [11:08<7:32:26,  2.83s/it]

Iter 0403 | Train Loss 1.13157


  4%|███                                                                          | 405/10000 [11:10<7:14:32,  2.72s/it]

Iter 0404 | Train Loss 1.11746


  4%|███▏                                                                         | 406/10000 [11:13<7:06:26,  2.67s/it]

Iter 0405 | Train Loss 1.10353


  4%|███▏                                                                         | 407/10000 [11:15<7:01:18,  2.64s/it]

Iter 0406 | Train Loss 1.08977


  4%|███▏                                                                         | 408/10000 [11:18<7:00:56,  2.63s/it]

Iter 0407 | Train Loss 1.07620


  4%|███▏                                                                         | 409/10000 [11:20<6:58:01,  2.62s/it]

Iter 0408 | Train Loss 1.06281


  4%|███▏                                                                         | 410/10000 [11:23<6:59:00,  2.62s/it]

Iter 0409 | Train Loss 1.04961


  4%|███▏                                                                         | 411/10000 [11:26<7:05:53,  2.66s/it]

Iter 0410 | Train Loss 1.03660


  4%|███▏                                                                         | 412/10000 [11:28<7:06:26,  2.67s/it]

Iter 0411 | Train Loss 1.02378


  4%|███▏                                                                         | 413/10000 [11:31<7:12:10,  2.70s/it]

Iter 0412 | Train Loss 1.01116


  4%|███▏                                                                         | 414/10000 [11:34<7:12:02,  2.70s/it]

Iter 0413 | Train Loss 0.99872


  4%|███▏                                                                         | 415/10000 [11:37<7:15:02,  2.72s/it]

Iter 0414 | Train Loss 0.98648


  4%|███▏                                                                         | 416/10000 [11:40<7:21:07,  2.76s/it]

Iter 0415 | Train Loss 0.97444


  4%|███▏                                                                         | 417/10000 [11:42<7:21:14,  2.76s/it]

Iter 0416 | Train Loss 0.96259


  4%|███▏                                                                         | 418/10000 [11:45<7:23:50,  2.78s/it]

Iter 0417 | Train Loss 0.95093


  4%|███▏                                                                         | 419/10000 [11:48<7:28:24,  2.81s/it]

Iter 0418 | Train Loss 0.93946


  4%|███▏                                                                         | 420/10000 [11:51<7:31:38,  2.83s/it]

Iter 0419 | Train Loss 0.92819


  4%|███▏                                                                         | 421/10000 [11:54<7:33:42,  2.84s/it]

Iter 0420 | Train Loss 0.91710


  4%|███▏                                                                         | 422/10000 [11:57<7:41:53,  2.89s/it]

Iter 0421 | Train Loss 0.90621


  4%|███▎                                                                         | 423/10000 [12:00<7:40:31,  2.89s/it]

Iter 0422 | Train Loss 0.89549


  4%|███▎                                                                         | 424/10000 [12:03<7:41:30,  2.89s/it]

Iter 0423 | Train Loss 0.88497


  4%|███▎                                                                         | 425/10000 [12:05<7:40:35,  2.89s/it]

Iter 0424 | Train Loss 0.87462


  4%|███▎                                                                         | 426/10000 [12:08<7:40:06,  2.88s/it]

Iter 0425 | Train Loss 0.86445


  4%|███▎                                                                         | 427/10000 [12:11<7:41:15,  2.89s/it]

Iter 0426 | Train Loss 0.85447


  4%|███▎                                                                         | 428/10000 [12:14<7:42:11,  2.90s/it]

Iter 0427 | Train Loss 0.84465


  4%|███▎                                                                         | 429/10000 [12:17<7:43:38,  2.91s/it]

Iter 0428 | Train Loss 0.83501


  4%|███▎                                                                         | 430/10000 [12:20<7:59:06,  3.00s/it]

Iter 0429 | Train Loss 0.82553


  4%|███▎                                                                         | 431/10000 [12:24<8:53:29,  3.35s/it]

Iter 0430 | Train Loss 0.81622


  4%|███▎                                                                         | 432/10000 [12:29<9:31:24,  3.58s/it]

Iter 0431 | Train Loss 0.80707


  4%|███▎                                                                        | 433/10000 [12:33<10:00:03,  3.76s/it]

Iter 0432 | Train Loss 0.79808


  4%|███▎                                                                        | 434/10000 [12:37<10:21:56,  3.90s/it]

Iter 0433 | Train Loss 0.78924


  4%|███▎                                                                        | 435/10000 [12:41<10:41:22,  4.02s/it]

Iter 0434 | Train Loss 0.78055


  4%|███▎                                                                        | 436/10000 [12:46<10:53:56,  4.10s/it]

Iter 0435 | Train Loss 0.77201


  4%|███▎                                                                        | 437/10000 [12:49<10:31:52,  3.96s/it]

Iter 0436 | Train Loss 0.76361


  4%|███▎                                                                         | 438/10000 [12:52<9:38:57,  3.63s/it]

Iter 0437 | Train Loss 0.75534


  4%|███▍                                                                         | 439/10000 [12:55<9:07:53,  3.44s/it]

Iter 0438 | Train Loss 0.74721


  4%|███▍                                                                         | 440/10000 [12:58<8:46:36,  3.31s/it]

Iter 0439 | Train Loss 0.73921


  4%|███▍                                                                         | 441/10000 [13:01<8:29:23,  3.20s/it]

Iter 0440 | Train Loss 0.73133


  4%|███▍                                                                         | 442/10000 [13:04<8:21:27,  3.15s/it]

Iter 0441 | Train Loss 0.72358


  4%|███▍                                                                         | 443/10000 [13:07<8:16:30,  3.12s/it]

Iter 0442 | Train Loss 0.71593


  4%|███▍                                                                         | 444/10000 [13:10<8:14:58,  3.11s/it]

Iter 0443 | Train Loss 0.70840


  4%|███▍                                                                         | 445/10000 [13:13<8:08:10,  3.07s/it]

Iter 0444 | Train Loss 0.70097


  4%|███▍                                                                         | 446/10000 [13:16<8:06:22,  3.05s/it]

Iter 0445 | Train Loss 0.69365


  4%|███▍                                                                         | 447/10000 [13:19<8:06:10,  3.05s/it]

Iter 0446 | Train Loss 0.68642


  4%|███▍                                                                         | 448/10000 [13:22<7:58:36,  3.01s/it]

Iter 0447 | Train Loss 0.67928


  4%|███▍                                                                         | 449/10000 [13:25<8:00:01,  3.02s/it]

Iter 0448 | Train Loss 0.67222


  4%|███▍                                                                         | 450/10000 [13:28<7:56:50,  3.00s/it]

Iter 0449 | Train Loss 0.66525


  5%|███▍                                                                         | 451/10000 [13:31<7:53:00,  2.97s/it]

Iter 0450 | Train Loss 0.65835


  5%|███▍                                                                         | 452/10000 [13:34<7:53:21,  2.97s/it]

Iter 0451 | Train Loss 0.65153


  5%|███▍                                                                         | 453/10000 [13:37<7:58:46,  3.01s/it]

Iter 0452 | Train Loss 0.64476


  5%|███▍                                                                         | 454/10000 [13:40<8:04:09,  3.04s/it]

Iter 0453 | Train Loss 0.63806


  5%|███▌                                                                         | 455/10000 [13:43<8:09:01,  3.07s/it]

Iter 0454 | Train Loss 0.63140


  5%|███▌                                                                         | 456/10000 [13:47<8:15:44,  3.12s/it]

Iter 0455 | Train Loss 0.62479


  5%|███▌                                                                         | 457/10000 [13:50<8:14:31,  3.11s/it]

Iter 0456 | Train Loss 0.61823


  5%|███▌                                                                         | 458/10000 [13:53<8:10:18,  3.08s/it]

Iter 0457 | Train Loss 0.61169


  5%|███▌                                                                         | 459/10000 [13:56<8:09:52,  3.08s/it]

Iter 0458 | Train Loss 0.60519


  5%|███▌                                                                         | 460/10000 [13:59<8:16:00,  3.12s/it]

Iter 0459 | Train Loss 0.59871


  5%|███▌                                                                         | 461/10000 [14:02<8:22:35,  3.16s/it]

Iter 0460 | Train Loss 0.59225


  5%|███▌                                                                         | 462/10000 [14:05<8:25:41,  3.18s/it]

Iter 0461 | Train Loss 0.58581


  5%|███▌                                                                         | 463/10000 [14:09<8:21:50,  3.16s/it]

Iter 0462 | Train Loss 0.57938


  5%|███▌                                                                         | 464/10000 [14:12<8:33:14,  3.23s/it]

Iter 0463 | Train Loss 0.57297


  5%|███▌                                                                         | 465/10000 [14:15<8:31:57,  3.22s/it]

Iter 0464 | Train Loss 0.56657


  5%|███▌                                                                         | 466/10000 [14:18<8:35:33,  3.24s/it]

Iter 0465 | Train Loss 0.56019


  5%|███▌                                                                         | 467/10000 [14:22<8:32:25,  3.23s/it]

Iter 0466 | Train Loss 0.55382


  5%|███▌                                                                         | 468/10000 [14:25<8:37:44,  3.26s/it]

Iter 0467 | Train Loss 0.54747


  5%|███▌                                                                         | 469/10000 [14:28<8:37:33,  3.26s/it]

Iter 0468 | Train Loss 0.54113


  5%|███▌                                                                         | 470/10000 [14:32<8:47:23,  3.32s/it]

Iter 0469 | Train Loss 0.53482


  5%|███▋                                                                         | 471/10000 [14:35<8:48:30,  3.33s/it]

Iter 0470 | Train Loss 0.52852


  5%|███▋                                                                         | 472/10000 [14:38<8:45:16,  3.31s/it]

Iter 0471 | Train Loss 0.52225


  5%|███▋                                                                         | 473/10000 [14:42<8:45:44,  3.31s/it]

Iter 0472 | Train Loss 0.51602


  5%|███▋                                                                         | 474/10000 [14:45<8:47:14,  3.32s/it]

Iter 0473 | Train Loss 0.50982


  5%|███▋                                                                         | 475/10000 [14:48<8:42:26,  3.29s/it]

Iter 0474 | Train Loss 0.50366


  5%|███▋                                                                         | 476/10000 [14:52<8:51:18,  3.35s/it]

Iter 0475 | Train Loss 0.49755


  5%|███▋                                                                         | 477/10000 [14:55<8:48:15,  3.33s/it]

Iter 0476 | Train Loss 0.49149


  5%|███▋                                                                         | 478/10000 [14:58<8:49:29,  3.34s/it]

Iter 0477 | Train Loss 0.48549


  5%|███▋                                                                         | 479/10000 [15:02<8:47:55,  3.33s/it]

Iter 0478 | Train Loss 0.47955


  5%|███▋                                                                         | 480/10000 [15:05<8:55:03,  3.37s/it]

Iter 0479 | Train Loss 0.47369


  5%|███▋                                                                         | 481/10000 [15:09<9:00:46,  3.41s/it]

Iter 0480 | Train Loss 0.46789


  5%|███▋                                                                         | 482/10000 [15:12<9:00:53,  3.41s/it]

Iter 0481 | Train Loss 0.46218


  5%|███▋                                                                         | 483/10000 [15:15<9:02:39,  3.42s/it]

Iter 0482 | Train Loss 0.45655


  5%|███▋                                                                         | 484/10000 [15:19<9:12:08,  3.48s/it]

Iter 0483 | Train Loss 0.45101


  5%|███▋                                                                         | 485/10000 [15:23<9:18:05,  3.52s/it]

Iter 0484 | Train Loss 0.44556


  5%|███▋                                                                         | 486/10000 [15:26<9:21:16,  3.54s/it]

Iter 0485 | Train Loss 0.44019


  5%|███▋                                                                         | 487/10000 [15:30<9:29:35,  3.59s/it]

Iter 0486 | Train Loss 0.43492


  5%|███▊                                                                         | 488/10000 [15:34<9:33:03,  3.61s/it]

Iter 0487 | Train Loss 0.42974


  5%|███▊                                                                         | 489/10000 [15:37<9:31:34,  3.61s/it]

Iter 0488 | Train Loss 0.42465


  5%|███▊                                                                         | 490/10000 [15:41<9:28:44,  3.59s/it]

Iter 0489 | Train Loss 0.41966


  5%|███▊                                                                         | 491/10000 [15:44<9:33:55,  3.62s/it]

Iter 0490 | Train Loss 0.41477


  5%|███▊                                                                         | 492/10000 [15:48<9:35:01,  3.63s/it]

Iter 0491 | Train Loss 0.40997


  5%|███▊                                                                         | 493/10000 [15:52<9:31:49,  3.61s/it]

Iter 0492 | Train Loss 0.40526


  5%|███▊                                                                         | 494/10000 [15:56<9:44:41,  3.69s/it]

Iter 0493 | Train Loss 0.40065


  5%|███▊                                                                         | 495/10000 [15:59<9:42:21,  3.68s/it]

Iter 0494 | Train Loss 0.39612


  5%|███▊                                                                         | 496/10000 [16:03<9:41:47,  3.67s/it]

Iter 0495 | Train Loss 0.39169


  5%|███▊                                                                         | 497/10000 [16:07<9:46:13,  3.70s/it]

Iter 0496 | Train Loss 0.38735


  5%|███▊                                                                         | 498/10000 [16:10<9:45:08,  3.69s/it]

Iter 0497 | Train Loss 0.38310


  5%|███▊                                                                         | 499/10000 [16:14<9:56:06,  3.76s/it]

Iter 0498 | Train Loss 0.37893


  5%|███▊                                                                        | 500/10000 [16:18<10:00:58,  3.80s/it]

Iter 0499 | Train Loss 0.37485
Iter 0500 | Train Loss 0.37085


  5%|███▊                                                                        | 502/10000 [16:27<10:58:19,  4.16s/it]

Iter 0501 | Train Loss 0.36694


  5%|███▊                                                                        | 503/10000 [16:31<10:47:47,  4.09s/it]

Iter 0502 | Train Loss 0.36310


  5%|███▊                                                                        | 504/10000 [16:35<10:43:19,  4.06s/it]

Iter 0503 | Train Loss 0.35934


  5%|███▊                                                                        | 505/10000 [16:39<10:34:47,  4.01s/it]

Iter 0504 | Train Loss 0.35566


  5%|███▊                                                                        | 506/10000 [16:43<10:34:02,  4.01s/it]

Iter 0505 | Train Loss 0.35205


  5%|███▊                                                                        | 507/10000 [16:49<11:41:24,  4.43s/it]

Iter 0506 | Train Loss 0.34851


  5%|███▊                                                                        | 508/10000 [16:54<12:31:59,  4.75s/it]

Iter 0507 | Train Loss 0.34505


  5%|███▊                                                                        | 509/10000 [16:58<12:03:19,  4.57s/it]

Iter 0508 | Train Loss 0.34165


  5%|███▉                                                                        | 510/10000 [17:02<11:25:39,  4.34s/it]

Iter 0509 | Train Loss 0.33831


  5%|███▉                                                                        | 511/10000 [17:06<11:03:53,  4.20s/it]

Iter 0510 | Train Loss 0.33504


  5%|███▉                                                                        | 512/10000 [17:10<10:52:05,  4.12s/it]

Iter 0511 | Train Loss 0.33184


  5%|███▉                                                                        | 513/10000 [17:14<10:42:39,  4.06s/it]

Iter 0512 | Train Loss 0.32869


  5%|███▉                                                                        | 514/10000 [17:18<10:34:24,  4.01s/it]

Iter 0513 | Train Loss 0.32561


  5%|███▉                                                                        | 515/10000 [17:22<10:25:28,  3.96s/it]

Iter 0514 | Train Loss 0.32258


  5%|███▉                                                                        | 516/10000 [17:25<10:21:07,  3.93s/it]

Iter 0515 | Train Loss 0.31960


  5%|███▉                                                                        | 517/10000 [17:29<10:23:16,  3.94s/it]

Iter 0516 | Train Loss 0.31668


  5%|███▉                                                                        | 518/10000 [17:33<10:28:10,  3.97s/it]

Iter 0517 | Train Loss 0.31381


  5%|███▉                                                                        | 519/10000 [17:37<10:27:01,  3.97s/it]

Iter 0518 | Train Loss 0.31100


  5%|███▉                                                                        | 520/10000 [17:41<10:28:36,  3.98s/it]

Iter 0519 | Train Loss 0.30823


  5%|███▉                                                                        | 521/10000 [17:45<10:22:48,  3.94s/it]

Iter 0520 | Train Loss 0.30550


  5%|███▉                                                                        | 522/10000 [17:49<10:23:46,  3.95s/it]

Iter 0521 | Train Loss 0.30283


  5%|███▉                                                                        | 523/10000 [17:53<10:23:10,  3.95s/it]

Iter 0522 | Train Loss 0.30020


  5%|███▉                                                                        | 524/10000 [17:57<10:23:24,  3.95s/it]

Iter 0523 | Train Loss 0.29761


  5%|███▉                                                                        | 525/10000 [18:01<10:33:15,  4.01s/it]

Iter 0524 | Train Loss 0.29506


  5%|███▉                                                                        | 526/10000 [18:05<10:31:25,  4.00s/it]

Iter 0525 | Train Loss 0.29255


  5%|████                                                                        | 527/10000 [18:09<10:28:27,  3.98s/it]

Iter 0526 | Train Loss 0.29008


  5%|████                                                                        | 528/10000 [18:13<10:29:29,  3.99s/it]

Iter 0527 | Train Loss 0.28764


  5%|████                                                                        | 529/10000 [18:17<10:31:45,  4.00s/it]

Iter 0528 | Train Loss 0.28525


  5%|████                                                                        | 530/10000 [18:21<10:26:57,  3.97s/it]

Iter 0529 | Train Loss 0.28288


  5%|████                                                                        | 531/10000 [18:25<10:29:21,  3.99s/it]

Iter 0530 | Train Loss 0.28055


  5%|████                                                                        | 532/10000 [18:29<10:30:26,  4.00s/it]

Iter 0531 | Train Loss 0.27826


  5%|████                                                                        | 533/10000 [18:33<10:32:02,  4.01s/it]

Iter 0532 | Train Loss 0.27599


  5%|████                                                                        | 534/10000 [18:37<10:32:47,  4.01s/it]

Iter 0533 | Train Loss 0.27375


  5%|████                                                                        | 535/10000 [18:41<10:38:59,  4.05s/it]

Iter 0534 | Train Loss 0.27154


  5%|████                                                                        | 536/10000 [18:45<10:40:05,  4.06s/it]

Iter 0535 | Train Loss 0.26936


  5%|████                                                                        | 537/10000 [18:50<10:44:48,  4.09s/it]

Iter 0536 | Train Loss 0.26721


  5%|████                                                                        | 538/10000 [18:54<10:49:31,  4.12s/it]

Iter 0537 | Train Loss 0.26508


  5%|████                                                                        | 539/10000 [18:58<10:48:35,  4.11s/it]

Iter 0538 | Train Loss 0.26298


  5%|████                                                                        | 540/10000 [19:02<10:52:42,  4.14s/it]

Iter 0539 | Train Loss 0.26090


  5%|████                                                                        | 541/10000 [19:06<10:52:39,  4.14s/it]

Iter 0540 | Train Loss 0.25884


  5%|████                                                                        | 542/10000 [19:10<10:53:32,  4.15s/it]

Iter 0541 | Train Loss 0.25681


  5%|████▏                                                                       | 543/10000 [19:14<10:49:06,  4.12s/it]

Iter 0542 | Train Loss 0.25480


  5%|████▏                                                                       | 544/10000 [19:18<10:44:52,  4.09s/it]

Iter 0543 | Train Loss 0.25280


  5%|████▏                                                                       | 545/10000 [19:22<10:36:54,  4.04s/it]

Iter 0544 | Train Loss 0.25083


  5%|████▏                                                                       | 546/10000 [19:27<10:48:26,  4.12s/it]

Iter 0545 | Train Loss 0.24888


  5%|████▏                                                                       | 547/10000 [19:31<10:54:59,  4.16s/it]

Iter 0546 | Train Loss 0.24695


  5%|████▏                                                                       | 548/10000 [19:35<11:00:38,  4.19s/it]

Iter 0547 | Train Loss 0.24504


  5%|████▏                                                                       | 549/10000 [19:39<10:50:17,  4.13s/it]

Iter 0548 | Train Loss 0.24315


  6%|████▏                                                                       | 550/10000 [19:43<10:49:12,  4.12s/it]

Iter 0549 | Train Loss 0.24127


  6%|████▏                                                                       | 551/10000 [19:47<10:49:28,  4.12s/it]

Iter 0550 | Train Loss 0.23942


  6%|████▏                                                                       | 552/10000 [19:52<10:48:54,  4.12s/it]

Iter 0551 | Train Loss 0.23758


  6%|████▏                                                                       | 553/10000 [19:56<10:48:06,  4.12s/it]

Iter 0552 | Train Loss 0.23576


  6%|████▏                                                                       | 554/10000 [20:00<10:51:17,  4.14s/it]

Iter 0553 | Train Loss 0.23396


  6%|████▏                                                                       | 555/10000 [20:04<10:55:17,  4.16s/it]

Iter 0554 | Train Loss 0.23217


  6%|████▏                                                                       | 556/10000 [20:08<10:55:36,  4.17s/it]

Iter 0555 | Train Loss 0.23041


  6%|████▏                                                                       | 557/10000 [20:12<10:54:22,  4.16s/it]

Iter 0556 | Train Loss 0.22866


  6%|████▏                                                                       | 558/10000 [20:17<10:57:04,  4.18s/it]

Iter 0557 | Train Loss 0.22694


  6%|████▏                                                                       | 559/10000 [20:21<10:58:09,  4.18s/it]

Iter 0558 | Train Loss 0.22523


  6%|████▎                                                                       | 560/10000 [20:25<11:00:42,  4.20s/it]

Iter 0559 | Train Loss 0.22354


  6%|████▎                                                                       | 561/10000 [20:29<11:05:25,  4.23s/it]

Iter 0560 | Train Loss 0.22188


  6%|████▎                                                                       | 562/10000 [20:34<11:09:08,  4.25s/it]

Iter 0561 | Train Loss 0.22024


  6%|████▎                                                                       | 563/10000 [20:38<11:13:27,  4.28s/it]

Iter 0562 | Train Loss 0.21862


  6%|████▎                                                                       | 564/10000 [20:43<11:27:54,  4.37s/it]

Iter 0563 | Train Loss 0.21702


  6%|████▎                                                                       | 565/10000 [20:47<11:22:30,  4.34s/it]

Iter 0564 | Train Loss 0.21544


  6%|████▎                                                                       | 566/10000 [20:51<11:11:41,  4.27s/it]

Iter 0565 | Train Loss 0.21388


  6%|████▎                                                                       | 567/10000 [20:55<11:13:10,  4.28s/it]

Iter 0566 | Train Loss 0.21235


  6%|████▎                                                                       | 568/10000 [21:00<11:15:45,  4.30s/it]

Iter 0567 | Train Loss 0.21084


  6%|████▎                                                                       | 569/10000 [21:04<11:16:48,  4.31s/it]

Iter 0568 | Train Loss 0.20935


  6%|████▎                                                                       | 570/10000 [21:08<11:13:11,  4.28s/it]

Iter 0569 | Train Loss 0.20788


  6%|████▎                                                                       | 571/10000 [21:12<11:16:22,  4.30s/it]

Iter 0570 | Train Loss 0.20644


  6%|████▎                                                                       | 572/10000 [21:17<11:25:16,  4.36s/it]

Iter 0571 | Train Loss 0.20502


  6%|████▎                                                                       | 573/10000 [21:21<11:10:46,  4.27s/it]

Iter 0572 | Train Loss 0.20361


  6%|████▎                                                                       | 574/10000 [21:25<11:09:23,  4.26s/it]

Iter 0573 | Train Loss 0.20223


  6%|████▎                                                                       | 575/10000 [21:29<11:03:19,  4.22s/it]

Iter 0574 | Train Loss 0.20088


  6%|████▍                                                                       | 576/10000 [21:34<11:03:35,  4.22s/it]

Iter 0575 | Train Loss 0.19954


  6%|████▍                                                                       | 577/10000 [21:38<11:12:28,  4.28s/it]

Iter 0576 | Train Loss 0.19822


  6%|████▍                                                                       | 578/10000 [21:42<11:15:19,  4.30s/it]

Iter 0577 | Train Loss 0.19692


  6%|████▍                                                                       | 579/10000 [21:47<11:10:27,  4.27s/it]

Iter 0578 | Train Loss 0.19564


  6%|████▍                                                                       | 580/10000 [21:51<11:15:52,  4.30s/it]

Iter 0579 | Train Loss 0.19439


  6%|████▍                                                                       | 581/10000 [21:55<11:13:45,  4.29s/it]

Iter 0580 | Train Loss 0.19315


  6%|████▍                                                                       | 582/10000 [22:00<11:21:54,  4.34s/it]

Iter 0581 | Train Loss 0.19192


  6%|████▍                                                                       | 583/10000 [22:04<11:25:11,  4.37s/it]

Iter 0582 | Train Loss 0.19072


  6%|████▍                                                                       | 584/10000 [22:08<11:18:31,  4.32s/it]

Iter 0583 | Train Loss 0.18954


  6%|████▍                                                                       | 585/10000 [22:13<11:22:13,  4.35s/it]

Iter 0584 | Train Loss 0.18837


  6%|████▍                                                                       | 586/10000 [22:17<11:25:44,  4.37s/it]

Iter 0585 | Train Loss 0.18722


  6%|████▍                                                                       | 587/10000 [22:22<11:25:11,  4.37s/it]

Iter 0586 | Train Loss 0.18609


  6%|████▍                                                                       | 588/10000 [22:26<11:26:57,  4.38s/it]

Iter 0587 | Train Loss 0.18498


  6%|████▍                                                                       | 589/10000 [22:31<11:52:30,  4.54s/it]

Iter 0588 | Train Loss 0.18388


  6%|████▍                                                                       | 590/10000 [22:36<12:30:00,  4.78s/it]

Iter 0589 | Train Loss 0.18280


  6%|████▍                                                                       | 591/10000 [22:41<12:13:47,  4.68s/it]

Iter 0590 | Train Loss 0.18173


  6%|████▍                                                                       | 592/10000 [22:45<12:01:06,  4.60s/it]

Iter 0591 | Train Loss 0.18068


  6%|████▌                                                                       | 593/10000 [22:49<11:50:07,  4.53s/it]

Iter 0592 | Train Loss 0.17964


  6%|████▌                                                                       | 594/10000 [22:54<11:45:47,  4.50s/it]

Iter 0593 | Train Loss 0.17861


  6%|████▌                                                                       | 595/10000 [22:58<11:39:09,  4.46s/it]

Iter 0594 | Train Loss 0.17760


  6%|████▌                                                                       | 596/10000 [23:03<11:39:11,  4.46s/it]

Iter 0595 | Train Loss 0.17661


  6%|████▌                                                                       | 597/10000 [23:07<11:44:33,  4.50s/it]

Iter 0596 | Train Loss 0.17562


  6%|████▌                                                                       | 598/10000 [23:12<11:36:36,  4.45s/it]

Iter 0597 | Train Loss 0.17465


  6%|████▌                                                                       | 599/10000 [23:16<11:35:37,  4.44s/it]

Iter 0598 | Train Loss 0.17369
plotting optimal model


  6%|████▌                                                                       | 600/10000 [23:21<12:05:51,  4.63s/it]

Iter 0599 | Train Loss 0.17274
Iter 0600 | Train Loss 0.17181


  6%|████▌                                                                       | 602/10000 [23:31<12:34:23,  4.82s/it]

Iter 0601 | Train Loss 0.17088


  6%|████▌                                                                       | 603/10000 [23:36<12:13:14,  4.68s/it]

Iter 0602 | Train Loss 0.16997


  6%|████▌                                                                       | 604/10000 [23:40<12:10:20,  4.66s/it]

Iter 0603 | Train Loss 0.16907


  6%|████▌                                                                       | 605/10000 [23:45<11:57:00,  4.58s/it]

Iter 0604 | Train Loss 0.16817


  6%|████▌                                                                       | 606/10000 [23:49<11:49:06,  4.53s/it]

Iter 0605 | Train Loss 0.16729


  6%|████▌                                                                       | 607/10000 [23:54<11:42:33,  4.49s/it]

Iter 0606 | Train Loss 0.16641


  6%|████▌                                                                       | 608/10000 [23:58<11:39:41,  4.47s/it]

Iter 0607 | Train Loss 0.16555


  6%|████▋                                                                       | 609/10000 [24:02<11:39:37,  4.47s/it]

Iter 0608 | Train Loss 0.16470


  6%|████▋                                                                       | 610/10000 [24:07<11:35:50,  4.45s/it]

Iter 0609 | Train Loss 0.16385


  6%|████▋                                                                       | 611/10000 [24:11<11:29:12,  4.40s/it]

Iter 0610 | Train Loss 0.16301


  6%|████▋                                                                       | 612/10000 [24:16<11:28:19,  4.40s/it]

Iter 0611 | Train Loss 0.16218


  6%|████▋                                                                       | 613/10000 [24:20<11:31:30,  4.42s/it]

Iter 0612 | Train Loss 0.16136


  6%|████▋                                                                       | 614/10000 [24:24<11:32:11,  4.42s/it]

Iter 0613 | Train Loss 0.16054


  6%|████▋                                                                       | 615/10000 [24:29<11:30:59,  4.42s/it]

Iter 0614 | Train Loss 0.15974


  6%|████▋                                                                       | 616/10000 [24:33<11:40:44,  4.48s/it]

Iter 0615 | Train Loss 0.15894


  6%|████▋                                                                       | 617/10000 [24:38<11:38:07,  4.46s/it]

Iter 0616 | Train Loss 0.15815


  6%|████▋                                                                       | 618/10000 [24:42<11:31:37,  4.42s/it]

Iter 0617 | Train Loss 0.15736


  6%|████▋                                                                       | 619/10000 [24:48<12:28:40,  4.79s/it]

Iter 0618 | Train Loss 0.15659


  6%|████▋                                                                       | 620/10000 [24:54<13:32:38,  5.20s/it]

Iter 0619 | Train Loss 0.15582


  6%|████▋                                                                       | 621/10000 [25:00<14:24:28,  5.53s/it]

Iter 0620 | Train Loss 0.15505


  6%|████▋                                                                       | 622/10000 [25:07<14:58:15,  5.75s/it]

Iter 0621 | Train Loss 0.15429


  6%|████▋                                                                       | 623/10000 [25:13<15:29:56,  5.95s/it]

Iter 0622 | Train Loss 0.15354


  6%|████▋                                                                       | 624/10000 [25:19<15:48:18,  6.07s/it]

Iter 0623 | Train Loss 0.15279


  6%|████▊                                                                       | 625/10000 [25:26<16:03:48,  6.17s/it]

Iter 0624 | Train Loss 0.15205


  6%|████▊                                                                       | 626/10000 [25:32<16:06:19,  6.19s/it]

Iter 0625 | Train Loss 0.15132


  6%|████▊                                                                       | 627/10000 [25:38<16:19:44,  6.27s/it]

Iter 0626 | Train Loss 0.15059


  6%|████▊                                                                       | 628/10000 [25:43<15:09:18,  5.82s/it]

Iter 0627 | Train Loss 0.14986


  6%|████▊                                                                       | 629/10000 [25:48<14:05:13,  5.41s/it]

Iter 0628 | Train Loss 0.14915


  6%|████▊                                                                       | 630/10000 [25:52<13:25:55,  5.16s/it]

Iter 0629 | Train Loss 0.14843


  6%|████▊                                                                       | 631/10000 [25:57<12:59:28,  4.99s/it]

Iter 0630 | Train Loss 0.14772


  6%|████▊                                                                       | 632/10000 [26:01<12:36:45,  4.85s/it]

Iter 0631 | Train Loss 0.14702


  6%|████▊                                                                       | 633/10000 [26:06<12:20:45,  4.74s/it]

Iter 0632 | Train Loss 0.14632


  6%|████▊                                                                       | 634/10000 [26:10<12:09:54,  4.68s/it]

Iter 0633 | Train Loss 0.14562


  6%|████▊                                                                       | 635/10000 [26:15<12:00:12,  4.61s/it]

Iter 0634 | Train Loss 0.14493


  6%|████▊                                                                       | 636/10000 [26:19<11:52:27,  4.57s/it]

Iter 0635 | Train Loss 0.14424


  6%|████▊                                                                       | 637/10000 [26:24<11:46:47,  4.53s/it]

Iter 0636 | Train Loss 0.14356


  6%|████▊                                                                       | 638/10000 [26:28<11:46:32,  4.53s/it]

Iter 0637 | Train Loss 0.14288


  6%|████▊                                                                       | 639/10000 [26:33<11:40:50,  4.49s/it]

Iter 0638 | Train Loss 0.14221


  6%|████▊                                                                       | 640/10000 [26:37<11:48:54,  4.54s/it]

Iter 0639 | Train Loss 0.14154


  6%|████▊                                                                       | 641/10000 [26:42<11:59:11,  4.61s/it]

Iter 0640 | Train Loss 0.14087


  6%|████▉                                                                       | 642/10000 [26:49<13:32:24,  5.21s/it]

Iter 0641 | Train Loss 0.14020


  6%|████▉                                                                       | 643/10000 [26:55<14:38:54,  5.64s/it]

Iter 0642 | Train Loss 0.13954


  6%|████▉                                                                       | 644/10000 [27:01<14:36:21,  5.62s/it]

Iter 0643 | Train Loss 0.13888


  6%|████▉                                                                       | 645/10000 [27:05<13:45:34,  5.30s/it]

Iter 0644 | Train Loss 0.13823


  6%|████▉                                                                       | 646/10000 [27:10<13:09:02,  5.06s/it]

Iter 0645 | Train Loss 0.13758


  6%|████▉                                                                       | 647/10000 [27:15<12:45:45,  4.91s/it]

Iter 0646 | Train Loss 0.13693


  6%|████▉                                                                       | 648/10000 [27:19<12:33:07,  4.83s/it]

Iter 0647 | Train Loss 0.13628


  6%|████▉                                                                       | 649/10000 [27:24<12:21:14,  4.76s/it]

Iter 0648 | Train Loss 0.13564


  6%|████▉                                                                       | 650/10000 [27:28<12:09:05,  4.68s/it]

Iter 0649 | Train Loss 0.13499


  7%|████▉                                                                       | 651/10000 [27:33<12:00:34,  4.62s/it]

Iter 0650 | Train Loss 0.13436


  7%|████▉                                                                       | 652/10000 [27:37<11:57:53,  4.61s/it]

Iter 0651 | Train Loss 0.13372


  7%|████▉                                                                       | 653/10000 [27:42<11:54:44,  4.59s/it]

Iter 0652 | Train Loss 0.13309


  7%|████▉                                                                       | 654/10000 [27:46<11:53:00,  4.58s/it]

Iter 0653 | Train Loss 0.13245


  7%|████▉                                                                       | 655/10000 [27:51<11:46:29,  4.54s/it]

Iter 0654 | Train Loss 0.13183


  7%|████▉                                                                       | 656/10000 [27:55<11:50:52,  4.56s/it]

Iter 0655 | Train Loss 0.13120


  7%|████▉                                                                       | 657/10000 [28:00<11:45:20,  4.53s/it]

Iter 0656 | Train Loss 0.13057


  7%|█████                                                                       | 658/10000 [28:05<11:49:10,  4.55s/it]

Iter 0657 | Train Loss 0.12995


  7%|█████                                                                       | 659/10000 [28:09<11:51:18,  4.57s/it]

Iter 0658 | Train Loss 0.12933


  7%|█████                                                                       | 660/10000 [28:14<11:53:40,  4.58s/it]

Iter 0659 | Train Loss 0.12871


  7%|█████                                                                       | 661/10000 [28:19<12:01:09,  4.63s/it]

Iter 0660 | Train Loss 0.12809


  7%|█████                                                                       | 662/10000 [28:23<11:59:24,  4.62s/it]

Iter 0661 | Train Loss 0.12748


  7%|█████                                                                       | 663/10000 [28:28<11:52:03,  4.58s/it]

Iter 0662 | Train Loss 0.12687


  7%|█████                                                                       | 664/10000 [28:32<12:00:02,  4.63s/it]

Iter 0663 | Train Loss 0.12625


  7%|█████                                                                       | 665/10000 [28:37<11:57:16,  4.61s/it]

Iter 0664 | Train Loss 0.12564


  7%|█████                                                                       | 666/10000 [28:42<11:56:22,  4.60s/it]

Iter 0665 | Train Loss 0.12504


  7%|█████                                                                       | 667/10000 [28:46<11:54:38,  4.59s/it]

Iter 0666 | Train Loss 0.12443


  7%|█████                                                                       | 668/10000 [28:51<11:54:32,  4.59s/it]

Iter 0667 | Train Loss 0.12382


  7%|█████                                                                       | 669/10000 [28:55<11:56:40,  4.61s/it]

Iter 0668 | Train Loss 0.12322


  7%|█████                                                                       | 670/10000 [29:00<11:52:26,  4.58s/it]

Iter 0669 | Train Loss 0.12262


  7%|█████                                                                       | 671/10000 [29:04<11:55:14,  4.60s/it]

Iter 0670 | Train Loss 0.12202


  7%|█████                                                                       | 672/10000 [29:09<11:58:20,  4.62s/it]

Iter 0671 | Train Loss 0.12142


  7%|█████                                                                       | 673/10000 [29:14<12:03:59,  4.66s/it]

Iter 0672 | Train Loss 0.12083


  7%|█████                                                                       | 674/10000 [29:19<12:02:53,  4.65s/it]

Iter 0673 | Train Loss 0.12023


  7%|█████▏                                                                      | 675/10000 [29:23<11:58:52,  4.63s/it]

Iter 0674 | Train Loss 0.11964


  7%|█████▏                                                                      | 676/10000 [29:28<11:58:08,  4.62s/it]

Iter 0675 | Train Loss 0.11905


  7%|█████▏                                                                      | 677/10000 [29:32<11:48:38,  4.56s/it]

Iter 0676 | Train Loss 0.11846


  7%|█████▏                                                                      | 678/10000 [29:37<11:53:35,  4.59s/it]

Iter 0677 | Train Loss 0.11787


  7%|█████▏                                                                      | 679/10000 [29:41<11:56:19,  4.61s/it]

Iter 0678 | Train Loss 0.11728


  7%|█████▏                                                                      | 680/10000 [29:46<11:56:49,  4.61s/it]

Iter 0679 | Train Loss 0.11670


  7%|█████▏                                                                      | 681/10000 [29:51<11:53:25,  4.59s/it]

Iter 0680 | Train Loss 0.11611


  7%|█████▏                                                                      | 682/10000 [29:55<11:51:07,  4.58s/it]

Iter 0681 | Train Loss 0.11553


  7%|█████▏                                                                      | 683/10000 [30:00<11:56:04,  4.61s/it]

Iter 0682 | Train Loss 0.11495


  7%|█████▏                                                                      | 684/10000 [30:04<11:54:26,  4.60s/it]

Iter 0683 | Train Loss 0.11437


  7%|█████▏                                                                      | 685/10000 [30:09<11:58:42,  4.63s/it]

Iter 0684 | Train Loss 0.11380


  7%|█████▏                                                                      | 686/10000 [30:14<12:04:43,  4.67s/it]

Iter 0685 | Train Loss 0.11322


  7%|█████▏                                                                      | 687/10000 [30:19<12:03:35,  4.66s/it]

Iter 0686 | Train Loss 0.11265


  7%|█████▏                                                                      | 688/10000 [30:23<12:09:13,  4.70s/it]

Iter 0687 | Train Loss 0.11208


  7%|█████▏                                                                      | 689/10000 [30:28<12:07:44,  4.69s/it]

Iter 0688 | Train Loss 0.11150


  7%|█████▏                                                                      | 690/10000 [30:33<12:07:12,  4.69s/it]

Iter 0689 | Train Loss 0.11094


  7%|█████▎                                                                      | 691/10000 [30:37<12:10:35,  4.71s/it]

Iter 0690 | Train Loss 0.11037


  7%|█████▎                                                                      | 692/10000 [30:42<12:06:55,  4.69s/it]

Iter 0691 | Train Loss 0.10980


  7%|█████▎                                                                      | 693/10000 [30:47<12:07:12,  4.69s/it]

Iter 0692 | Train Loss 0.10924


  7%|█████▎                                                                      | 694/10000 [30:51<12:00:17,  4.64s/it]

Iter 0693 | Train Loss 0.10868


  7%|█████▎                                                                      | 695/10000 [30:56<12:01:39,  4.65s/it]

Iter 0694 | Train Loss 0.10812


  7%|█████▎                                                                      | 696/10000 [31:01<12:06:34,  4.69s/it]

Iter 0695 | Train Loss 0.10756


  7%|█████▎                                                                      | 697/10000 [31:05<11:59:58,  4.64s/it]

Iter 0696 | Train Loss 0.10700


  7%|█████▎                                                                      | 698/10000 [31:10<11:57:40,  4.63s/it]

Iter 0697 | Train Loss 0.10644


  7%|█████▎                                                                      | 699/10000 [31:14<11:50:51,  4.59s/it]

Iter 0698 | Train Loss 0.10589


  7%|█████▎                                                                      | 700/10000 [31:19<11:51:11,  4.59s/it]

Iter 0699 | Train Loss 0.10534
Iter 0700 | Train Loss 0.10479


  7%|█████▎                                                                      | 702/10000 [31:30<13:02:29,  5.05s/it]

Iter 0701 | Train Loss 0.10424


  7%|█████▎                                                                      | 703/10000 [31:35<12:39:08,  4.90s/it]

Iter 0702 | Train Loss 0.10370


  7%|█████▎                                                                      | 704/10000 [31:39<12:30:03,  4.84s/it]

Iter 0703 | Train Loss 0.10315


  7%|█████▎                                                                      | 705/10000 [31:44<12:20:22,  4.78s/it]

Iter 0704 | Train Loss 0.10261


  7%|█████▎                                                                      | 706/10000 [31:49<12:13:43,  4.74s/it]

Iter 0705 | Train Loss 0.10207


  7%|█████▎                                                                      | 707/10000 [31:53<12:05:24,  4.68s/it]

Iter 0706 | Train Loss 0.10153


  7%|█████▍                                                                      | 708/10000 [31:58<11:57:51,  4.64s/it]

Iter 0707 | Train Loss 0.10099


  7%|█████▍                                                                      | 709/10000 [32:02<11:52:29,  4.60s/it]

Iter 0708 | Train Loss 0.10046


  7%|█████▍                                                                      | 710/10000 [32:07<11:52:58,  4.60s/it]

Iter 0709 | Train Loss 0.09992


  7%|█████▍                                                                      | 711/10000 [32:12<11:56:28,  4.63s/it]

Iter 0710 | Train Loss 0.09939


  7%|█████▍                                                                      | 712/10000 [32:16<11:52:54,  4.61s/it]

Iter 0711 | Train Loss 0.09886


  7%|█████▍                                                                      | 713/10000 [32:21<11:53:16,  4.61s/it]

Iter 0712 | Train Loss 0.09833


  7%|█████▍                                                                      | 714/10000 [32:25<11:51:41,  4.60s/it]

Iter 0713 | Train Loss 0.09781


  7%|█████▍                                                                      | 715/10000 [32:30<11:53:54,  4.61s/it]

Iter 0714 | Train Loss 0.09728


  7%|█████▍                                                                      | 716/10000 [32:35<11:52:23,  4.60s/it]

Iter 0715 | Train Loss 0.09676


  7%|█████▍                                                                      | 717/10000 [32:39<11:49:23,  4.59s/it]

Iter 0716 | Train Loss 0.09624


  7%|█████▍                                                                      | 718/10000 [32:44<11:49:54,  4.59s/it]

Iter 0717 | Train Loss 0.09572


  7%|█████▍                                                                      | 719/10000 [32:48<11:48:45,  4.58s/it]

Iter 0718 | Train Loss 0.09520


  7%|█████▍                                                                      | 720/10000 [32:53<12:00:01,  4.66s/it]

Iter 0719 | Train Loss 0.09469


  7%|█████▍                                                                      | 721/10000 [32:58<11:57:12,  4.64s/it]

Iter 0720 | Train Loss 0.09418


  7%|█████▍                                                                      | 722/10000 [33:02<11:56:28,  4.63s/it]

Iter 0721 | Train Loss 0.09367


  7%|█████▍                                                                      | 723/10000 [33:07<12:00:33,  4.66s/it]

Iter 0722 | Train Loss 0.09316


  7%|█████▌                                                                      | 724/10000 [33:12<11:56:06,  4.63s/it]

Iter 0723 | Train Loss 0.09265


  7%|█████▌                                                                      | 725/10000 [33:16<11:54:52,  4.62s/it]

Iter 0724 | Train Loss 0.09215


  7%|█████▌                                                                      | 726/10000 [33:21<11:58:50,  4.65s/it]

Iter 0725 | Train Loss 0.09164


  7%|█████▌                                                                      | 727/10000 [33:26<12:01:50,  4.67s/it]

Iter 0726 | Train Loss 0.09114


  7%|█████▌                                                                      | 728/10000 [33:30<11:54:41,  4.62s/it]

Iter 0727 | Train Loss 0.09064


  7%|█████▌                                                                      | 729/10000 [33:35<11:58:21,  4.65s/it]

Iter 0728 | Train Loss 0.09014


  7%|█████▌                                                                      | 730/10000 [33:40<12:01:49,  4.67s/it]

Iter 0729 | Train Loss 0.08965


  7%|█████▌                                                                      | 731/10000 [33:44<11:58:35,  4.65s/it]

Iter 0730 | Train Loss 0.08916


  7%|█████▌                                                                      | 732/10000 [33:49<11:56:44,  4.64s/it]

Iter 0731 | Train Loss 0.08866


  7%|█████▌                                                                      | 733/10000 [33:54<12:03:01,  4.68s/it]

Iter 0732 | Train Loss 0.08818


  7%|█████▌                                                                      | 734/10000 [33:58<11:59:03,  4.66s/it]

Iter 0733 | Train Loss 0.08769


  7%|█████▌                                                                      | 735/10000 [34:03<12:00:59,  4.67s/it]

Iter 0734 | Train Loss 0.08720


  7%|█████▌                                                                      | 736/10000 [34:08<12:10:21,  4.73s/it]

Iter 0735 | Train Loss 0.08672


  7%|█████▌                                                                      | 737/10000 [34:13<12:21:19,  4.80s/it]

Iter 0736 | Train Loss 0.08624


  7%|█████▌                                                                      | 738/10000 [34:17<12:18:03,  4.78s/it]

Iter 0737 | Train Loss 0.08576


  7%|█████▌                                                                      | 739/10000 [34:22<12:21:41,  4.81s/it]

Iter 0738 | Train Loss 0.08528


  7%|█████▌                                                                      | 740/10000 [34:27<12:26:58,  4.84s/it]

Iter 0739 | Train Loss 0.08481


  7%|█████▋                                                                      | 741/10000 [34:32<12:21:30,  4.81s/it]

Iter 0740 | Train Loss 0.08433


  7%|█████▋                                                                      | 742/10000 [34:37<12:21:32,  4.81s/it]

Iter 0741 | Train Loss 0.08386


  7%|█████▋                                                                      | 743/10000 [34:43<13:08:50,  5.11s/it]

Iter 0742 | Train Loss 0.08339


  7%|█████▋                                                                      | 744/10000 [34:49<14:30:50,  5.65s/it]

Iter 0743 | Train Loss 0.08293


  7%|█████▋                                                                      | 745/10000 [34:56<15:21:00,  5.97s/it]

Iter 0744 | Train Loss 0.08246


  7%|█████▋                                                                      | 746/10000 [35:03<16:06:49,  6.27s/it]

Iter 0745 | Train Loss 0.08200


  7%|█████▋                                                                      | 747/10000 [35:10<16:32:28,  6.44s/it]

Iter 0746 | Train Loss 0.08154


  7%|█████▋                                                                      | 748/10000 [35:16<16:33:24,  6.44s/it]

Iter 0747 | Train Loss 0.08108


  7%|█████▋                                                                      | 749/10000 [35:23<16:48:15,  6.54s/it]

Iter 0748 | Train Loss 0.08063


  8%|█████▋                                                                      | 750/10000 [35:30<16:50:16,  6.55s/it]

Iter 0749 | Train Loss 0.08017


  8%|█████▋                                                                      | 751/10000 [35:37<16:56:49,  6.60s/it]

Iter 0750 | Train Loss 0.07972


  8%|█████▋                                                                      | 752/10000 [35:43<17:01:23,  6.63s/it]

Iter 0751 | Train Loss 0.07927


  8%|█████▋                                                                      | 753/10000 [35:50<17:13:17,  6.70s/it]

Iter 0752 | Train Loss 0.07883


  8%|█████▋                                                                      | 754/10000 [35:57<17:10:00,  6.68s/it]

Iter 0753 | Train Loss 0.07838


  8%|█████▋                                                                      | 755/10000 [36:02<15:42:58,  6.12s/it]

Iter 0754 | Train Loss 0.07794


  8%|█████▋                                                                      | 756/10000 [36:06<14:44:00,  5.74s/it]

Iter 0755 | Train Loss 0.07750


  8%|█████▊                                                                      | 757/10000 [36:11<14:02:38,  5.47s/it]

Iter 0756 | Train Loss 0.07706


  8%|█████▊                                                                      | 758/10000 [36:16<13:22:29,  5.21s/it]

Iter 0757 | Train Loss 0.07662


  8%|█████▊                                                                      | 759/10000 [36:20<12:56:32,  5.04s/it]

Iter 0758 | Train Loss 0.07619


  8%|█████▊                                                                      | 760/10000 [36:25<12:45:37,  4.97s/it]

Iter 0759 | Train Loss 0.07576


  8%|█████▊                                                                      | 761/10000 [36:30<12:36:40,  4.91s/it]

Iter 0760 | Train Loss 0.07533


  8%|█████▊                                                                      | 762/10000 [36:35<12:26:54,  4.85s/it]

Iter 0761 | Train Loss 0.07490


  8%|█████▊                                                                      | 763/10000 [36:40<12:23:25,  4.83s/it]

Iter 0762 | Train Loss 0.07448
