# Repeating previous experiments using networks with sparse structure

Strategy: use the code you have already. (arch = mlp_noreluout_lastlayernottrainable)
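- Initialize weights 1 (input-to-hidden) to have the correct sparsity structure
- Initialize weights 2 (hidden-to-output) to all ones
- Use a modified optimizer step that masks the gradients, so that only the entries inside the sparsity pattern (the non-zero entries) are updated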

Some properties of the experiments:
- Only looking at 3-layer networks
- Layer2-to-layer3 weights are fixed to equal 1 (not trainable)
- Generated inputs are iid

These experiments are akin to previous experiments: looking to see how often we get good performance (0 loss) for different network structures. In particular, we are interested in the number of global and local optima, which we evaluate using the final loss and the final gap between estimated and true weights.

The first set of experiments involves generating true labels using the same network structure that is used to learn the weights. The second set of experiments involves generating true labels using a fully-connected network while learning with a sparser structure.

In [1]:
%load_ext autoreload
%autoreload 2
from __future__ import print_function
import tensorflow as tf
import numpy as np
import os,time,seaborn
import matplotlib.pyplot as plt
import arch,dl_utils,utils

# Print numpy arrays with 2 decimals and without scientific notation
np.set_printoptions(precision=2,suppress=True)
# Index of the GPU that graphs are placed on (used as "/gpu:<gpu_id>")
gpu_id = 1

%matplotlib inline

# Root directory under which per-simulation save directories and the
# pickled outputs of each experiment are written
maindir = './temp_sparse/'
# NOTE(review): q is not referenced in the visible part of this notebook --
# confirm it is unused before removing
q = 0

In [2]:
# Same as graph_builder_wrapper except for sparser structures
# CURRENTLY ONLY WORKS WITH mlp_noreluout_lastlayernottrainable
# (M is a mask for only the weights from input to hidden 1 layers)
def graph_builder_wrapper_sparse(input_dict,M,build_func=arch.mlp,lr_initial=0.01,max_save=100):
    """Build a training graph whose gradient-descent updates are masked by M.

    The network is created by build_func(input_dict); a loss, a weight-gap
    measure, a masked gradient-descent step and a saver are then attached.

    Args:
        input_dict: architecture parameters, passed unchanged to build_func.
        M: mask multiplied elementwise into every gradient that is not None,
            so entries where M is 0 are never updated. Every variable that
            receives a gradient must therefore be compatible with M's shape
            (the mask is meant for the input-to-hidden weights only, which is
            why this wrapper is restricted to
            mlp_noreluout_lastlayernottrainable).
        build_func: function returning the network as a dict that contains
            'yhat' and the weight tensors under keys containing 'weights'.
        lr_initial: initial value of the learning-rate variable.
        max_save: maximum number of checkpoints kept by the saver.

    Returns:
        The dict returned by build_func, extended with the keys 'y', 'w',
        'opt_step', 'total_loss', 'total_w_gap', 'learning_rate' and 'saver'.
    """
    graph = build_func(input_dict)

    # Loss between the fed labels and the network output
    y = tf.placeholder(tf.float32, shape=[None,1])
    total_loss = dl_utils.loss(y, graph['yhat'])

    # W gap: one placeholder per weight tensor, compared against the
    # corresponding tensor of the graph
    graph_weights = {k:graph[k] for k in graph if 'weights' in k}
    w = {k:tf.placeholder(tf.float32, shape=[None,None]) for k in graph_weights}
    total_w_gap = dl_utils.w_gap(w, graph_weights)

    # Optimizer: plain gradient descent, with every available gradient
    # multiplied by the mask before it is applied
    learning_rate = tf.Variable(lr_initial, name='learning_rate')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    modified_grads = []
    for grad,var in optimizer.compute_gradients(total_loss):
        # Variables the loss does not depend on have no gradient; pass them
        # through untouched instead of multiplying None by the mask
        if grad is None: modified_grads.append((grad,var))
        else: modified_grads.append((tf.multiply(grad, M), var))
    opt_step = optimizer.apply_gradients(modified_grads)

    graph['y'] = y
    graph['w'] = w
    graph['opt_step'] = opt_step
    graph['total_loss'] = total_loss
    graph['total_w_gap'] = total_w_gap
    graph['learning_rate'] = learning_rate
    # Honor max_save (previously ignored in favor of a hard-coded 100)
    graph['saver'] = tf.train.Saver(max_to_keep=max_save)
    return graph

def generate_two_plots(all_final_loss,all_final_w_gaps,all_initial_w_gaps):
    """Draw two side-by-side regression plots of the final loss.

    Left panel: final loss against the initial w gap.
    Right panel: final loss against the final w gap.

    Args:
        all_final_loss: final loss of each simulation (y values of both panels).
        all_final_w_gaps: final w gap of each simulation (x values, right panel).
        all_initial_w_gaps: initial w gap of each simulation (x values, left panel).
    """
    f, (ax1,ax2) = plt.subplots(1,2,figsize=(16,6))
    # x/y are passed by keyword: positional data arguments are deprecated in
    # recent seaborn releases, while the keyword form works on old and new ones
    seaborn.regplot(x=all_initial_w_gaps,y=all_final_loss,ax=ax1)
    ax1.set(xlabel='initial w gap', ylabel='final loss')
    seaborn.regplot(x=all_final_w_gaps,y=all_final_loss,ax=ax2)
    ax2.set(xlabel='final w gap', ylabel='final loss')

# Experiment set 1: true labels generated with sparse architectures

### Sparse structure 1

Each hidden neuron is connected to exactly one input (and each input is connected to r hidden neurons).

In [3]:
# Sparse structure 1: each hidden neuron goes to one input
def generate_sparse_w_1(p,r,seed=0):
    """Draw random weights for sparse structure 1.

    Each of the p inputs is connected to its own group of r hidden neurons,
    so there are p*r hidden neurons and each one sees exactly one input.

    Args:
        p: number of inputs.
        r: number of hidden neurons connected to each input.
        seed: seed given to np.random.seed before drawing (this reseeds
            numpy's global random state).

    Returns:
        A pair (weights, mask):
        weights: dict with 'weights1', a (p, p*r) array of standard normal
            draws that is zero outside the sparsity pattern, and 'weights2',
            a (p*r, 1) array of ones.
        mask: (p, p*r) array of 0/1 marking the sparsity pattern of
            'weights1'.
    """
    np.random.seed(seed)
    # Number of hidden neurons, derived from the arguments rather than read
    # from a global variable
    h = p*r
    # Row i*r+j of the mask is the i-th unit vector: hidden neuron i*r+j is
    # connected to input i only
    M = np.repeat(np.eye(p),r,axis=0)
    Z = np.random.normal(0,1,[h,p]) # std is 1 since each hidden neuron is connected to 1 input
    W1 = (Z*M).T
    W2 = np.ones([h,1])
    return {'weights1':W1,'weights2':W2},M.T

In [None]:
# Experiment for sparse structure 1: for every r, run num_sims simulations in
# which the labels are generated by a random sparse network and a second,
# independently initialized network with the same sparsity mask is trained on
# them. The result of every training run is collected in outputs[r] and the
# whole dict is pickled at the end.
import pickle  # needed for the final dump; not imported at the top of the notebook

num_sims = 100 # number of simulations
N = 10000      # number of samples

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable
p = 5    # number of inputs

list_r = range(1,5)
outputs = {r:[] for r in list_r}
start = time.time()

# r = number of neurons each input is connected to
for r in list_r:

    for t in range(num_sims):

        h = p*r  # number of neurons
        input_dict = dict(p=p,h=h)

        # Distinct seed for every (r,t) pair
        seed = (r-1)*num_sims+t
        savedir = '%ssparse_structure_1/initialization_%s_r%s/'%(maindir,t,r)

        # Generate data (the mask returned here is the same as the one
        # returned for the initial weights below, so it is not kept)
        w_true,_ = generate_sparse_w_1(p,r,seed=seed)
        X = dl_utils.generate_X(N,0,input_dict,cov_is_eye=True,seed=seed)
        Y = dl_utils.generate_output(X,w_true,input_dict,build_func=build_func)

        # initialize weights to have desired sparsity structure
        # (seed+1 so the initial weights differ from the true ones)
        w_init,M = generate_sparse_w_1(p,r,seed=seed+1)

        # Build graph and train..
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)

#         # If weights exist already, load useful information
#         _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
#                                                                     savedir,build_func)
#         initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
#         out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]

        # Save useful information
        outputs[r].append(out)

        print('\rr = %s, %s/%s simulations done (%.2f s elapsed)'\
              %(r,t+1,num_sims,time.time()-start),end='')

# open() instead of the Python-2-only file() builtin, inside a with-block so
# the handle is flushed and closed once the dump is done
with open('%ssparse_structure_1/outputs.pickle'%(maindir),'wb') as outfile:
    pickle.dump(outputs,outfile)

Epoch 1/100 (0.307 s), batch 1.0/100 (0.009 s): loss 1.739, w gap: 3.772Epoch 1/100 (0.308 s), batch 2.0/100 (0.010 s): loss 1.709, w gap: 3.722Epoch 1/100 (0.309 s), batch 3.0/100 (0.011 s): loss 1.273, w gap: 3.668Epoch 1/100 (0.309 s), batch 4.0/100 (0.011 s): loss 1.461, w gap: 3.631Epoch 1/100 (0.310 s), batch 5.0/100 (0.012 s): loss 1.208, w gap: 3.588Epoch 1/100 (0.312 s), batch 6.0/100 (0.015 s): loss 1.245, w gap: 3.554Epoch 1/100 (0.313 s), batch 7.0/100 (0.015 s): loss 1.874, w gap: 3.518Epoch 1/100 (0.314 s), batch 8.0/100 (0.016 s): loss 1.097, w gap: 3.462Epoch 1/100 (0.316 s), batch 9.0/100 (0.018 s): loss 2.010, w gap: 3.431Epoch 1/100 (0.316 s), batch 10.0/100 (0.019 s): loss 1.065, w gap: 3.369Epoch 1/100 (0.317 s), batch 11.0/100 (0.019 s): loss 0.895, w gap: 3.343Epoch 1/100 (0.318 s), batch 12.0/100 (0.020 s): loss 1.210, w gap: 3.313Epoch 1/100 (0.319 s), batch 13.0/100 (0.021 s): loss 1.580, w gap: 3.273Epoch 1/100 (0.320 s), batch 14.0/100 (0.022 s

Epoch 3/100 (0.507 s), batch 12.0/100 (0.011 s): loss 0.337, w gap: 0.625Epoch 3/100 (0.508 s), batch 13.0/100 (0.012 s): loss 0.232, w gap: 0.611Epoch 3/100 (0.509 s), batch 14.0/100 (0.013 s): loss 0.166, w gap: 0.602Epoch 3/100 (0.510 s), batch 15.0/100 (0.014 s): loss 0.228, w gap: 0.596Epoch 3/100 (0.511 s), batch 16.0/100 (0.015 s): loss 0.217, w gap: 0.587Epoch 3/100 (0.512 s), batch 17.0/100 (0.015 s): loss 0.209, w gap: 0.578Epoch 3/100 (0.513 s), batch 18.0/100 (0.016 s): loss 0.155, w gap: 0.570Epoch 3/100 (0.513 s), batch 19.0/100 (0.017 s): loss 0.206, w gap: 0.563Epoch 3/100 (0.514 s), batch 20.0/100 (0.018 s): loss 0.186, w gap: 0.555Epoch 3/100 (0.515 s), batch 21.0/100 (0.019 s): loss 0.165, w gap: 0.548Epoch 3/100 (0.517 s), batch 22.0/100 (0.021 s): loss 0.184, w gap: 0.541Epoch 3/100 (0.518 s), batch 23.0/100 (0.021 s): loss 0.143, w gap: 0.534Epoch 3/100 (0.519 s), batch 24.0/100 (0.022 s): loss 0.231, w gap: 0.528Epoch 3/100 (0.519 s), batch 25.0/100

Epoch 5/100 (0.708 s), batch 39.0/100 (0.039 s): loss 0.010, w gap: 0.031Epoch 5/100 (0.711 s), batch 40.0/100 (0.042 s): loss 0.012, w gap: 0.030Epoch 5/100 (0.712 s), batch 41.0/100 (0.042 s): loss 0.012, w gap: 0.030Epoch 5/100 (0.712 s), batch 42.0/100 (0.043 s): loss 0.009, w gap: 0.029Epoch 5/100 (0.713 s), batch 43.0/100 (0.044 s): loss 0.010, w gap: 0.029Epoch 5/100 (0.714 s), batch 44.0/100 (0.045 s): loss 0.012, w gap: 0.029Epoch 5/100 (0.715 s), batch 45.0/100 (0.045 s): loss 0.009, w gap: 0.028Epoch 5/100 (0.716 s), batch 46.0/100 (0.046 s): loss 0.013, w gap: 0.028Epoch 5/100 (0.717 s), batch 47.0/100 (0.047 s): loss 0.007, w gap: 0.027Epoch 5/100 (0.717 s), batch 48.0/100 (0.048 s): loss 0.008, w gap: 0.027Epoch 5/100 (0.718 s), batch 49.0/100 (0.049 s): loss 0.009, w gap: 0.027Epoch 5/100 (0.719 s), batch 50.0/100 (0.050 s): loss 0.007, w gap: 0.026Epoch 5/100 (0.720 s), batch 51.0/100 (0.050 s): loss 0.009, w gap: 0.026Epoch 5/100 (0.721 s), batch 52.0/100

Epoch 7/100 (0.910 s), batch 59.0/100 (0.053 s): loss 0.001, w gap: 0.002Epoch 7/100 (0.910 s), batch 60.0/100 (0.054 s): loss 0.001, w gap: 0.002Epoch 7/100 (0.911 s), batch 61.0/100 (0.055 s): loss 0.001, w gap: 0.002Epoch 7/100 (0.912 s), batch 62.0/100 (0.055 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.913 s), batch 63.0/100 (0.056 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.914 s), batch 64.0/100 (0.057 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.914 s), batch 65.0/100 (0.058 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.915 s), batch 66.0/100 (0.059 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.916 s), batch 67.0/100 (0.059 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.917 s), batch 68.0/100 (0.060 s): loss 0.000, w gap: 0.002Epoch 7/100 (0.917 s), batch 69.0/100 (0.061 s): loss 0.001, w gap: 0.001Epoch 7/100 (0.918 s), batch 70.0/100 (0.062 s): loss 0.000, w gap: 0.001Epoch 7/100 (0.919 s), batch 71.0/100 (0.062 s): loss 0.000, w gap: 0.001Epoch 7/100 (0.920 s), batch 72.0/100

Epoch 9/100 (1.110 s), batch 84.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.112 s), batch 85.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.113 s), batch 86.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.115 s), batch 87.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.116 s), batch 88.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.116 s), batch 89.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.117 s), batch 90.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.119 s), batch 91.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.120 s), batch 92.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.122 s), batch 93.0/100 (0.087 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.124 s), batch 94.0/100 (0.089 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.127 s), batch 95.0/100 (0.092 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.127 s), batch 96.0/100 (0.092 s): loss 0.000, w gap: 0.000Epoch 9/100 (1.128 s), batch 97.0/100

Epoch 11/100 (1.311 s), batch 94.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.312 s), batch 95.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.313 s), batch 96.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.314 s), batch 97.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.315 s), batch 98.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.315 s), batch 99.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 11/100 (1.317 s), batch 100.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.321 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.323 s), batch 2.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.324 s), batch 3.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.325 s), batch 4.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.326 s), batch 5.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.326 s), batch 6.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 12/100 (1.327 s), batch

Epoch 14/100 (1.513 s), batch 1.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.514 s), batch 2.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.515 s), batch 3.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.515 s), batch 4.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.516 s), batch 5.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.517 s), batch 6.0/100 (0.007 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.519 s), batch 7.0/100 (0.008 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.519 s), batch 8.0/100 (0.009 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.520 s), batch 9.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.521 s), batch 10.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.522 s), batch 11.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.523 s), batch 12.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.524 s), batch 13.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 14/100 (1.525 s), batch 14.

Epoch 16/100 (1.713 s), batch 23.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.715 s), batch 24.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.716 s), batch 25.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.717 s), batch 26.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.718 s), batch 27.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.718 s), batch 28.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.719 s), batch 29.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.720 s), batch 30.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.721 s), batch 31.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.721 s), batch 32.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.722 s), batch 33.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.724 s), batch 34.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.726 s), batch 35.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 16/100 (1.727 s), 

Epoch 18/100 (1.915 s), batch 40.0/100 (0.040 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.916 s), batch 41.0/100 (0.040 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.918 s), batch 42.0/100 (0.042 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.919 s), batch 43.0/100 (0.043 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.920 s), batch 44.0/100 (0.044 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.920 s), batch 45.0/100 (0.045 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.921 s), batch 46.0/100 (0.046 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.922 s), batch 47.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.923 s), batch 48.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.924 s), batch 49.0/100 (0.048 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.924 s), batch 50.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.925 s), batch 51.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.926 s), batch 52.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 18/100 (1.927 s), 

Epoch 20/100 (2.116 s), batch 68.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.117 s), batch 69.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.118 s), batch 70.0/100 (0.057 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.119 s), batch 71.0/100 (0.057 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.119 s), batch 72.0/100 (0.058 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.120 s), batch 73.0/100 (0.059 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.121 s), batch 74.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.123 s), batch 75.0/100 (0.061 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.123 s), batch 76.0/100 (0.062 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.124 s), batch 77.0/100 (0.063 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.125 s), batch 78.0/100 (0.064 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.127 s), batch 79.0/100 (0.066 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.128 s), batch 80.0/100 (0.066 s): loss 0.000, w gap: 0.000Epoch 20/100 (2.128 s), 

Epoch 22/100 (2.317 s), batch 83.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.318 s), batch 84.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.319 s), batch 85.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.320 s), batch 86.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.321 s), batch 87.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.322 s), batch 88.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.323 s), batch 89.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.324 s), batch 90.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.325 s), batch 91.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.326 s), batch 92.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.326 s), batch 93.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.327 s), batch 94.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.328 s), batch 95.0/100 (0.087 s): loss 0.000, w gap: 0.000Epoch 22/100 (2.329 s), 

Epoch 24/100 (2.518 s), batch 73.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.520 s), batch 74.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.521 s), batch 75.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.522 s), batch 76.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.523 s), batch 77.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.524 s), batch 78.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.525 s), batch 79.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.526 s), batch 80.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.527 s), batch 81.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.528 s), batch 82.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.529 s), batch 83.0/100 (0.088 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.530 s), batch 84.0/100 (0.089 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.532 s), batch 85.0/100 (0.091 s): loss 0.000, w gap: 0.000Epoch 24/100 (2.534 s), 

Epoch 26/100 (2.720 s), batch 78.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.721 s), batch 79.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.721 s), batch 80.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.722 s), batch 81.0/100 (0.073 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.724 s), batch 82.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.725 s), batch 83.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.726 s), batch 84.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.726 s), batch 85.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.727 s), batch 86.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.728 s), batch 87.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.729 s), batch 88.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.729 s), batch 89.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.730 s), batch 90.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 26/100 (2.731 s), 

Epoch 29/100 (2.920 s), batch 7.0/100 (0.007 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.921 s), batch 8.0/100 (0.008 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.923 s), batch 9.0/100 (0.009 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.923 s), batch 10.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.924 s), batch 11.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.925 s), batch 12.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.926 s), batch 13.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.926 s), batch 14.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.927 s), batch 15.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.928 s), batch 16.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.929 s), batch 17.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.929 s), batch 18.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.930 s), batch 19.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 29/100 (2.931 s), bat

Epoch 31/100 (3.123 s), batch 21.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.123 s), batch 22.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.124 s), batch 23.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.125 s), batch 24.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.126 s), batch 25.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.127 s), batch 26.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.127 s), batch 27.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.130 s), batch 28.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.131 s), batch 29.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.131 s), batch 30.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.133 s), batch 31.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.134 s), batch 32.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.135 s), batch 33.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 31/100 (3.136 s), 

Epoch 33/100 (3.324 s), batch 15.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.326 s), batch 16.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.327 s), batch 17.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.328 s), batch 18.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.329 s), batch 19.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.330 s), batch 20.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.331 s), batch 21.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.332 s), batch 22.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.333 s), batch 23.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.334 s), batch 24.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.335 s), batch 25.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.336 s), batch 26.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.337 s), batch 27.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 33/100 (3.338 s), 

Epoch 35/100 (3.525 s), batch 23.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.526 s), batch 24.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.527 s), batch 25.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.528 s), batch 26.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.529 s), batch 27.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.529 s), batch 28.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.530 s), batch 29.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.531 s), batch 30.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.532 s), batch 31.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.532 s), batch 32.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.533 s), batch 33.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.535 s), batch 34.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.536 s), batch 35.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 35/100 (3.537 s), 

Epoch 37/100 (3.726 s), batch 47.0/100 (0.043 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.728 s), batch 48.0/100 (0.045 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.730 s), batch 49.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.731 s), batch 50.0/100 (0.048 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.732 s), batch 51.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.733 s), batch 52.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.733 s), batch 53.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.734 s), batch 54.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.735 s), batch 55.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.737 s), batch 56.0/100 (0.054 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.737 s), batch 57.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.738 s), batch 58.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.739 s), batch 59.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 37/100 (3.740 s), 

Epoch 39/100 (3.927 s), batch 53.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.928 s), batch 54.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.929 s), batch 55.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.931 s), batch 56.0/100 (0.054 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.932 s), batch 57.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.933 s), batch 58.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.933 s), batch 59.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.934 s), batch 60.0/100 (0.057 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.935 s), batch 61.0/100 (0.058 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.936 s), batch 62.0/100 (0.059 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.937 s), batch 63.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.937 s), batch 64.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.938 s), batch 65.0/100 (0.061 s): loss 0.000, w gap: 0.000Epoch 39/100 (3.939 s), 

Epoch 41/100 (4.128 s), batch 72.0/100 (0.068 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.129 s), batch 73.0/100 (0.068 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.130 s), batch 74.0/100 (0.069 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.130 s), batch 75.0/100 (0.070 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.131 s), batch 76.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.132 s), batch 77.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.133 s), batch 78.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.133 s), batch 79.0/100 (0.073 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.134 s), batch 80.0/100 (0.074 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.135 s), batch 81.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.136 s), batch 82.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.137 s), batch 83.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.138 s), batch 84.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 41/100 (4.138 s), 

Epoch 43/100 (4.329 s), batch 91.0/100 (0.074 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.331 s), batch 92.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.332 s), batch 93.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.333 s), batch 94.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.334 s), batch 95.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.335 s), batch 96.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.336 s), batch 97.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.337 s), batch 98.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.337 s), batch 99.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 43/100 (4.338 s), batch 100.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 44/100 (4.343 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 44/100 (4.344 s), batch 2.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 44/100 (4.345 s), batch 3.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 44/100 (4.346 s), ba

Epoch 46/100 (4.532 s), batch 9.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.534 s), batch 10.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.535 s), batch 11.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.536 s), batch 12.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.537 s), batch 13.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.538 s), batch 14.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.539 s), batch 15.0/100 (0.018 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.539 s), batch 16.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.540 s), batch 17.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.541 s), batch 18.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.542 s), batch 19.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.542 s), batch 20.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.543 s), batch 21.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 46/100 (4.544 s), b

Epoch 48/100 (4.733 s), batch 5.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.734 s), batch 6.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.735 s), batch 7.0/100 (0.007 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.735 s), batch 8.0/100 (0.008 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.736 s), batch 9.0/100 (0.009 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.737 s), batch 10.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.738 s), batch 11.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.738 s), batch 12.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.739 s), batch 13.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.740 s), batch 14.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.741 s), batch 15.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.743 s), batch 16.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.743 s), batch 17.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 48/100 (4.744 s), batch

Epoch 50/100 (4.933 s), batch 22.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.935 s), batch 23.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.937 s), batch 24.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.938 s), batch 25.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.939 s), batch 26.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.940 s), batch 27.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.940 s), batch 28.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.941 s), batch 29.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.942 s), batch 30.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.942 s), batch 31.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.943 s), batch 32.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.944 s), batch 33.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.945 s), batch 34.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 50/100 (4.947 s), 

Epoch 52/100 (5.134 s), batch 32.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.135 s), batch 33.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.136 s), batch 34.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.137 s), batch 35.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.138 s), batch 36.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.138 s), batch 37.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.140 s), batch 38.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.141 s), batch 39.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.142 s), batch 40.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.142 s), batch 41.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.143 s), batch 42.0/100 (0.035 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.144 s), batch 43.0/100 (0.036 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.145 s), batch 44.0/100 (0.036 s): loss 0.000, w gap: 0.000Epoch 52/100 (5.146 s), 

Epoch 54/100 (5.335 s), batch 53.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.337 s), batch 54.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.338 s), batch 55.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.340 s), batch 56.0/100 (0.054 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.341 s), batch 57.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.342 s), batch 58.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.342 s), batch 59.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.343 s), batch 60.0/100 (0.057 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.344 s), batch 61.0/100 (0.058 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.346 s), batch 62.0/100 (0.059 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.347 s), batch 63.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.347 s), batch 64.0/100 (0.061 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.348 s), batch 65.0/100 (0.062 s): loss 0.000, w gap: 0.000Epoch 54/100 (5.349 s), 

Epoch 56/100 (5.536 s), batch 84.0/100 (0.067 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.537 s), batch 85.0/100 (0.068 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.538 s), batch 86.0/100 (0.069 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.539 s), batch 87.0/100 (0.070 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.539 s), batch 88.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.540 s), batch 89.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.541 s), batch 90.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.542 s), batch 91.0/100 (0.073 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.544 s), batch 92.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.544 s), batch 93.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.545 s), batch 94.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.546 s), batch 95.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.547 s), batch 96.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 56/100 (5.547 s), 

Epoch 58/100 (5.737 s), batch 95.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 58/100 (5.739 s), batch 96.0/100 (0.088 s): loss 0.000, w gap: 0.000Epoch 58/100 (5.740 s), batch 97.0/100 (0.089 s): loss 0.000, w gap: 0.000Epoch 58/100 (5.741 s), batch 98.0/100 (0.090 s): loss 0.000, w gap: 0.000Epoch 58/100 (5.741 s), batch 99.0/100 (0.090 s): loss 0.000, w gap: 0.000Epoch 58/100 (5.742 s), batch 100.0/100 (0.091 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.748 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.749 s), batch 2.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.750 s), batch 3.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.751 s), batch 4.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.752 s), batch 5.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.753 s), batch 6.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.754 s), batch 7.0/100 (0.007 s): loss 0.000, w gap: 0.000Epoch 59/100 (5.755 s), batch 

Epoch 61/100 (5.939 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.940 s), batch 2.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.941 s), batch 3.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.942 s), batch 4.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.944 s), batch 5.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.945 s), batch 6.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.947 s), batch 7.0/100 (0.008 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.948 s), batch 8.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.949 s), batch 9.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.950 s), batch 10.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.951 s), batch 11.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.951 s), batch 12.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.952 s), batch 13.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 61/100 (5.953 s), batch 14.

Epoch 63/100 (6.141 s), batch 13.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.142 s), batch 14.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.143 s), batch 15.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.144 s), batch 16.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.144 s), batch 17.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.145 s), batch 18.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.146 s), batch 19.0/100 (0.018 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.147 s), batch 20.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.148 s), batch 21.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.148 s), batch 22.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.149 s), batch 23.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.151 s), batch 24.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.152 s), batch 25.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 63/100 (6.153 s), 

Epoch 65/100 (6.342 s), batch 28.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.344 s), batch 29.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.345 s), batch 30.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.345 s), batch 31.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.346 s), batch 32.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.348 s), batch 33.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.350 s), batch 34.0/100 (0.037 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.351 s), batch 35.0/100 (0.037 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.352 s), batch 36.0/100 (0.038 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.352 s), batch 37.0/100 (0.039 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.353 s), batch 38.0/100 (0.040 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.354 s), batch 39.0/100 (0.040 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.355 s), batch 40.0/100 (0.041 s): loss 0.000, w gap: 0.000Epoch 65/100 (6.356 s), 

Epoch 67/100 (6.543 s), batch 47.0/100 (0.042 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.544 s), batch 48.0/100 (0.043 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.545 s), batch 49.0/100 (0.044 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.546 s), batch 50.0/100 (0.045 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.546 s), batch 51.0/100 (0.046 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.548 s), batch 52.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.549 s), batch 53.0/100 (0.048 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.550 s), batch 54.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.550 s), batch 55.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.551 s), batch 56.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.552 s), batch 57.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.552 s), batch 58.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.553 s), batch 59.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 67/100 (6.554 s), 

Epoch 69/100 (6.744 s), batch 71.0/100 (0.062 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.745 s), batch 72.0/100 (0.063 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.746 s), batch 73.0/100 (0.064 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.747 s), batch 74.0/100 (0.065 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.748 s), batch 75.0/100 (0.066 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.749 s), batch 76.0/100 (0.066 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.749 s), batch 77.0/100 (0.067 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.750 s), batch 78.0/100 (0.068 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.751 s), batch 79.0/100 (0.069 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.752 s), batch 80.0/100 (0.070 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.752 s), batch 81.0/100 (0.070 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.753 s), batch 82.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.754 s), batch 83.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 69/100 (6.755 s), 

Epoch 71/100 (6.945 s), batch 68.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.945 s), batch 69.0/100 (0.071 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.946 s), batch 70.0/100 (0.072 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.947 s), batch 71.0/100 (0.073 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.949 s), batch 72.0/100 (0.075 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.950 s), batch 73.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.950 s), batch 74.0/100 (0.076 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.951 s), batch 75.0/100 (0.077 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.952 s), batch 76.0/100 (0.078 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.953 s), batch 77.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.953 s), batch 78.0/100 (0.079 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.955 s), batch 79.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.956 s), batch 80.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 71/100 (6.957 s), 

Epoch 73/100 (7.147 s), batch 95.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 73/100 (7.148 s), batch 96.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 73/100 (7.149 s), batch 97.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 73/100 (7.150 s), batch 98.0/100 (0.087 s): loss 0.000, w gap: 0.000Epoch 73/100 (7.151 s), batch 99.0/100 (0.088 s): loss 0.000, w gap: 0.000Epoch 73/100 (7.151 s), batch 100.0/100 (0.089 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.157 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.158 s), batch 2.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.159 s), batch 3.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.160 s), batch 4.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.161 s), batch 5.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.162 s), batch 6.0/100 (0.006 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.163 s), batch 7.0/100 (0.007 s): loss 0.000, w gap: 0.000Epoch 74/100 (7.165 s), batch 

Epoch 76/100 (7.347 s), batch 10.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.350 s), batch 11.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.350 s), batch 12.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.351 s), batch 13.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.352 s), batch 14.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.353 s), batch 15.0/100 (0.016 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.353 s), batch 16.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.354 s), batch 17.0/100 (0.018 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.355 s), batch 18.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.356 s), batch 19.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.357 s), batch 20.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.358 s), batch 21.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.358 s), batch 22.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 76/100 (7.360 s), 

Epoch 78/100 (7.548 s), batch 27.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.549 s), batch 28.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.550 s), batch 29.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.551 s), batch 30.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.552 s), batch 31.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.553 s), batch 32.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.553 s), batch 33.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.555 s), batch 34.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.557 s), batch 35.0/100 (0.035 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.558 s), batch 36.0/100 (0.036 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.558 s), batch 37.0/100 (0.036 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.559 s), batch 38.0/100 (0.037 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.560 s), batch 39.0/100 (0.038 s): loss 0.000, w gap: 0.000Epoch 78/100 (7.561 s), 

Epoch 80/100 (7.750 s), batch 56.0/100 (0.046 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.751 s), batch 57.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.751 s), batch 58.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.752 s), batch 59.0/100 (0.048 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.753 s), batch 60.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.754 s), batch 61.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.755 s), batch 62.0/100 (0.051 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.757 s), batch 63.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.757 s), batch 64.0/100 (0.053 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.758 s), batch 65.0/100 (0.054 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.759 s), batch 66.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.760 s), batch 67.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.760 s), batch 68.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 80/100 (7.761 s), 

Epoch 82/100 (7.951 s), batch 66.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.952 s), batch 67.0/100 (0.057 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.953 s), batch 68.0/100 (0.058 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.954 s), batch 69.0/100 (0.059 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.955 s), batch 70.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.956 s), batch 71.0/100 (0.060 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.956 s), batch 72.0/100 (0.061 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.957 s), batch 73.0/100 (0.062 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.958 s), batch 74.0/100 (0.063 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.959 s), batch 75.0/100 (0.064 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.959 s), batch 76.0/100 (0.064 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.960 s), batch 77.0/100 (0.065 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.961 s), batch 78.0/100 (0.066 s): loss 0.000, w gap: 0.000Epoch 82/100 (7.962 s), 

Epoch 84/100 (8.152 s), batch 79.0/100 (0.080 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.153 s), batch 80.0/100 (0.081 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.154 s), batch 81.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.155 s), batch 82.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.156 s), batch 83.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.156 s), batch 84.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.158 s), batch 85.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.159 s), batch 86.0/100 (0.087 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.160 s), batch 87.0/100 (0.088 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.160 s), batch 88.0/100 (0.089 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.161 s), batch 89.0/100 (0.090 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.162 s), batch 90.0/100 (0.090 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.163 s), batch 91.0/100 (0.091 s): loss 0.000, w gap: 0.000Epoch 84/100 (8.163 s), 

Epoch 86/100 (8.353 s), batch 94.0/100 (0.082 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.353 s), batch 95.0/100 (0.083 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.354 s), batch 96.0/100 (0.084 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.355 s), batch 97.0/100 (0.085 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.356 s), batch 98.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.357 s), batch 99.0/100 (0.086 s): loss 0.000, w gap: 0.000Epoch 86/100 (8.357 s), batch 100.0/100 (0.087 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.364 s), batch 1.0/100 (0.001 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.365 s), batch 2.0/100 (0.002 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.365 s), batch 3.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.366 s), batch 4.0/100 (0.003 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.367 s), batch 5.0/100 (0.004 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.368 s), batch 6.0/100 (0.005 s): loss 0.000, w gap: 0.000Epoch 87/100 (8.368 s), batch

Epoch 89/100 (8.554 s), batch 7.0/100 (0.010 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.555 s), batch 8.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.556 s), batch 9.0/100 (0.011 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.557 s), batch 10.0/100 (0.012 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.558 s), batch 11.0/100 (0.013 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.558 s), batch 12.0/100 (0.014 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.559 s), batch 13.0/100 (0.015 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.562 s), batch 14.0/100 (0.017 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.562 s), batch 15.0/100 (0.018 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.563 s), batch 16.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.564 s), batch 17.0/100 (0.019 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.565 s), batch 18.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.565 s), batch 19.0/100 (0.021 s): loss 0.000, w gap: 0.000Epoch 89/100 (8.566 s), bat

Epoch 91/100 (8.755 s), batch 28.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.756 s), batch 29.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.756 s), batch 30.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.757 s), batch 31.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.759 s), batch 32.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.759 s), batch 33.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.760 s), batch 34.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.761 s), batch 35.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.762 s), batch 36.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.763 s), batch 37.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.764 s), batch 38.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.765 s), batch 39.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.766 s), batch 40.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 91/100 (8.767 s), 

Epoch 93/100 (8.956 s), batch 35.0/100 (0.044 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.956 s), batch 36.0/100 (0.045 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.957 s), batch 37.0/100 (0.046 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.958 s), batch 38.0/100 (0.047 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.960 s), batch 39.0/100 (0.048 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.961 s), batch 40.0/100 (0.049 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.962 s), batch 41.0/100 (0.050 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.964 s), batch 42.0/100 (0.052 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.964 s), batch 43.0/100 (0.053 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.965 s), batch 44.0/100 (0.054 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.966 s), batch 45.0/100 (0.055 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.967 s), batch 46.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.968 s), batch 47.0/100 (0.056 s): loss 0.000, w gap: 0.000Epoch 93/100 (8.969 s), 

Epoch 95/100 (9.157 s), batch 30.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.158 s), batch 31.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.159 s), batch 32.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.160 s), batch 33.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.161 s), batch 34.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.162 s), batch 35.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.163 s), batch 36.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.164 s), batch 37.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.164 s), batch 38.0/100 (0.035 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.165 s), batch 39.0/100 (0.036 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.166 s), batch 40.0/100 (0.037 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.167 s), batch 41.0/100 (0.038 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.168 s), batch 42.0/100 (0.039 s): loss 0.000, w gap: 0.000Epoch 95/100 (9.169 s), 

Epoch 97/100 (9.358 s), batch 21.0/100 (0.020 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.360 s), batch 22.0/100 (0.022 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.361 s), batch 23.0/100 (0.023 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.361 s), batch 24.0/100 (0.024 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.362 s), batch 25.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.364 s), batch 26.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.366 s), batch 27.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.367 s), batch 28.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.368 s), batch 29.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.370 s), batch 30.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.370 s), batch 31.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.371 s), batch 32.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.372 s), batch 33.0/100 (0.035 s): loss 0.000, w gap: 0.000Epoch 97/100 (9.373 s), 

Epoch 99/100 (9.559 s), batch 27.0/100 (0.025 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.560 s), batch 28.0/100 (0.026 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.561 s), batch 29.0/100 (0.027 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.562 s), batch 30.0/100 (0.028 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.563 s), batch 31.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.563 s), batch 32.0/100 (0.029 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.564 s), batch 33.0/100 (0.030 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.565 s), batch 34.0/100 (0.031 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.566 s), batch 35.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.566 s), batch 36.0/100 (0.032 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.567 s), batch 37.0/100 (0.033 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.568 s), batch 38.0/100 (0.034 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.568 s), batch 39.0/100 (0.035 s): loss 0.000, w gap: 0.000Epoch 99/100 (9.569 s), 


Epoch 100/100 (9.424 s), batch 100.0/100 (0.083 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.534 s), batch 100.0/100 (0.098 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.651 s), batch 100.0/100 (0.098 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.594 s), batch 100.0/100 (0.080 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.763 s), batch 100.0/100 (0.091 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.679 s), batch 100.0/100 (0.093 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.643 s), batch 100.0/100 (0.098 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.510 s), batch 100.0/100 (0.092 s): loss 3.468, w gap: 10.769
Epoch 100/100 (9.545 s), batch 100.0/100 (0.081 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.623 s), batch 100.0/100 (0.098 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.681 s), batch 100.0/100 (0.101 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.491 s), batch 100.0/100 (0.094 s): loss 0.000, w gap: 0.000
Epoch 100/100 (9.658 s), batch 100.0/100 (0.097 s): loss 2.371, w gap: 5.3

Epoch 1/100 (0.105 s), batch 42.0/100 (0.045 s): loss 5.742, w gap: 18.053Epoch 1/100 (0.111 s), batch 43.0/100 (0.052 s): loss 4.813, w gap: 18.069Epoch 1/100 (0.113 s), batch 44.0/100 (0.054 s): loss 3.905, w gap: 18.071Epoch 1/100 (0.114 s), batch 45.0/100 (0.055 s): loss 4.255, w gap: 18.047Epoch 1/100 (0.115 s), batch 46.0/100 (0.055 s): loss 3.241, w gap: 18.055Epoch 1/100 (0.116 s), batch 47.0/100 (0.056 s): loss 3.222, w gap: 18.059Epoch 1/100 (0.116 s), batch 48.0/100 (0.057 s): loss 2.704, w gap: 17.995Epoch 1/100 (0.117 s), batch 49.0/100 (0.058 s): loss 4.162, w gap: 17.927Epoch 1/100 (0.118 s), batch 50.0/100 (0.058 s): loss 3.316, w gap: 17.827Epoch 1/100 (0.119 s), batch 51.0/100 (0.060 s): loss 3.175, w gap: 17.762Epoch 1/100 (0.120 s), batch 52.0/100 (0.061 s): loss 3.313, w gap: 17.650Epoch 1/100 (0.121 s), batch 53.0/100 (0.061 s): loss 3.301, w gap: 17.603Epoch 1/100 (0.122 s), batch 54.0/100 (0.062 s): loss 3.326, w gap: 17.475Epoch 1/100 (0.122 s), ba

Epoch 3/100 (0.312 s), batch 51.0/100 (0.050 s): loss 0.955, w gap: 13.853Epoch 3/100 (0.314 s), batch 52.0/100 (0.052 s): loss 0.785, w gap: 13.835Epoch 3/100 (0.314 s), batch 53.0/100 (0.053 s): loss 1.099, w gap: 13.833Epoch 3/100 (0.315 s), batch 54.0/100 (0.054 s): loss 0.671, w gap: 13.838Epoch 3/100 (0.316 s), batch 55.0/100 (0.055 s): loss 0.580, w gap: 13.831Epoch 3/100 (0.317 s), batch 56.0/100 (0.055 s): loss 0.690, w gap: 13.834Epoch 3/100 (0.318 s), batch 57.0/100 (0.056 s): loss 1.038, w gap: 13.827Epoch 3/100 (0.323 s), batch 58.0/100 (0.062 s): loss 0.819, w gap: 13.809Epoch 3/100 (0.325 s), batch 59.0/100 (0.064 s): loss 1.123, w gap: 13.801Epoch 3/100 (0.326 s), batch 60.0/100 (0.065 s): loss 0.505, w gap: 13.816Epoch 3/100 (0.327 s), batch 61.0/100 (0.065 s): loss 0.917, w gap: 13.804Epoch 3/100 (0.328 s), batch 62.0/100 (0.066 s): loss 0.689, w gap: 13.804Epoch 3/100 (0.330 s), batch 63.0/100 (0.068 s): loss 1.138, w gap: 13.823Epoch 3/100 (0.330 s), b

Epoch 5/100 (0.512 s), batch 55.0/100 (0.058 s): loss 0.706, w gap: 13.783Epoch 5/100 (0.514 s), batch 56.0/100 (0.060 s): loss 0.541, w gap: 13.779Epoch 5/100 (0.515 s), batch 57.0/100 (0.060 s): loss 0.752, w gap: 13.760Epoch 5/100 (0.516 s), batch 58.0/100 (0.061 s): loss 0.728, w gap: 13.758Epoch 5/100 (0.517 s), batch 59.0/100 (0.062 s): loss 0.685, w gap: 13.748Epoch 5/100 (0.518 s), batch 60.0/100 (0.063 s): loss 1.174, w gap: 13.751Epoch 5/100 (0.518 s), batch 61.0/100 (0.063 s): loss 0.877, w gap: 13.760Epoch 5/100 (0.519 s), batch 62.0/100 (0.064 s): loss 0.638, w gap: 13.768Epoch 5/100 (0.520 s), batch 63.0/100 (0.065 s): loss 0.775, w gap: 13.781Epoch 5/100 (0.521 s), batch 64.0/100 (0.066 s): loss 0.916, w gap: 13.770Epoch 5/100 (0.521 s), batch 65.0/100 (0.067 s): loss 0.925, w gap: 13.781Epoch 5/100 (0.522 s), batch 66.0/100 (0.067 s): loss 0.756, w gap: 13.783Epoch 5/100 (0.523 s), batch 67.0/100 (0.068 s): loss 0.643, w gap: 13.784Epoch 5/100 (0.524 s), b

Epoch 7/100 (0.713 s), batch 70.0/100 (0.062 s): loss 0.643, w gap: 13.854Epoch 7/100 (0.714 s), batch 71.0/100 (0.063 s): loss 1.027, w gap: 13.855Epoch 7/100 (0.715 s), batch 72.0/100 (0.063 s): loss 0.531, w gap: 13.857Epoch 7/100 (0.715 s), batch 73.0/100 (0.064 s): loss 0.750, w gap: 13.846Epoch 7/100 (0.716 s), batch 74.0/100 (0.065 s): loss 1.013, w gap: 13.844Epoch 7/100 (0.717 s), batch 75.0/100 (0.065 s): loss 0.690, w gap: 13.871Epoch 7/100 (0.718 s), batch 76.0/100 (0.066 s): loss 0.598, w gap: 13.868Epoch 7/100 (0.718 s), batch 77.0/100 (0.067 s): loss 1.000, w gap: 13.864Epoch 7/100 (0.719 s), batch 78.0/100 (0.068 s): loss 1.089, w gap: 13.874Epoch 7/100 (0.720 s), batch 79.0/100 (0.069 s): loss 0.661, w gap: 13.878Epoch 7/100 (0.721 s), batch 80.0/100 (0.069 s): loss 0.622, w gap: 13.888Epoch 7/100 (0.721 s), batch 81.0/100 (0.070 s): loss 0.717, w gap: 13.884Epoch 7/100 (0.722 s), batch 82.0/100 (0.071 s): loss 0.834, w gap: 13.872Epoch 7/100 (0.723 s), b

Epoch 10/100 (0.918 s), batch 1.0/100 (0.002 s): loss 0.779, w gap: 13.912Epoch 10/100 (0.919 s), batch 2.0/100 (0.003 s): loss 0.691, w gap: 13.908Epoch 10/100 (0.919 s), batch 3.0/100 (0.004 s): loss 0.708, w gap: 13.910Epoch 10/100 (0.921 s), batch 4.0/100 (0.005 s): loss 0.723, w gap: 13.908Epoch 10/100 (0.922 s), batch 5.0/100 (0.006 s): loss 0.516, w gap: 13.896Epoch 10/100 (0.922 s), batch 6.0/100 (0.007 s): loss 1.028, w gap: 13.886Epoch 10/100 (0.924 s), batch 7.0/100 (0.008 s): loss 0.687, w gap: 13.889Epoch 10/100 (0.925 s), batch 8.0/100 (0.009 s): loss 0.635, w gap: 13.884Epoch 10/100 (0.926 s), batch 9.0/100 (0.011 s): loss 0.849, w gap: 13.895Epoch 10/100 (0.927 s), batch 10.0/100 (0.012 s): loss 0.802, w gap: 13.886Epoch 10/100 (0.928 s), batch 11.0/100 (0.012 s): loss 1.288, w gap: 13.878Epoch 10/100 (0.929 s), batch 12.0/100 (0.013 s): loss 0.963, w gap: 13.889Epoch 10/100 (0.929 s), batch 13.0/100 (0.014 s): loss 0.824, w gap: 13.898Epoch 10/100 (0.930 

Epoch 12/100 (1.119 s), batch 31.0/100 (0.027 s): loss 0.911, w gap: 13.880Epoch 12/100 (1.120 s), batch 32.0/100 (0.028 s): loss 0.703, w gap: 13.881Epoch 12/100 (1.120 s), batch 33.0/100 (0.029 s): loss 0.616, w gap: 13.900Epoch 12/100 (1.121 s), batch 34.0/100 (0.030 s): loss 0.733, w gap: 13.903Epoch 12/100 (1.122 s), batch 35.0/100 (0.031 s): loss 0.687, w gap: 13.882Epoch 12/100 (1.123 s), batch 36.0/100 (0.032 s): loss 0.547, w gap: 13.884Epoch 12/100 (1.124 s), batch 37.0/100 (0.033 s): loss 0.828, w gap: 13.880Epoch 12/100 (1.125 s), batch 38.0/100 (0.033 s): loss 1.001, w gap: 13.894Epoch 12/100 (1.126 s), batch 39.0/100 (0.034 s): loss 0.872, w gap: 13.906Epoch 12/100 (1.127 s), batch 40.0/100 (0.035 s): loss 0.575, w gap: 13.912Epoch 12/100 (1.135 s), batch 41.0/100 (0.044 s): loss 0.777, w gap: 13.911Epoch 12/100 (1.136 s), batch 42.0/100 (0.045 s): loss 0.636, w gap: 13.917Epoch 12/100 (1.137 s), batch 43.0/100 (0.045 s): loss 0.914, w gap: 13.922Epoch 12/10

Epoch 14/100 (1.320 s), batch 25.0/100 (0.030 s): loss 0.809, w gap: 13.959Epoch 14/100 (1.321 s), batch 26.0/100 (0.031 s): loss 0.838, w gap: 13.969Epoch 14/100 (1.322 s), batch 27.0/100 (0.032 s): loss 0.549, w gap: 13.972Epoch 14/100 (1.322 s), batch 28.0/100 (0.033 s): loss 0.582, w gap: 13.973Epoch 14/100 (1.323 s), batch 29.0/100 (0.034 s): loss 0.780, w gap: 13.970Epoch 14/100 (1.325 s), batch 30.0/100 (0.035 s): loss 0.695, w gap: 13.960Epoch 14/100 (1.326 s), batch 31.0/100 (0.036 s): loss 0.669, w gap: 13.942Epoch 14/100 (1.326 s), batch 32.0/100 (0.037 s): loss 0.841, w gap: 13.941Epoch 14/100 (1.328 s), batch 33.0/100 (0.039 s): loss 0.881, w gap: 13.949Epoch 14/100 (1.329 s), batch 34.0/100 (0.039 s): loss 0.617, w gap: 13.939Epoch 14/100 (1.331 s), batch 35.0/100 (0.041 s): loss 1.072, w gap: 13.942Epoch 14/100 (1.331 s), batch 36.0/100 (0.042 s): loss 0.841, w gap: 13.947Epoch 14/100 (1.332 s), batch 37.0/100 (0.043 s): loss 1.047, w gap: 13.959Epoch 14/10

Epoch 16/100 (1.522 s), batch 32.0/100 (0.032 s): loss 0.987, w gap: 13.978Epoch 16/100 (1.523 s), batch 33.0/100 (0.033 s): loss 0.537, w gap: 13.982Epoch 16/100 (1.524 s), batch 34.0/100 (0.034 s): loss 1.000, w gap: 13.966Epoch 16/100 (1.524 s), batch 35.0/100 (0.034 s): loss 0.546, w gap: 13.972Epoch 16/100 (1.525 s), batch 36.0/100 (0.035 s): loss 0.599, w gap: 13.964Epoch 16/100 (1.526 s), batch 37.0/100 (0.036 s): loss 0.827, w gap: 13.933Epoch 16/100 (1.527 s), batch 38.0/100 (0.037 s): loss 0.810, w gap: 13.925Epoch 16/100 (1.527 s), batch 39.0/100 (0.037 s): loss 0.670, w gap: 13.921Epoch 16/100 (1.528 s), batch 40.0/100 (0.038 s): loss 0.801, w gap: 13.929Epoch 16/100 (1.529 s), batch 41.0/100 (0.039 s): loss 0.633, w gap: 13.936Epoch 16/100 (1.530 s), batch 42.0/100 (0.040 s): loss 0.782, w gap: 13.950Epoch 16/100 (1.530 s), batch 43.0/100 (0.040 s): loss 0.872, w gap: 13.949Epoch 16/100 (1.531 s), batch 44.0/100 (0.041 s): loss 0.867, w gap: 13.970Epoch 16/10

Epoch 18/100 (1.724 s), batch 30.0/100 (0.034 s): loss 0.814, w gap: 13.912Epoch 18/100 (1.725 s), batch 31.0/100 (0.034 s): loss 0.854, w gap: 13.914Epoch 18/100 (1.726 s), batch 32.0/100 (0.035 s): loss 0.804, w gap: 13.922Epoch 18/100 (1.726 s), batch 33.0/100 (0.036 s): loss 0.900, w gap: 13.929Epoch 18/100 (1.727 s), batch 34.0/100 (0.037 s): loss 0.786, w gap: 13.941Epoch 18/100 (1.728 s), batch 35.0/100 (0.038 s): loss 1.288, w gap: 13.945Epoch 18/100 (1.730 s), batch 36.0/100 (0.039 s): loss 0.909, w gap: 13.975Epoch 18/100 (1.737 s), batch 37.0/100 (0.046 s): loss 0.643, w gap: 13.990Epoch 18/100 (1.738 s), batch 38.0/100 (0.047 s): loss 0.562, w gap: 13.989Epoch 18/100 (1.738 s), batch 39.0/100 (0.048 s): loss 0.789, w gap: 13.980Epoch 18/100 (1.739 s), batch 40.0/100 (0.049 s): loss 0.688, w gap: 13.986Epoch 18/100 (1.741 s), batch 41.0/100 (0.050 s): loss 0.608, w gap: 13.986Epoch 18/100 (1.742 s), batch 42.0/100 (0.051 s): loss 0.681, w gap: 13.977Epoch 18/10

Epoch 20/100 (1.923 s), batch 31.0/100 (0.030 s): loss 0.675, w gap: 13.994Epoch 20/100 (1.925 s), batch 32.0/100 (0.032 s): loss 0.882, w gap: 13.994Epoch 20/100 (1.926 s), batch 33.0/100 (0.033 s): loss 0.850, w gap: 13.994Epoch 20/100 (1.927 s), batch 34.0/100 (0.034 s): loss 1.111, w gap: 14.007Epoch 20/100 (1.927 s), batch 35.0/100 (0.034 s): loss 0.490, w gap: 14.025Epoch 20/100 (1.928 s), batch 36.0/100 (0.035 s): loss 0.611, w gap: 14.015Epoch 20/100 (1.929 s), batch 37.0/100 (0.036 s): loss 0.770, w gap: 14.014Epoch 20/100 (1.929 s), batch 38.0/100 (0.037 s): loss 0.723, w gap: 13.997Epoch 20/100 (1.930 s), batch 39.0/100 (0.037 s): loss 0.722, w gap: 14.003Epoch 20/100 (1.931 s), batch 40.0/100 (0.038 s): loss 0.838, w gap: 13.994Epoch 20/100 (1.932 s), batch 41.0/100 (0.039 s): loss 0.536, w gap: 14.007Epoch 20/100 (1.932 s), batch 42.0/100 (0.039 s): loss 0.841, w gap: 14.003Epoch 20/100 (1.933 s), batch 43.0/100 (0.040 s): loss 0.973, w gap: 13.997Epoch 20/10

Epoch 22/100 (2.124 s), batch 44.0/100 (0.041 s): loss 1.065, w gap: 13.933Epoch 22/100 (2.125 s), batch 45.0/100 (0.042 s): loss 0.567, w gap: 13.937Epoch 22/100 (2.126 s), batch 46.0/100 (0.043 s): loss 0.655, w gap: 13.928Epoch 22/100 (2.127 s), batch 47.0/100 (0.043 s): loss 1.032, w gap: 13.935Epoch 22/100 (2.128 s), batch 48.0/100 (0.044 s): loss 0.641, w gap: 13.950Epoch 22/100 (2.128 s), batch 49.0/100 (0.045 s): loss 0.860, w gap: 13.936Epoch 22/100 (2.129 s), batch 50.0/100 (0.046 s): loss 0.807, w gap: 13.946Epoch 22/100 (2.130 s), batch 51.0/100 (0.047 s): loss 1.009, w gap: 13.936Epoch 22/100 (2.131 s), batch 52.0/100 (0.048 s): loss 0.768, w gap: 13.953Epoch 22/100 (2.131 s), batch 53.0/100 (0.048 s): loss 0.830, w gap: 13.957Epoch 22/100 (2.132 s), batch 54.0/100 (0.049 s): loss 0.634, w gap: 13.968Epoch 22/100 (2.133 s), batch 55.0/100 (0.050 s): loss 0.564, w gap: 13.956Epoch 22/100 (2.134 s), batch 56.0/100 (0.050 s): loss 0.730, w gap: 13.954Epoch 22/10

Epoch 24/100 (2.325 s), batch 60.0/100 (0.055 s): loss 0.581, w gap: 13.944Epoch 24/100 (2.326 s), batch 61.0/100 (0.056 s): loss 0.758, w gap: 13.952Epoch 24/100 (2.327 s), batch 62.0/100 (0.057 s): loss 0.767, w gap: 13.958Epoch 24/100 (2.327 s), batch 63.0/100 (0.058 s): loss 0.941, w gap: 13.972Epoch 24/100 (2.328 s), batch 64.0/100 (0.058 s): loss 0.787, w gap: 13.977Epoch 24/100 (2.329 s), batch 65.0/100 (0.059 s): loss 0.838, w gap: 13.982Epoch 24/100 (2.330 s), batch 66.0/100 (0.060 s): loss 0.596, w gap: 13.993Epoch 24/100 (2.335 s), batch 67.0/100 (0.066 s): loss 0.660, w gap: 13.993Epoch 24/100 (2.336 s), batch 68.0/100 (0.067 s): loss 0.901, w gap: 14.004Epoch 24/100 (2.338 s), batch 69.0/100 (0.068 s): loss 0.861, w gap: 14.002Epoch 24/100 (2.339 s), batch 70.0/100 (0.069 s): loss 0.615, w gap: 14.010Epoch 24/100 (2.340 s), batch 71.0/100 (0.070 s): loss 0.644, w gap: 13.992Epoch 24/100 (2.340 s), batch 72.0/100 (0.071 s): loss 0.896, w gap: 14.003Epoch 24/10

Epoch 26/100 (2.526 s), batch 91.0/100 (0.074 s): loss 0.621, w gap: 13.980Epoch 26/100 (2.527 s), batch 92.0/100 (0.076 s): loss 0.762, w gap: 13.960Epoch 26/100 (2.528 s), batch 93.0/100 (0.077 s): loss 0.715, w gap: 13.965Epoch 26/100 (2.529 s), batch 94.0/100 (0.077 s): loss 0.876, w gap: 13.969Epoch 26/100 (2.530 s), batch 95.0/100 (0.078 s): loss 0.618, w gap: 13.993Epoch 26/100 (2.531 s), batch 96.0/100 (0.079 s): loss 0.578, w gap: 13.981Epoch 26/100 (2.531 s), batch 97.0/100 (0.080 s): loss 1.005, w gap: 13.983Epoch 26/100 (2.532 s), batch 98.0/100 (0.081 s): loss 0.603, w gap: 13.990Epoch 26/100 (2.533 s), batch 99.0/100 (0.081 s): loss 0.758, w gap: 13.997Epoch 26/100 (2.534 s), batch 100.0/100 (0.082 s): loss 1.208, w gap: 13.977Epoch 27/100 (2.539 s), batch 1.0/100 (0.001 s): loss 0.686, w gap: 13.973Epoch 27/100 (2.540 s), batch 2.0/100 (0.002 s): loss 0.801, w gap: 13.984Epoch 27/100 (2.540 s), batch 3.0/100 (0.003 s): loss 0.683, w gap: 13.974Epoch 27/100 

Epoch 29/100 (2.727 s), batch 19.0/100 (0.016 s): loss 0.683, w gap: 13.949Epoch 29/100 (2.728 s), batch 20.0/100 (0.017 s): loss 0.760, w gap: 13.945Epoch 29/100 (2.729 s), batch 21.0/100 (0.018 s): loss 0.867, w gap: 13.959Epoch 29/100 (2.730 s), batch 22.0/100 (0.019 s): loss 0.702, w gap: 13.951Epoch 29/100 (2.731 s), batch 23.0/100 (0.020 s): loss 0.673, w gap: 13.959Epoch 29/100 (2.731 s), batch 24.0/100 (0.021 s): loss 0.990, w gap: 13.957Epoch 29/100 (2.732 s), batch 25.0/100 (0.021 s): loss 0.714, w gap: 13.952Epoch 29/100 (2.733 s), batch 26.0/100 (0.022 s): loss 0.448, w gap: 13.954Epoch 29/100 (2.734 s), batch 27.0/100 (0.023 s): loss 0.806, w gap: 13.948Epoch 29/100 (2.735 s), batch 28.0/100 (0.024 s): loss 0.886, w gap: 13.960Epoch 29/100 (2.735 s), batch 29.0/100 (0.024 s): loss 0.671, w gap: 13.961Epoch 29/100 (2.736 s), batch 30.0/100 (0.025 s): loss 0.620, w gap: 13.970Epoch 29/100 (2.737 s), batch 31.0/100 (0.026 s): loss 0.941, w gap: 13.965Epoch 29/10

Epoch 31/100 (2.928 s), batch 29.0/100 (0.026 s): loss 0.742, w gap: 14.026Epoch 31/100 (2.930 s), batch 30.0/100 (0.028 s): loss 0.610, w gap: 14.035Epoch 31/100 (2.931 s), batch 31.0/100 (0.029 s): loss 0.670, w gap: 14.025Epoch 31/100 (2.932 s), batch 32.0/100 (0.030 s): loss 0.717, w gap: 14.017Epoch 31/100 (2.932 s), batch 33.0/100 (0.031 s): loss 0.582, w gap: 14.005Epoch 31/100 (2.933 s), batch 34.0/100 (0.031 s): loss 0.685, w gap: 14.010Epoch 31/100 (2.934 s), batch 35.0/100 (0.032 s): loss 1.002, w gap: 14.015Epoch 31/100 (2.935 s), batch 36.0/100 (0.033 s): loss 0.629, w gap: 14.010Epoch 31/100 (2.936 s), batch 37.0/100 (0.034 s): loss 0.627, w gap: 14.003Epoch 31/100 (2.936 s), batch 38.0/100 (0.035 s): loss 0.810, w gap: 14.004Epoch 31/100 (2.937 s), batch 39.0/100 (0.035 s): loss 0.708, w gap: 14.004Epoch 31/100 (2.938 s), batch 40.0/100 (0.036 s): loss 0.714, w gap: 14.016Epoch 31/100 (2.939 s), batch 41.0/100 (0.037 s): loss 0.603, w gap: 14.010Epoch 31/10

Epoch 33/100 (3.129 s), batch 51.0/100 (0.043 s): loss 1.054, w gap: 13.927Epoch 33/100 (3.131 s), batch 52.0/100 (0.045 s): loss 0.725, w gap: 13.926Epoch 33/100 (3.132 s), batch 53.0/100 (0.045 s): loss 0.416, w gap: 13.929Epoch 33/100 (3.133 s), batch 54.0/100 (0.046 s): loss 0.890, w gap: 13.914Epoch 33/100 (3.134 s), batch 55.0/100 (0.047 s): loss 0.800, w gap: 13.917Epoch 33/100 (3.134 s), batch 56.0/100 (0.048 s): loss 0.700, w gap: 13.935Epoch 33/100 (3.135 s), batch 57.0/100 (0.049 s): loss 0.842, w gap: 13.934Epoch 33/100 (3.136 s), batch 58.0/100 (0.050 s): loss 0.532, w gap: 13.947Epoch 33/100 (3.137 s), batch 59.0/100 (0.050 s): loss 0.702, w gap: 13.937Epoch 33/100 (3.137 s), batch 60.0/100 (0.051 s): loss 0.885, w gap: 13.930Epoch 33/100 (3.138 s), batch 61.0/100 (0.052 s): loss 0.708, w gap: 13.962Epoch 33/100 (3.139 s), batch 62.0/100 (0.053 s): loss 0.870, w gap: 13.953Epoch 33/100 (3.140 s), batch 63.0/100 (0.054 s): loss 0.520, w gap: 13.964Epoch 33/10

Epoch 35/100 (3.330 s), batch 65.0/100 (0.064 s): loss 0.666, w gap: 13.971Epoch 35/100 (3.332 s), batch 66.0/100 (0.066 s): loss 0.544, w gap: 13.977Epoch 35/100 (3.333 s), batch 67.0/100 (0.067 s): loss 0.759, w gap: 13.981Epoch 35/100 (3.334 s), batch 68.0/100 (0.067 s): loss 0.794, w gap: 13.976Epoch 35/100 (3.334 s), batch 69.0/100 (0.068 s): loss 0.643, w gap: 13.974Epoch 35/100 (3.335 s), batch 70.0/100 (0.069 s): loss 0.653, w gap: 13.990Epoch 35/100 (3.336 s), batch 71.0/100 (0.069 s): loss 0.949, w gap: 13.982Epoch 35/100 (3.337 s), batch 72.0/100 (0.070 s): loss 0.658, w gap: 13.998Epoch 35/100 (3.337 s), batch 73.0/100 (0.071 s): loss 0.881, w gap: 14.010Epoch 35/100 (3.338 s), batch 74.0/100 (0.072 s): loss 0.790, w gap: 14.007Epoch 35/100 (3.339 s), batch 75.0/100 (0.072 s): loss 0.782, w gap: 13.994Epoch 35/100 (3.340 s), batch 76.0/100 (0.073 s): loss 0.993, w gap: 14.001Epoch 35/100 (3.340 s), batch 77.0/100 (0.074 s): loss 1.257, w gap: 14.014Epoch 35/10

Epoch 37/100 (3.530 s), batch 67.0/100 (0.060 s): loss 0.659, w gap: 14.008Epoch 37/100 (3.533 s), batch 68.0/100 (0.062 s): loss 0.705, w gap: 14.009Epoch 37/100 (3.533 s), batch 69.0/100 (0.063 s): loss 0.722, w gap: 14.010Epoch 37/100 (3.535 s), batch 70.0/100 (0.065 s): loss 0.619, w gap: 14.006Epoch 37/100 (3.536 s), batch 71.0/100 (0.065 s): loss 0.520, w gap: 14.017Epoch 37/100 (3.536 s), batch 72.0/100 (0.066 s): loss 0.404, w gap: 13.993Epoch 37/100 (3.537 s), batch 73.0/100 (0.067 s): loss 0.603, w gap: 13.970Epoch 37/100 (3.538 s), batch 74.0/100 (0.068 s): loss 0.727, w gap: 13.960Epoch 37/100 (3.539 s), batch 75.0/100 (0.068 s): loss 0.639, w gap: 13.956Epoch 37/100 (3.539 s), batch 76.0/100 (0.069 s): loss 0.968, w gap: 13.955Epoch 37/100 (3.541 s), batch 77.0/100 (0.071 s): loss 0.819, w gap: 13.943Epoch 37/100 (3.543 s), batch 78.0/100 (0.073 s): loss 0.787, w gap: 13.972Epoch 37/100 (3.544 s), batch 79.0/100 (0.073 s): loss 0.757, w gap: 13.986Epoch 37/10

Epoch 39/100 (3.732 s), batch 84.0/100 (0.070 s): loss 0.961, w gap: 13.964Epoch 39/100 (3.733 s), batch 85.0/100 (0.071 s): loss 0.777, w gap: 13.986Epoch 39/100 (3.735 s), batch 86.0/100 (0.073 s): loss 0.574, w gap: 13.988Epoch 39/100 (3.739 s), batch 87.0/100 (0.078 s): loss 0.637, w gap: 13.984Epoch 39/100 (3.740 s), batch 88.0/100 (0.079 s): loss 0.841, w gap: 13.981Epoch 39/100 (3.741 s), batch 89.0/100 (0.079 s): loss 0.571, w gap: 13.983Epoch 39/100 (3.743 s), batch 90.0/100 (0.081 s): loss 0.900, w gap: 13.978Epoch 39/100 (3.744 s), batch 91.0/100 (0.082 s): loss 0.755, w gap: 14.004Epoch 39/100 (3.744 s), batch 92.0/100 (0.083 s): loss 0.627, w gap: 14.022Epoch 39/100 (3.745 s), batch 93.0/100 (0.083 s): loss 0.629, w gap: 14.018Epoch 39/100 (3.746 s), batch 94.0/100 (0.084 s): loss 1.020, w gap: 14.019Epoch 39/100 (3.747 s), batch 95.0/100 (0.085 s): loss 0.896, w gap: 14.023Epoch 39/100 (3.747 s), batch 96.0/100 (0.086 s): loss 0.740, w gap: 14.026Epoch 39/10

Epoch 41/100 (3.933 s), batch 90.0/100 (0.076 s): loss 0.745, w gap: 13.996Epoch 41/100 (3.934 s), batch 91.0/100 (0.077 s): loss 0.836, w gap: 13.983Epoch 41/100 (3.934 s), batch 92.0/100 (0.078 s): loss 0.529, w gap: 13.986Epoch 41/100 (3.935 s), batch 93.0/100 (0.079 s): loss 0.559, w gap: 13.984Epoch 41/100 (3.936 s), batch 94.0/100 (0.080 s): loss 0.597, w gap: 13.986Epoch 41/100 (3.937 s), batch 95.0/100 (0.080 s): loss 0.522, w gap: 13.989Epoch 41/100 (3.938 s), batch 96.0/100 (0.082 s): loss 0.750, w gap: 13.978Epoch 41/100 (3.939 s), batch 97.0/100 (0.083 s): loss 0.585, w gap: 13.976Epoch 41/100 (3.940 s), batch 98.0/100 (0.084 s): loss 0.817, w gap: 13.973Epoch 41/100 (3.941 s), batch 99.0/100 (0.084 s): loss 0.981, w gap: 13.971Epoch 41/100 (3.941 s), batch 100.0/100 (0.085 s): loss 0.687, w gap: 13.975Epoch 42/100 (3.948 s), batch 1.0/100 (0.001 s): loss 0.549, w gap: 13.985Epoch 42/100 (3.949 s), batch 2.0/100 (0.002 s): loss 0.572, w gap: 13.986Epoch 42/100

Epoch 44/100 (4.134 s), batch 3.0/100 (0.003 s): loss 0.580, w gap: 13.979Epoch 44/100 (4.135 s), batch 4.0/100 (0.004 s): loss 0.608, w gap: 13.976Epoch 44/100 (4.135 s), batch 5.0/100 (0.004 s): loss 0.540, w gap: 13.982Epoch 44/100 (4.136 s), batch 6.0/100 (0.005 s): loss 0.827, w gap: 13.967Epoch 44/100 (4.137 s), batch 7.0/100 (0.006 s): loss 0.696, w gap: 13.961Epoch 44/100 (4.138 s), batch 8.0/100 (0.007 s): loss 1.120, w gap: 13.958Epoch 44/100 (4.138 s), batch 9.0/100 (0.007 s): loss 0.772, w gap: 13.960Epoch 44/100 (4.139 s), batch 10.0/100 (0.008 s): loss 0.631, w gap: 13.956Epoch 44/100 (4.140 s), batch 11.0/100 (0.009 s): loss 0.792, w gap: 13.958Epoch 44/100 (4.141 s), batch 12.0/100 (0.010 s): loss 0.738, w gap: 13.961Epoch 44/100 (4.141 s), batch 13.0/100 (0.010 s): loss 0.589, w gap: 13.944Epoch 44/100 (4.142 s), batch 14.0/100 (0.011 s): loss 0.742, w gap: 13.938Epoch 44/100 (4.143 s), batch 15.0/100 (0.012 s): loss 0.567, w gap: 13.945Epoch 44/100 (4.14

Epoch 46/100 (4.335 s), batch 31.0/100 (0.026 s): loss 0.771, w gap: 13.962Epoch 46/100 (4.335 s), batch 32.0/100 (0.026 s): loss 0.845, w gap: 13.962Epoch 46/100 (4.336 s), batch 33.0/100 (0.027 s): loss 0.816, w gap: 13.962Epoch 46/100 (4.337 s), batch 34.0/100 (0.028 s): loss 0.676, w gap: 13.973Epoch 46/100 (4.338 s), batch 35.0/100 (0.029 s): loss 0.721, w gap: 13.969Epoch 46/100 (4.338 s), batch 36.0/100 (0.029 s): loss 0.665, w gap: 13.970Epoch 46/100 (4.339 s), batch 37.0/100 (0.030 s): loss 0.883, w gap: 13.985Epoch 46/100 (4.340 s), batch 38.0/100 (0.031 s): loss 0.823, w gap: 13.988Epoch 46/100 (4.340 s), batch 39.0/100 (0.032 s): loss 0.518, w gap: 13.978Epoch 46/100 (4.341 s), batch 40.0/100 (0.032 s): loss 0.816, w gap: 13.965Epoch 46/100 (4.342 s), batch 41.0/100 (0.033 s): loss 0.656, w gap: 13.983Epoch 46/100 (4.342 s), batch 42.0/100 (0.034 s): loss 0.688, w gap: 13.980Epoch 46/100 (4.343 s), batch 43.0/100 (0.034 s): loss 1.085, w gap: 13.978Epoch 46/10

Epoch 48/100 (4.535 s), batch 56.0/100 (0.049 s): loss 0.582, w gap: 13.980Epoch 48/100 (4.537 s), batch 57.0/100 (0.051 s): loss 0.440, w gap: 13.979Epoch 48/100 (4.538 s), batch 58.0/100 (0.052 s): loss 0.504, w gap: 13.972Epoch 48/100 (4.539 s), batch 59.0/100 (0.052 s): loss 0.568, w gap: 13.957Epoch 48/100 (4.540 s), batch 60.0/100 (0.053 s): loss 0.557, w gap: 13.944Epoch 48/100 (4.540 s), batch 61.0/100 (0.054 s): loss 0.528, w gap: 13.931Epoch 48/100 (4.541 s), batch 62.0/100 (0.054 s): loss 0.611, w gap: 13.930Epoch 48/100 (4.542 s), batch 63.0/100 (0.055 s): loss 0.711, w gap: 13.920Epoch 48/100 (4.544 s), batch 64.0/100 (0.057 s): loss 0.688, w gap: 13.906Epoch 48/100 (4.545 s), batch 65.0/100 (0.059 s): loss 0.712, w gap: 13.918Epoch 48/100 (4.547 s), batch 66.0/100 (0.060 s): loss 0.906, w gap: 13.928Epoch 48/100 (4.548 s), batch 67.0/100 (0.061 s): loss 1.228, w gap: 13.927Epoch 48/100 (4.549 s), batch 68.0/100 (0.062 s): loss 0.487, w gap: 13.958Epoch 48/10

Epoch 50/100 (4.736 s), batch 63.0/100 (0.054 s): loss 0.629, w gap: 13.936Epoch 50/100 (4.741 s), batch 64.0/100 (0.058 s): loss 0.706, w gap: 13.943Epoch 50/100 (4.742 s), batch 65.0/100 (0.059 s): loss 0.784, w gap: 13.951Epoch 50/100 (4.743 s), batch 66.0/100 (0.060 s): loss 0.644, w gap: 13.953Epoch 50/100 (4.744 s), batch 67.0/100 (0.061 s): loss 0.589, w gap: 13.959Epoch 50/100 (4.744 s), batch 68.0/100 (0.062 s): loss 0.864, w gap: 13.967Epoch 50/100 (4.745 s), batch 69.0/100 (0.062 s): loss 0.650, w gap: 14.000Epoch 50/100 (4.746 s), batch 70.0/100 (0.063 s): loss 0.863, w gap: 13.985Epoch 50/100 (4.747 s), batch 71.0/100 (0.064 s): loss 0.683, w gap: 13.992Epoch 50/100 (4.747 s), batch 72.0/100 (0.065 s): loss 0.730, w gap: 13.998Epoch 50/100 (4.748 s), batch 73.0/100 (0.065 s): loss 0.732, w gap: 14.004Epoch 50/100 (4.749 s), batch 74.0/100 (0.066 s): loss 0.641, w gap: 14.009Epoch 50/100 (4.750 s), batch 75.0/100 (0.067 s): loss 0.494, w gap: 14.006Epoch 50/10

Epoch 52/100 (4.937 s), batch 73.0/100 (0.068 s): loss 0.661, w gap: 13.944Epoch 52/100 (4.938 s), batch 74.0/100 (0.068 s): loss 0.916, w gap: 13.933Epoch 52/100 (4.939 s), batch 75.0/100 (0.069 s): loss 0.930, w gap: 13.943Epoch 52/100 (4.940 s), batch 76.0/100 (0.070 s): loss 0.863, w gap: 13.958Epoch 52/100 (4.941 s), batch 77.0/100 (0.071 s): loss 0.826, w gap: 13.970Epoch 52/100 (4.941 s), batch 78.0/100 (0.072 s): loss 0.614, w gap: 13.997Epoch 52/100 (4.942 s), batch 79.0/100 (0.072 s): loss 0.867, w gap: 13.988Epoch 52/100 (4.943 s), batch 80.0/100 (0.073 s): loss 0.758, w gap: 13.976Epoch 52/100 (4.944 s), batch 81.0/100 (0.074 s): loss 0.827, w gap: 13.964Epoch 52/100 (4.944 s), batch 82.0/100 (0.075 s): loss 0.683, w gap: 13.981Epoch 52/100 (4.945 s), batch 83.0/100 (0.075 s): loss 0.661, w gap: 13.978Epoch 52/100 (4.946 s), batch 84.0/100 (0.076 s): loss 0.914, w gap: 13.983Epoch 52/100 (4.947 s), batch 85.0/100 (0.077 s): loss 0.591, w gap: 13.990Epoch 52/10

Epoch 54/100 (5.139 s), batch 79.0/100 (0.072 s): loss 0.847, w gap: 14.008Epoch 54/100 (5.140 s), batch 80.0/100 (0.073 s): loss 0.976, w gap: 14.001Epoch 54/100 (5.140 s), batch 81.0/100 (0.074 s): loss 0.773, w gap: 14.025Epoch 54/100 (5.141 s), batch 82.0/100 (0.075 s): loss 0.829, w gap: 14.008Epoch 54/100 (5.143 s), batch 83.0/100 (0.077 s): loss 0.856, w gap: 14.008Epoch 54/100 (5.144 s), batch 84.0/100 (0.078 s): loss 0.460, w gap: 14.016Epoch 54/100 (5.145 s), batch 85.0/100 (0.078 s): loss 0.713, w gap: 13.978Epoch 54/100 (5.145 s), batch 86.0/100 (0.079 s): loss 0.507, w gap: 13.991Epoch 54/100 (5.146 s), batch 87.0/100 (0.080 s): loss 0.747, w gap: 13.990Epoch 54/100 (5.147 s), batch 88.0/100 (0.081 s): loss 0.628, w gap: 14.011Epoch 54/100 (5.148 s), batch 89.0/100 (0.081 s): loss 0.901, w gap: 13.995Epoch 54/100 (5.148 s), batch 90.0/100 (0.082 s): loss 0.733, w gap: 14.019Epoch 54/100 (5.149 s), batch 91.0/100 (0.083 s): loss 0.850, w gap: 14.013Epoch 54/10

Epoch 56/100 (5.339 s), batch 87.0/100 (0.080 s): loss 0.668, w gap: 13.983Epoch 56/100 (5.340 s), batch 88.0/100 (0.081 s): loss 1.081, w gap: 13.977Epoch 56/100 (5.341 s), batch 89.0/100 (0.081 s): loss 0.695, w gap: 13.972Epoch 56/100 (5.342 s), batch 90.0/100 (0.082 s): loss 0.669, w gap: 13.974Epoch 56/100 (5.342 s), batch 91.0/100 (0.083 s): loss 0.549, w gap: 13.980Epoch 56/100 (5.343 s), batch 92.0/100 (0.084 s): loss 0.688, w gap: 13.973Epoch 56/100 (5.344 s), batch 93.0/100 (0.084 s): loss 0.896, w gap: 13.987Epoch 56/100 (5.345 s), batch 94.0/100 (0.085 s): loss 0.871, w gap: 13.988Epoch 56/100 (5.346 s), batch 95.0/100 (0.086 s): loss 0.932, w gap: 14.000Epoch 56/100 (5.346 s), batch 96.0/100 (0.087 s): loss 1.103, w gap: 14.024Epoch 56/100 (5.347 s), batch 97.0/100 (0.087 s): loss 0.561, w gap: 14.039Epoch 56/100 (5.348 s), batch 98.0/100 (0.088 s): loss 0.597, w gap: 14.026Epoch 56/100 (5.349 s), batch 99.0/100 (0.089 s): loss 0.883, w gap: 14.025Epoch 56/10

Epoch 58/100 (5.540 s), batch 87.0/100 (0.086 s): loss 0.982, w gap: 13.948Epoch 58/100 (5.542 s), batch 88.0/100 (0.088 s): loss 0.750, w gap: 13.962Epoch 58/100 (5.543 s), batch 89.0/100 (0.089 s): loss 0.665, w gap: 13.956Epoch 58/100 (5.544 s), batch 90.0/100 (0.090 s): loss 0.672, w gap: 13.951Epoch 58/100 (5.545 s), batch 91.0/100 (0.091 s): loss 0.735, w gap: 13.945Epoch 58/100 (5.547 s), batch 92.0/100 (0.093 s): loss 0.730, w gap: 13.948Epoch 58/100 (5.548 s), batch 93.0/100 (0.093 s): loss 0.840, w gap: 13.956Epoch 58/100 (5.548 s), batch 94.0/100 (0.094 s): loss 0.737, w gap: 13.965Epoch 58/100 (5.550 s), batch 95.0/100 (0.096 s): loss 0.705, w gap: 13.987Epoch 58/100 (5.551 s), batch 96.0/100 (0.096 s): loss 0.841, w gap: 13.979Epoch 58/100 (5.552 s), batch 97.0/100 (0.097 s): loss 0.863, w gap: 13.995Epoch 58/100 (5.552 s), batch 98.0/100 (0.098 s): loss 0.611, w gap: 14.001Epoch 58/100 (5.553 s), batch 99.0/100 (0.099 s): loss 0.544, w gap: 13.972Epoch 58/10

Epoch 60/100 (5.742 s), batch 87.0/100 (0.090 s): loss 0.812, w gap: 14.004Epoch 60/100 (5.743 s), batch 88.0/100 (0.091 s): loss 0.739, w gap: 14.011Epoch 60/100 (5.744 s), batch 89.0/100 (0.092 s): loss 0.823, w gap: 13.996Epoch 60/100 (5.745 s), batch 90.0/100 (0.092 s): loss 0.843, w gap: 14.017Epoch 60/100 (5.746 s), batch 91.0/100 (0.094 s): loss 0.892, w gap: 14.041Epoch 60/100 (5.747 s), batch 92.0/100 (0.095 s): loss 0.642, w gap: 14.042Epoch 60/100 (5.748 s), batch 93.0/100 (0.095 s): loss 0.992, w gap: 14.027Epoch 60/100 (5.748 s), batch 94.0/100 (0.096 s): loss 0.731, w gap: 14.043Epoch 60/100 (5.749 s), batch 95.0/100 (0.097 s): loss 0.901, w gap: 14.027Epoch 60/100 (5.750 s), batch 96.0/100 (0.098 s): loss 0.487, w gap: 14.039Epoch 60/100 (5.751 s), batch 97.0/100 (0.098 s): loss 0.708, w gap: 14.019Epoch 60/100 (5.751 s), batch 98.0/100 (0.099 s): loss 0.851, w gap: 14.013Epoch 60/100 (5.752 s), batch 99.0/100 (0.100 s): loss 0.492, w gap: 14.020Epoch 60/10

Epoch 63/100 (5.943 s), batch 3.0/100 (0.004 s): loss 0.592, w gap: 14.000Epoch 63/100 (5.944 s), batch 4.0/100 (0.004 s): loss 0.679, w gap: 13.990Epoch 63/100 (5.945 s), batch 5.0/100 (0.005 s): loss 0.714, w gap: 13.983Epoch 63/100 (5.945 s), batch 6.0/100 (0.006 s): loss 0.682, w gap: 13.979Epoch 63/100 (5.946 s), batch 7.0/100 (0.007 s): loss 0.783, w gap: 13.970Epoch 63/100 (5.947 s), batch 8.0/100 (0.007 s): loss 0.674, w gap: 13.970Epoch 63/100 (5.948 s), batch 9.0/100 (0.008 s): loss 0.549, w gap: 13.974Epoch 63/100 (5.948 s), batch 10.0/100 (0.009 s): loss 0.588, w gap: 13.977Epoch 63/100 (5.949 s), batch 11.0/100 (0.010 s): loss 0.816, w gap: 13.970Epoch 63/100 (5.950 s), batch 12.0/100 (0.010 s): loss 0.923, w gap: 13.983Epoch 63/100 (5.951 s), batch 13.0/100 (0.011 s): loss 0.580, w gap: 13.992Epoch 63/100 (5.951 s), batch 14.0/100 (0.012 s): loss 0.500, w gap: 13.982Epoch 63/100 (5.952 s), batch 15.0/100 (0.013 s): loss 0.714, w gap: 13.975Epoch 63/100 (5.95

Epoch 65/100 (6.144 s), batch 37.0/100 (0.032 s): loss 0.723, w gap: 13.988Epoch 65/100 (6.146 s), batch 38.0/100 (0.034 s): loss 0.769, w gap: 13.981Epoch 65/100 (6.147 s), batch 39.0/100 (0.035 s): loss 0.642, w gap: 13.986Epoch 65/100 (6.148 s), batch 40.0/100 (0.035 s): loss 0.628, w gap: 13.981Epoch 65/100 (6.148 s), batch 41.0/100 (0.036 s): loss 0.663, w gap: 13.981Epoch 65/100 (6.150 s), batch 42.0/100 (0.038 s): loss 1.224, w gap: 13.973Epoch 65/100 (6.151 s), batch 43.0/100 (0.039 s): loss 0.630, w gap: 13.997Epoch 65/100 (6.152 s), batch 44.0/100 (0.040 s): loss 0.738, w gap: 13.992Epoch 65/100 (6.153 s), batch 45.0/100 (0.040 s): loss 0.825, w gap: 13.988Epoch 65/100 (6.153 s), batch 46.0/100 (0.041 s): loss 0.880, w gap: 13.992Epoch 65/100 (6.154 s), batch 47.0/100 (0.042 s): loss 1.030, w gap: 13.998Epoch 65/100 (6.155 s), batch 48.0/100 (0.043 s): loss 0.720, w gap: 14.017Epoch 65/100 (6.156 s), batch 49.0/100 (0.043 s): loss 0.685, w gap: 14.008Epoch 65/10

Epoch 67/100 (6.346 s), batch 40.0/100 (0.033 s): loss 0.664, w gap: 14.029Epoch 67/100 (6.347 s), batch 41.0/100 (0.034 s): loss 0.582, w gap: 14.017Epoch 67/100 (6.348 s), batch 42.0/100 (0.035 s): loss 0.481, w gap: 14.001Epoch 67/100 (6.348 s), batch 43.0/100 (0.036 s): loss 0.646, w gap: 13.996Epoch 67/100 (6.350 s), batch 44.0/100 (0.038 s): loss 0.747, w gap: 13.977Epoch 67/100 (6.351 s), batch 45.0/100 (0.039 s): loss 0.689, w gap: 13.972Epoch 67/100 (6.352 s), batch 46.0/100 (0.039 s): loss 1.120, w gap: 13.972Epoch 67/100 (6.353 s), batch 47.0/100 (0.040 s): loss 0.910, w gap: 13.990Epoch 67/100 (6.354 s), batch 48.0/100 (0.042 s): loss 0.548, w gap: 13.985Epoch 67/100 (6.355 s), batch 49.0/100 (0.043 s): loss 1.000, w gap: 13.981Epoch 67/100 (6.356 s), batch 50.0/100 (0.043 s): loss 0.864, w gap: 13.985Epoch 67/100 (6.357 s), batch 51.0/100 (0.044 s): loss 0.731, w gap: 13.983Epoch 67/100 (6.357 s), batch 52.0/100 (0.045 s): loss 0.508, w gap: 13.975Epoch 67/10

Epoch 69/100 (6.547 s), batch 50.0/100 (0.045 s): loss 0.839, w gap: 13.995Epoch 69/100 (6.547 s), batch 51.0/100 (0.045 s): loss 0.727, w gap: 13.993Epoch 69/100 (6.548 s), batch 52.0/100 (0.046 s): loss 0.534, w gap: 14.004Epoch 69/100 (6.549 s), batch 53.0/100 (0.047 s): loss 0.776, w gap: 14.004Epoch 69/100 (6.550 s), batch 54.0/100 (0.048 s): loss 0.754, w gap: 14.007Epoch 69/100 (6.550 s), batch 55.0/100 (0.048 s): loss 0.761, w gap: 14.007Epoch 69/100 (6.551 s), batch 56.0/100 (0.049 s): loss 1.292, w gap: 14.006Epoch 69/100 (6.552 s), batch 57.0/100 (0.050 s): loss 0.682, w gap: 14.040Epoch 69/100 (6.553 s), batch 58.0/100 (0.051 s): loss 0.820, w gap: 14.043Epoch 69/100 (6.553 s), batch 59.0/100 (0.051 s): loss 0.662, w gap: 14.041Epoch 69/100 (6.554 s), batch 60.0/100 (0.052 s): loss 0.539, w gap: 14.035Epoch 69/100 (6.555 s), batch 61.0/100 (0.053 s): loss 0.774, w gap: 14.034Epoch 69/100 (6.557 s), batch 62.0/100 (0.055 s): loss 0.557, w gap: 14.028Epoch 69/10

Epoch 71/100 (6.748 s), batch 50.0/100 (0.053 s): loss 0.783, w gap: 13.991Epoch 71/100 (6.750 s), batch 51.0/100 (0.055 s): loss 0.889, w gap: 13.971Epoch 71/100 (6.750 s), batch 52.0/100 (0.055 s): loss 0.833, w gap: 13.999Epoch 71/100 (6.751 s), batch 53.0/100 (0.056 s): loss 0.879, w gap: 14.010Epoch 71/100 (6.752 s), batch 54.0/100 (0.057 s): loss 0.748, w gap: 14.011Epoch 71/100 (6.753 s), batch 55.0/100 (0.058 s): loss 0.826, w gap: 14.001Epoch 71/100 (6.753 s), batch 56.0/100 (0.058 s): loss 0.823, w gap: 14.000Epoch 71/100 (6.754 s), batch 57.0/100 (0.059 s): loss 0.643, w gap: 13.997Epoch 71/100 (6.755 s), batch 58.0/100 (0.060 s): loss 0.778, w gap: 13.999Epoch 71/100 (6.756 s), batch 59.0/100 (0.061 s): loss 0.823, w gap: 13.986Epoch 71/100 (6.756 s), batch 60.0/100 (0.061 s): loss 0.770, w gap: 13.992Epoch 71/100 (6.757 s), batch 61.0/100 (0.062 s): loss 0.666, w gap: 14.000Epoch 71/100 (6.758 s), batch 62.0/100 (0.063 s): loss 0.505, w gap: 14.002Epoch 71/10

Epoch 73/100 (6.949 s), batch 55.0/100 (0.052 s): loss 0.620, w gap: 13.999Epoch 73/100 (6.950 s), batch 56.0/100 (0.053 s): loss 0.765, w gap: 13.992Epoch 73/100 (6.951 s), batch 57.0/100 (0.054 s): loss 0.827, w gap: 14.016Epoch 73/100 (6.952 s), batch 58.0/100 (0.054 s): loss 0.567, w gap: 14.021Epoch 73/100 (6.952 s), batch 59.0/100 (0.055 s): loss 0.957, w gap: 14.010Epoch 73/100 (6.953 s), batch 60.0/100 (0.056 s): loss 0.689, w gap: 14.024Epoch 73/100 (6.954 s), batch 61.0/100 (0.057 s): loss 1.030, w gap: 14.035Epoch 73/100 (6.955 s), batch 62.0/100 (0.057 s): loss 0.524, w gap: 14.042Epoch 73/100 (6.955 s), batch 63.0/100 (0.058 s): loss 0.674, w gap: 14.030Epoch 73/100 (6.956 s), batch 64.0/100 (0.059 s): loss 0.691, w gap: 14.013Epoch 73/100 (6.957 s), batch 65.0/100 (0.060 s): loss 0.688, w gap: 14.016Epoch 73/100 (6.958 s), batch 66.0/100 (0.060 s): loss 0.785, w gap: 14.017Epoch 73/100 (6.958 s), batch 67.0/100 (0.061 s): loss 0.698, w gap: 14.023Epoch 73/10

Epoch 75/100 (7.150 s), batch 53.0/100 (0.050 s): loss 1.131, w gap: 13.985Epoch 75/100 (7.151 s), batch 54.0/100 (0.051 s): loss 0.740, w gap: 13.991Epoch 75/100 (7.152 s), batch 55.0/100 (0.051 s): loss 0.803, w gap: 13.998Epoch 75/100 (7.152 s), batch 56.0/100 (0.052 s): loss 0.985, w gap: 13.988Epoch 75/100 (7.153 s), batch 57.0/100 (0.053 s): loss 0.621, w gap: 13.995Epoch 75/100 (7.154 s), batch 58.0/100 (0.053 s): loss 0.652, w gap: 13.976Epoch 75/100 (7.154 s), batch 59.0/100 (0.054 s): loss 0.689, w gap: 13.984Epoch 75/100 (7.155 s), batch 60.0/100 (0.055 s): loss 0.624, w gap: 13.991Epoch 75/100 (7.156 s), batch 61.0/100 (0.056 s): loss 0.729, w gap: 13.991Epoch 75/100 (7.157 s), batch 62.0/100 (0.056 s): loss 0.795, w gap: 14.001Epoch 75/100 (7.158 s), batch 63.0/100 (0.057 s): loss 0.640, w gap: 13.996Epoch 75/100 (7.158 s), batch 64.0/100 (0.058 s): loss 0.631, w gap: 13.987Epoch 75/100 (7.159 s), batch 65.0/100 (0.059 s): loss 0.830, w gap: 13.997Epoch 75/10

Epoch 77/100 (7.350 s), batch 62.0/100 (0.057 s): loss 0.654, w gap: 13.991Epoch 77/100 (7.353 s), batch 63.0/100 (0.059 s): loss 0.719, w gap: 13.976Epoch 77/100 (7.353 s), batch 64.0/100 (0.060 s): loss 0.691, w gap: 13.976Epoch 77/100 (7.354 s), batch 65.0/100 (0.061 s): loss 0.831, w gap: 13.971Epoch 77/100 (7.355 s), batch 66.0/100 (0.061 s): loss 0.599, w gap: 13.984Epoch 77/100 (7.356 s), batch 67.0/100 (0.062 s): loss 0.648, w gap: 13.984Epoch 77/100 (7.356 s), batch 68.0/100 (0.063 s): loss 0.645, w gap: 13.974Epoch 77/100 (7.357 s), batch 69.0/100 (0.064 s): loss 0.788, w gap: 13.972Epoch 77/100 (7.358 s), batch 70.0/100 (0.064 s): loss 0.586, w gap: 13.988Epoch 77/100 (7.359 s), batch 71.0/100 (0.065 s): loss 0.907, w gap: 13.990Epoch 77/100 (7.359 s), batch 72.0/100 (0.066 s): loss 0.673, w gap: 14.002Epoch 77/100 (7.360 s), batch 73.0/100 (0.067 s): loss 0.659, w gap: 13.979Epoch 77/100 (7.361 s), batch 74.0/100 (0.067 s): loss 0.917, w gap: 13.978Epoch 77/10

Epoch 79/100 (7.551 s), batch 72.0/100 (0.060 s): loss 0.660, w gap: 14.024Epoch 79/100 (7.552 s), batch 73.0/100 (0.061 s): loss 0.625, w gap: 14.022Epoch 79/100 (7.554 s), batch 74.0/100 (0.063 s): loss 0.866, w gap: 14.028Epoch 79/100 (7.555 s), batch 75.0/100 (0.064 s): loss 0.531, w gap: 14.027Epoch 79/100 (7.556 s), batch 76.0/100 (0.064 s): loss 0.543, w gap: 14.015Epoch 79/100 (7.556 s), batch 77.0/100 (0.065 s): loss 0.678, w gap: 14.005Epoch 79/100 (7.557 s), batch 78.0/100 (0.066 s): loss 0.786, w gap: 14.023Epoch 79/100 (7.558 s), batch 79.0/100 (0.067 s): loss 0.733, w gap: 14.023Epoch 79/100 (7.559 s), batch 80.0/100 (0.068 s): loss 0.570, w gap: 14.017Epoch 79/100 (7.559 s), batch 81.0/100 (0.068 s): loss 0.795, w gap: 14.004Epoch 79/100 (7.560 s), batch 82.0/100 (0.069 s): loss 0.574, w gap: 14.002Epoch 79/100 (7.561 s), batch 83.0/100 (0.070 s): loss 0.494, w gap: 14.007Epoch 79/100 (7.561 s), batch 84.0/100 (0.070 s): loss 0.658, w gap: 14.001Epoch 79/10

Epoch 82/100 (7.752 s), batch 5.0/100 (0.004 s): loss 0.615, w gap: 13.990Epoch 82/100 (7.753 s), batch 6.0/100 (0.005 s): loss 0.758, w gap: 13.991Epoch 82/100 (7.754 s), batch 7.0/100 (0.006 s): loss 1.115, w gap: 13.992Epoch 82/100 (7.755 s), batch 8.0/100 (0.007 s): loss 0.541, w gap: 14.005Epoch 82/100 (7.755 s), batch 9.0/100 (0.007 s): loss 0.776, w gap: 13.993Epoch 82/100 (7.756 s), batch 10.0/100 (0.008 s): loss 0.499, w gap: 13.995Epoch 82/100 (7.757 s), batch 11.0/100 (0.009 s): loss 0.546, w gap: 13.989Epoch 82/100 (7.758 s), batch 12.0/100 (0.010 s): loss 0.596, w gap: 13.972Epoch 82/100 (7.758 s), batch 13.0/100 (0.010 s): loss 0.626, w gap: 13.969Epoch 82/100 (7.759 s), batch 14.0/100 (0.011 s): loss 0.492, w gap: 13.969Epoch 82/100 (7.760 s), batch 15.0/100 (0.012 s): loss 1.029, w gap: 13.967Epoch 82/100 (7.761 s), batch 16.0/100 (0.013 s): loss 0.608, w gap: 13.979Epoch 82/100 (7.761 s), batch 17.0/100 (0.013 s): loss 0.868, w gap: 13.986Epoch 82/100 (7.

Epoch 84/100 (7.953 s), batch 12.0/100 (0.011 s): loss 0.800, w gap: 14.058Epoch 84/100 (7.954 s), batch 13.0/100 (0.012 s): loss 0.561, w gap: 14.057Epoch 84/100 (7.955 s), batch 14.0/100 (0.013 s): loss 0.743, w gap: 14.060Epoch 84/100 (7.956 s), batch 15.0/100 (0.013 s): loss 0.757, w gap: 14.042Epoch 84/100 (7.956 s), batch 16.0/100 (0.014 s): loss 0.573, w gap: 14.055Epoch 84/100 (7.957 s), batch 17.0/100 (0.015 s): loss 0.738, w gap: 14.071Epoch 84/100 (7.958 s), batch 18.0/100 (0.016 s): loss 0.474, w gap: 14.066Epoch 84/100 (7.959 s), batch 19.0/100 (0.017 s): loss 0.672, w gap: 14.049Epoch 84/100 (7.960 s), batch 20.0/100 (0.017 s): loss 0.566, w gap: 14.061Epoch 84/100 (7.963 s), batch 21.0/100 (0.021 s): loss 0.584, w gap: 14.052Epoch 84/100 (7.964 s), batch 22.0/100 (0.022 s): loss 1.036, w gap: 14.027Epoch 84/100 (7.965 s), batch 23.0/100 (0.023 s): loss 0.951, w gap: 14.033Epoch 84/100 (7.966 s), batch 24.0/100 (0.024 s): loss 0.624, w gap: 14.029Epoch 84/10

Epoch 86/100 (8.160 s), batch 1.0/100 (0.001 s): loss 0.534, w gap: 13.953Epoch 86/100 (8.161 s), batch 2.0/100 (0.002 s): loss 0.726, w gap: 13.950Epoch 86/100 (8.162 s), batch 3.0/100 (0.003 s): loss 0.807, w gap: 13.954Epoch 86/100 (8.163 s), batch 4.0/100 (0.004 s): loss 0.623, w gap: 13.945Epoch 86/100 (8.164 s), batch 5.0/100 (0.004 s): loss 0.730, w gap: 13.924Epoch 86/100 (8.164 s), batch 6.0/100 (0.005 s): loss 0.748, w gap: 13.929Epoch 86/100 (8.165 s), batch 7.0/100 (0.006 s): loss 0.573, w gap: 13.927Epoch 86/100 (8.166 s), batch 8.0/100 (0.007 s): loss 0.679, w gap: 13.942Epoch 86/100 (8.167 s), batch 9.0/100 (0.007 s): loss 0.878, w gap: 13.935Epoch 86/100 (8.167 s), batch 10.0/100 (0.008 s): loss 0.621, w gap: 13.946Epoch 86/100 (8.169 s), batch 11.0/100 (0.010 s): loss 0.745, w gap: 13.907Epoch 86/100 (8.170 s), batch 12.0/100 (0.010 s): loss 0.717, w gap: 13.907Epoch 86/100 (8.171 s), batch 13.0/100 (0.011 s): loss 0.708, w gap: 13.917Epoch 86/100 (8.171 

Epoch 87/100 (8.361 s), batch 94.0/100 (0.096 s): loss 0.533, w gap: 13.953Epoch 87/100 (8.363 s), batch 95.0/100 (0.098 s): loss 0.747, w gap: 13.936Epoch 87/100 (8.364 s), batch 96.0/100 (0.099 s): loss 0.756, w gap: 13.930Epoch 87/100 (8.365 s), batch 97.0/100 (0.099 s): loss 0.823, w gap: 13.933Epoch 87/100 (8.366 s), batch 98.0/100 (0.100 s): loss 0.908, w gap: 13.932Epoch 87/100 (8.366 s), batch 99.0/100 (0.101 s): loss 0.957, w gap: 13.950Epoch 87/100 (8.367 s), batch 100.0/100 (0.102 s): loss 0.607, w gap: 13.983Epoch 88/100 (8.374 s), batch 1.0/100 (0.001 s): loss 0.654, w gap: 13.985Epoch 88/100 (8.374 s), batch 2.0/100 (0.002 s): loss 0.539, w gap: 13.983Epoch 88/100 (8.375 s), batch 3.0/100 (0.003 s): loss 0.959, w gap: 13.975Epoch 88/100 (8.376 s), batch 4.0/100 (0.003 s): loss 0.583, w gap: 13.982Epoch 88/100 (8.377 s), batch 5.0/100 (0.004 s): loss 0.596, w gap: 13.974Epoch 88/100 (8.377 s), batch 6.0/100 (0.005 s): loss 0.780, w gap: 13.965Epoch 88/100 (8.

Epoch 89/100 (8.562 s), batch 96.0/100 (0.089 s): loss 0.564, w gap: 13.999Epoch 89/100 (8.563 s), batch 97.0/100 (0.090 s): loss 0.591, w gap: 13.983Epoch 89/100 (8.564 s), batch 98.0/100 (0.090 s): loss 0.786, w gap: 13.964Epoch 89/100 (8.565 s), batch 99.0/100 (0.091 s): loss 0.769, w gap: 13.965Epoch 89/100 (8.566 s), batch 100.0/100 (0.093 s): loss 0.585, w gap: 13.968Epoch 90/100 (8.575 s), batch 1.0/100 (0.001 s): loss 0.629, w gap: 13.954Epoch 90/100 (8.576 s), batch 2.0/100 (0.002 s): loss 0.555, w gap: 13.951Epoch 90/100 (8.576 s), batch 3.0/100 (0.003 s): loss 0.680, w gap: 13.948Epoch 90/100 (8.577 s), batch 4.0/100 (0.003 s): loss 0.785, w gap: 13.938Epoch 90/100 (8.578 s), batch 5.0/100 (0.004 s): loss 0.593, w gap: 13.925Epoch 90/100 (8.579 s), batch 6.0/100 (0.005 s): loss 0.396, w gap: 13.911Epoch 90/100 (8.579 s), batch 7.0/100 (0.006 s): loss 0.869, w gap: 13.906Epoch 90/100 (8.580 s), batch 8.0/100 (0.006 s): loss 0.691, w gap: 13.902Epoch 90/100 (8.58

Epoch 91/100 (8.763 s), batch 90.0/100 (0.083 s): loss 0.766, w gap: 14.030Epoch 91/100 (8.765 s), batch 91.0/100 (0.085 s): loss 0.685, w gap: 14.021Epoch 91/100 (8.766 s), batch 92.0/100 (0.086 s): loss 0.864, w gap: 14.024Epoch 91/100 (8.767 s), batch 93.0/100 (0.088 s): loss 0.806, w gap: 14.034Epoch 91/100 (8.768 s), batch 94.0/100 (0.088 s): loss 0.868, w gap: 14.041Epoch 91/100 (8.769 s), batch 95.0/100 (0.089 s): loss 0.850, w gap: 14.041Epoch 91/100 (8.770 s), batch 96.0/100 (0.090 s): loss 0.566, w gap: 14.039Epoch 91/100 (8.770 s), batch 97.0/100 (0.091 s): loss 0.606, w gap: 14.028Epoch 91/100 (8.771 s), batch 98.0/100 (0.091 s): loss 0.676, w gap: 14.014Epoch 91/100 (8.772 s), batch 99.0/100 (0.092 s): loss 0.714, w gap: 14.008Epoch 91/100 (8.773 s), batch 100.0/100 (0.093 s): loss 0.965, w gap: 14.011Epoch 92/100 (8.781 s), batch 1.0/100 (0.001 s): loss 1.003, w gap: 14.035Epoch 92/100 (8.782 s), batch 2.0/100 (0.002 s): loss 0.656, w gap: 14.041Epoch 92/100

Epoch 93/100 (8.965 s), batch 90.0/100 (0.089 s): loss 0.883, w gap: 13.961Epoch 93/100 (8.966 s), batch 91.0/100 (0.090 s): loss 0.720, w gap: 13.973Epoch 93/100 (8.966 s), batch 92.0/100 (0.091 s): loss 0.909, w gap: 13.974Epoch 93/100 (8.967 s), batch 93.0/100 (0.092 s): loss 0.606, w gap: 13.995Epoch 93/100 (8.968 s), batch 94.0/100 (0.092 s): loss 0.694, w gap: 13.989Epoch 93/100 (8.969 s), batch 95.0/100 (0.093 s): loss 0.758, w gap: 13.978Epoch 93/100 (8.969 s), batch 96.0/100 (0.094 s): loss 1.026, w gap: 13.978Epoch 93/100 (8.970 s), batch 97.0/100 (0.095 s): loss 0.822, w gap: 13.994Epoch 93/100 (8.971 s), batch 98.0/100 (0.096 s): loss 0.733, w gap: 13.985Epoch 93/100 (8.972 s), batch 99.0/100 (0.096 s): loss 0.774, w gap: 13.984Epoch 93/100 (8.973 s), batch 100.0/100 (0.097 s): loss 0.770, w gap: 13.988Epoch 94/100 (8.979 s), batch 1.0/100 (0.001 s): loss 0.972, w gap: 13.994Epoch 94/100 (8.980 s), batch 2.0/100 (0.002 s): loss 0.599, w gap: 13.994Epoch 94/100

Epoch 96/100 (9.169 s), batch 1.0/100 (0.001 s): loss 0.683, w gap: 13.972Epoch 96/100 (9.170 s), batch 2.0/100 (0.002 s): loss 0.918, w gap: 13.979Epoch 96/100 (9.171 s), batch 3.0/100 (0.003 s): loss 0.746, w gap: 14.003Epoch 96/100 (9.173 s), batch 4.0/100 (0.005 s): loss 0.878, w gap: 14.007Epoch 96/100 (9.173 s), batch 5.0/100 (0.005 s): loss 0.603, w gap: 14.002Epoch 96/100 (9.175 s), batch 6.0/100 (0.007 s): loss 0.742, w gap: 13.981Epoch 96/100 (9.176 s), batch 7.0/100 (0.008 s): loss 0.598, w gap: 13.983Epoch 96/100 (9.177 s), batch 8.0/100 (0.009 s): loss 0.697, w gap: 13.980Epoch 96/100 (9.178 s), batch 9.0/100 (0.010 s): loss 0.507, w gap: 13.963Epoch 96/100 (9.179 s), batch 10.0/100 (0.011 s): loss 0.632, w gap: 13.948Epoch 96/100 (9.181 s), batch 11.0/100 (0.013 s): loss 0.636, w gap: 13.953Epoch 96/100 (9.182 s), batch 12.0/100 (0.014 s): loss 1.011, w gap: 13.952Epoch 96/100 (9.182 s), batch 13.0/100 (0.014 s): loss 0.794, w gap: 13.958Epoch 96/100 (9.183 

Epoch 98/100 (9.370 s), batch 2.0/100 (0.003 s): loss 0.729, w gap: 13.967Epoch 98/100 (9.371 s), batch 3.0/100 (0.004 s): loss 0.853, w gap: 13.957Epoch 98/100 (9.372 s), batch 4.0/100 (0.005 s): loss 1.133, w gap: 13.968Epoch 98/100 (9.372 s), batch 5.0/100 (0.006 s): loss 0.767, w gap: 13.992Epoch 98/100 (9.373 s), batch 6.0/100 (0.006 s): loss 0.866, w gap: 13.994Epoch 98/100 (9.374 s), batch 7.0/100 (0.007 s): loss 0.757, w gap: 13.997Epoch 98/100 (9.375 s), batch 8.0/100 (0.008 s): loss 0.775, w gap: 13.994Epoch 98/100 (9.376 s), batch 9.0/100 (0.010 s): loss 0.843, w gap: 14.001Epoch 98/100 (9.377 s), batch 10.0/100 (0.010 s): loss 0.811, w gap: 13.996Epoch 98/100 (9.378 s), batch 11.0/100 (0.011 s): loss 0.867, w gap: 13.997Epoch 98/100 (9.379 s), batch 12.0/100 (0.012 s): loss 0.754, w gap: 13.988Epoch 98/100 (9.379 s), batch 13.0/100 (0.013 s): loss 0.634, w gap: 13.989Epoch 98/100 (9.380 s), batch 14.0/100 (0.013 s): loss 0.567, w gap: 13.988Epoch 98/100 (9.381

Epoch 100/100 (9.639 s), batch 100.0/100 (0.087 s): loss 0.853, w gap: 13.941
Epoch 100/100 (9.579 s), batch 100.0/100 (0.089 s): loss 0.141, w gap: 5.157
Epoch 100/100 (9.793 s), batch 100.0/100 (0.103 s): loss 0.004, w gap: 0.296
Epoch 100/100 (9.728 s), batch 100.0/100 (0.105 s): loss 0.000, w gap: 1.951
Epoch 100/100 (9.627 s), batch 100.0/100 (0.108 s): loss 0.000, w gap: 5.925
Epoch 100/100 (9.621 s), batch 100.0/100 (0.087 s): loss 0.000, w gap: 5.855
Epoch 100/100 (9.672 s), batch 100.0/100 (0.100 s): loss 0.000, w gap: 8.259
Epoch 100/100 (9.641 s), batch 100.0/100 (0.093 s): loss 0.000, w gap: 2.175
Epoch 100/100 (9.687 s), batch 100.0/100 (0.083 s): loss 0.000, w gap: 2.271
Epoch 100/100 (9.581 s), batch 100.0/100 (0.084 s): loss 0.069, w gap: 5.505
Epoch 100/100 (9.694 s), batch 100.0/100 (0.097 s): loss 0.000, w gap: 12.566
Epoch 100/100 (9.592 s), batch 100.0/100 (0.107 s): loss 0.000, w gap: 35.774
Epoch 100/100 (9.665 s), batch 100.0/100 (0.093 s): loss 0.000, w gap: 5.

### Sparse structure 2

Each hidden neuron is connected to s randomly chosen inputs.

In [None]:
# Sparse structure 2: every hidden neuron is wired to s randomly chosen inputs
def generate_sparse_w_2(p,s,h,seed=0,maskseed=0):
    """Draw random weights for a p-input, h-hidden-unit net with sparse structure 2.

    Args:
        p: number of inputs.
        s: number of inputs each hidden neuron is connected to (chosen without
           replacement, so numpy raises if s > p).
        h: number of hidden neurons.
        seed: seed for the Gaussian weight values.
        maskseed: seed for the connectivity pattern; keeping it fixed while
            changing `seed` gives new weights on the same sparsity structure.

    Returns:
        A tuple (weights, mask) where weights is
        {'weights1': (p,h) array, 'weights2': (h,1) array of ones} and mask is
        the (p,h) 0/1 array marking the entries of weights1 that may be non-zero.

    Note: reseeds numpy's global random state (first with maskseed, then seed).
    """
    # Connectivity pattern: one row per hidden neuron, s ones in each row
    np.random.seed(maskseed)
    mask = np.zeros((h,p))
    for neuron_row in mask:
        picked = np.random.choice(range(p),s,replace=False)
        neuron_row[picked] = 1

    # Dense N(0, 1/s) draw, then zero out the entries outside the mask
    np.random.seed(seed)
    dense = np.random.normal(0,1./np.sqrt(s),[h,p])
    weights = {
        'weights1': (dense*mask).T,
        'weights2': np.ones([h,1]),
    }
    return weights,mask.T

In [None]:
# Case where h > p
# For each sparsity level s, run num_sims simulations: labels are generated by a
# structure-2 network and a second network with the same mask but different
# initial weights is trained on them.

import pickle  # needed for the dump at the end of this cell

num_sims = 100 # number of simulations
N = 10000      # number of samples

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable
p = 5    # number of inputs
h = 10   # number of hidden nodes

list_s = range(1,5)
outputs = {s:[] for s in list_s}
start = time.time()

# NOTE(review): the "h < p" cell uses the same savedir pattern and the same
# outputs.pickle path, so running both cells overwrites one set of results.

# s = number of inputs each hidden neuron is connected to
for s in list_s:
    
    for t in range(num_sims):
        
        # h is the fixed value set above; structure 2 does not tie h to s
        input_dict = dict(p=p,h=h)
        
        seed = (s-1)*num_sims+t  # unique seed per (s,t) pair
        savedir = '%ssparse_structure_2/initialization_%s_s%s/'%(maindir,t,s)
    
        # Generate data
        w_true,M = generate_sparse_w_2(p,s,h,seed=seed,maskseed=seed)
        X = dl_utils.generate_X(N,0,input_dict,cov_is_eye=True,seed=seed)
        Y = dl_utils.generate_output(X,w_true,input_dict,build_func=build_func)

        # initialize weights to have desired sparsity structure
        # (same maskseed -> same mask as w_true; seed+1 -> different weight values)
        w_init,M = generate_sparse_w_2(p,s,h,seed=seed+1,maskseed=seed)

        # Build graph and train..
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)
        
#         # If weights exist already, load useful information
#         _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
#                                                                     savedir,build_func)
#         initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
#         out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]
        
        # Save useful information (keyed by the sparsity level of this run)
        outputs[s].append(out)
        
        print('\rs = %s, %s/%s simulations done (%.2f s elapsed)'\
              %(s,t+1,num_sims,time.time()-start),end='')
        
# Context manager closes the file even if pickling fails
with open('%ssparse_structure_2/outputs.pickle'%(maindir),'wb') as f:
    pickle.dump(outputs,f)

In [None]:
# Case where h < p
# For each sparsity level s, run num_sims simulations: labels are generated by a
# structure-2 network and a second network with the same mask but different
# initial weights is trained on them.

import pickle  # needed for the dump at the end of this cell

num_sims = 100 # number of simulations
N = 10000      # number of samples

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable
p = 5    # number of inputs
h = 3    # number of hidden nodes

list_s = range(1,5)
outputs = {s:[] for s in list_s}
start = time.time()

# NOTE(review): the "h > p" cell uses the same savedir pattern and the same
# outputs.pickle path, so running both cells overwrites one set of results.

# s = number of inputs each hidden neuron is connected to
for s in list_s:
    
    for t in range(num_sims):
        
        # h is the fixed value set above; structure 2 does not tie h to s
        input_dict = dict(p=p,h=h)
        
        seed = (s-1)*num_sims+t  # unique seed per (s,t) pair
        savedir = '%ssparse_structure_2/initialization_%s_s%s/'%(maindir,t,s)
    
        # Generate data
        w_true,M = generate_sparse_w_2(p,s,h,seed=seed,maskseed=seed)
        X = dl_utils.generate_X(N,0,input_dict,cov_is_eye=True,seed=seed)
        Y = dl_utils.generate_output(X,w_true,input_dict,build_func=build_func)

        # initialize weights to have desired sparsity structure
        # (same maskseed -> same mask as w_true; seed+1 -> different weight values)
        w_init,M = generate_sparse_w_2(p,s,h,seed=seed+1,maskseed=seed)

        # Build graph and train..
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)
        
#         # If weights exist already, load useful information
#         _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
#                                                                     savedir,build_func)
#         initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
#         out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]
        
        # Save useful information (keyed by the sparsity level of this run)
        outputs[s].append(out)
        
        print('\rs = %s, %s/%s simulations done (%.2f s elapsed)'\
              %(s,t+1,num_sims,time.time()-start),end='')
        
# Context manager closes the file even if pickling fails
with open('%ssparse_structure_2/outputs.pickle'%(maindir),'wb') as f:
    pickle.dump(outputs,f)

# Experiment set 2: true labels generated with dense architectures

Layer2-to-layer3 weights are still all fixed to 1 (not trainable).

In [None]:
# Generate labels using dense networks
# Builds a fully-connected p -> h network with random first-layer weights and
# all-ones second-layer weights, then uses it to label N generated inputs.

np.random.seed(0)
p = 10
h = 3
w = {'weights1':np.random.normal(0,1./np.sqrt(p),[p,h]),
     'weights2':np.ones([h,1])}

# The training cells refer to the label-generating weights as w_true, so bind the
# dense weights to that name here; `w` is kept as an alias.
w_true = w

# Describe the dense network explicitly so the data does not depend on whatever
# input_dict / seed an earlier cell happened to leave behind.
input_dict = dict(p=p,h=h)

N = 10000
X = dl_utils.generate_X(N,0,input_dict,cov_is_eye=True,seed=0)
Y = dl_utils.generate_output(X,w_true,input_dict,build_func=arch.mlp_noreluout_lastlayernottrainable)

### Sparse structure 1

In [None]:
# Train structure-1 sparse networks on the dense-network labels.
# Relies on p, X, Y and w_true being set by the label-generation cell.

import pickle  # needed for the dump at the end of this cell

num_attempts = 10 # different initializations may have different performance, so try a few

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable

list_r = range(1,5)
outputs = {r:[] for r in list_r}
start = time.time()

# r = number of neurons each input is connected to
for r in list_r:
    
    for t in range(num_attempts):

        h = p*r  # number of neurons
        input_dict = dict(p=p,h=h)

        # unique seed per (r,t) pair, based on the number of attempts in this cell
        seed = (r-1)*num_attempts+t
        # NOTE(review): savedir does not depend on t, so all attempts for a given r
        # share one directory -- confirm dl_utils.train is fine with that.
        savedir = '%ssparse_structure_1_denselabels/initialization_r%s/'%(maindir,r)

        # initialize weights to have desired sparsity structure
        w_init,M = generate_sparse_w_1(p,r,seed=seed)

        # Build graph and train..
        # NOTE(review): w_true comes from a dense net whose hidden size may differ
        # from h here -- verify the w gap reported by dl_utils.train is meaningful.
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)

    #     # If weights exist already, load useful information
    #     _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
    #                                                                 savedir,build_func)
    #     initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
    #     out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]

        # Save useful information
        outputs[r].append(out)

# Context manager closes the file even if pickling fails
with open('%ssparse_structure_1_denselabels/outputs.pickle'%(maindir),'wb') as f:
    pickle.dump(outputs,f)

### Sparse structure 2

In [None]:
# Case where h < p
# Train structure-2 sparse networks on the dense-network labels.
# Relies on p, X, Y and w_true being set by the label-generation cell.

import pickle  # needed for the dump at the end of this cell

num_attempts = 10 # different initializations may have different performance, so try a few

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable

h = 3    # number of hidden nodes

list_s = range(1,9)
outputs = {s:[] for s in list_s}
start = time.time()

# s = number of inputs each hidden neuron is connected to
for s in list_s:
    
    for t in range(num_attempts):

        # h is the fixed value set above; structure 2 does not tie h to s
        input_dict = dict(p=p,h=h)

        # unique seed per (s,t) pair, based on the number of attempts in this cell
        seed = (s-1)*num_attempts+t
        # NOTE(review): savedir does not depend on t, so all attempts for a given s
        # share one directory -- confirm dl_utils.train is fine with that.
        savedir = '%ssparse_structure_2_denselabels_pgh/initialization_s%s/'%(maindir,s)

        # initialize weights to have desired sparsity structure
        w_init,M = generate_sparse_w_2(p,s,h,seed=seed+1,maskseed=seed)

        # Build graph and train..
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)

    #     # If weights exist already, load useful information
    #     _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
    #                                                                 savedir,build_func)
    #     initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
    #     out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]

        # Save useful information (keyed by the sparsity level of this run)
        outputs[s].append(out)

# Context manager closes the file even if pickling fails
with open('%ssparse_structure_2_denselabels_pgh/outputs.pickle'%(maindir),'wb') as f:
    pickle.dump(outputs,f)

In [None]:
# Case where h > p
# Train structure-2 sparse networks on the dense-network labels.
# Relies on p, X, Y and w_true being set by the label-generation cell.

import pickle  # needed for the dump at the end of this cell

num_attempts = 10 # different initializations may have different performance, so try a few

# Network parameters
num_epochs = 100
batch_size = 100
build_func = arch.mlp_noreluout_lastlayernottrainable

h = 15    # number of hidden nodes

list_s = range(1,9)
outputs = {s:[] for s in list_s}
start = time.time()

# s = number of inputs each hidden neuron is connected to
for s in list_s:
    
    for t in range(num_attempts):

        # h is the fixed value set above; structure 2 does not tie h to s
        input_dict = dict(p=p,h=h)

        # unique seed per (s,t) pair, based on the number of attempts in this cell
        seed = (s-1)*num_attempts+t
        # NOTE(review): savedir does not depend on t, so all attempts for a given s
        # share one directory -- confirm dl_utils.train is fine with that.
        savedir = '%ssparse_structure_2_denselabels_plh/initialization_s%s/'%(maindir,s)

        # initialize weights to have desired sparsity structure
        w_init,M = generate_sparse_w_2(p,s,h,seed=seed+1,maskseed=seed)

        # Build graph and train..
        # NOTE(review): w_true comes from a dense net whose hidden size may differ
        # from h here -- verify the w gap reported by dl_utils.train is meaningful.
        tf.reset_default_graph()
        with tf.device("/gpu:%s"%(gpu_id)):
            graph = graph_builder_wrapper_sparse(input_dict,M,build_func=build_func)
            out = dl_utils.train(X,Y,graph,num_epochs,batch_size,w_true,w_init,savedir=savedir)

    #     # If weights exist already, load useful information
    #     _,final_train_loss,final_w_gap = dl_utils.get_train_out(X,Y,w_true,input_dict,
    #                                                                 savedir,build_func)
    #     initial_w_gap = dl_utils.compute_w_gap(w_true,w_init)
    #     out = ['','','','',initial_w_gap,final_train_loss,final_w_gap]

        # Save useful information (keyed by the sparsity level of this run)
        outputs[s].append(out)

# Context manager closes the file even if pickling fails
with open('%ssparse_structure_2_denselabels_plh/outputs.pickle'%(maindir),'wb') as f:
    pickle.dump(outputs,f)