## Todo:


In [1]:
# Identifier of this run; it is appended to the snapshot root to form SNAPSHOT_DIR.
EXPERIMENT_NAME = 'experiment_01_stanford40_train'

In [2]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random
import pickle

from utils import optimistic_restore, save
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))

import pdb

In [3]:
'''
HYPERPARAMS
'''
# Number of examples fed per training / validation step.
BATCH_SIZE = 10
# Number of validation passes without improvement (counted since the last
# improvement or the last decay) before the learning rate is decayed.
PATIENCE = 2
# Pickled triplet lists for the two data partitions.
TRIPLETS_TRAIN = '/media/red/capstone/data/stanford40_triplets_train.pkl'
TRIPLETS_VALIDATION = '/media/red/capstone/data/stanford40_triplets_val.pkl'

# Initial Adam learning rate and the factor it is multiplied by on each decay.
LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.7
# Adam moment-decay coefficients.
BETA1 = 0.9
BETA2 = 0.99
NUM_EPOCH = 100
RANDOM_SEED = 1234
# A training summary is written (and the loss printed) whenever the global
# step is a multiple of this value.
SUMMARY_EVERY = 10
# NOTE(review): not referenced by any code visible in this notebook export.
VALIDATION_PERCENTAGE = 0.05
SNAPSHOT_MAX = 3 # Max snapshots kept by the Saver (max_to_keep); a snapshot is written each time the validation loss improves
SNAPSHOT_DIR = os.path.join('/media/red/capstone/snapshots/', EXPERIMENT_NAME)

# Network params
# When True, X1, X2 and Y of every triplet are divided by their norm before use.
NORMALIZE = True
# Base width used to size the hidden layers of the network.
N_FEAT = 4096

# Seeds NumPy's global RNG, which drives the per-epoch shuffle of the training data.
np.random.seed(seed=RANDOM_SEED)

In [4]:
'''
Load Triplets
'''
class SVM_Triplet:
    """Plain record describing one training triplet.

    The constructor only stores its arguments as attributes of the same
    name. Downstream code reads ``X1``, ``X2`` and ``Y``; the input to the
    model is built from ``X1`` and ``X2`` and the target from ``Y - X1``.

    NOTE(review): the pickled triplet files presumably contain instances of
    this class; pickle resolves classes by name, so keep the class name and
    attribute names stable -- confirm against the script that wrote them.
    """

    def __init__(self, X1, X2, Y, base_classes, pos_class, new_class):
        # Feature vectors / target (array-like; exact shape not visible here).
        self.X1, self.X2, self.Y = X1, X2, Y
        # Class bookkeeping carried along with the vectors.
        self.base_classes, self.pos_class, self.new_class = (
            base_classes, pos_class, new_class)
        
# Load features
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point these paths at triplet files produced by a trusted pipeline.
triplets_loadin = {}
for partition, triplets_path in (('train', TRIPLETS_TRAIN),
                                 ('validation', TRIPLETS_VALIDATION)):
    # Context manager guarantees the file handle is closed after loading.
    with open(triplets_path, "rb") as triplets_file:
        triplets_loadin[partition] = pickle.load(triplets_file)

# Per-partition model inputs (X1 and X2 stacked side by side) and
# regression targets (Y - X1).
x_data = {
    'train': [],
    'validation': [],
}
y_data = {
    'train': [],
    'validation': [],
}
for partition in ['train', 'validation']:
    for triplet in triplets_loadin[partition]:
        X1, X2, Y = triplet.X1, triplet.X2, triplet.Y
        if NORMALIZE:
            # Scale each vector by its norm along axis 0.
            # NOTE(review): a zero-norm vector yields NaN/inf here.
            X1 = X1 / np.linalg.norm(X1, axis=0, keepdims=True)
            X2 = X2 / np.linalg.norm(X2, axis=0, keepdims=True)
            Y = Y / np.linalg.norm(Y, axis=0, keepdims=True)
        x_data[partition].append(np.hstack((X1, X2)))
        y_data[partition].append(Y - X1)

# Collapse the per-example lists into one array per partition.
for partition in ['train', 'validation']:
    x_data[partition] = np.stack(x_data[partition])
    y_data[partition] = np.stack(y_data[partition])

In [None]:
'''
Declare model
'''

def lrelu(x, alpha=0.1):
    """Leaky ReLU: identity for positive inputs, slope ``alpha`` for negative ones.

    Args:
        x: input tensor.
        alpha: multiplier applied to the negative part of ``x``.

    Returns:
        Tensor equal to ``relu(x) - alpha * relu(-x)``.
    """
    positive_part = tf.nn.relu(x)
    negative_magnitude = tf.nn.relu(-x)
    return positive_part - alpha * negative_magnitude

def net(x, is_training):
    """Build the fully connected regression network.

    Three dense blocks (widths ``2*N_FEAT``, ``2*N_FEAT``, ``N_FEAT``) are
    followed by a linear output layer of width ``N_FEAT``. The output width
    is derived from ``N_FEAT`` rather than a literal so it stays consistent
    with the hidden-layer sizes.

    Args:
        x: input tensor fed to the first dense block.
        is_training: boolean (tensor) forwarded to batch normalisation.

    Returns:
        A list of tensors: the input first, then for every dense block the
        fc output, the batch-norm output and the leaky-ReLU output, and
        finally the output of the linear ``fc_final`` layer (last element).
    """
    # Every intermediate tensor is appended here; each layer reads stack[-1].
    stack = [x]

    def dense_block(n_units):
        # layers.fc returns a sequence whose first element is used as the
        # layer output (the remaining elements are not used here).
        # NOTE(review): the fc layer is asked for a 'relu' activation and a
        # leaky ReLU is applied again after batch norm -- confirm intended.
        stack.append(layers.fc(
            input=stack[-1],
            units=n_units,
            activation='relu',
            name='fc',
        )[0])
        stack.append(tf.contrib.layers.batch_norm(
            stack[-1],
            center=True, scale=True,
            is_training=is_training,
            scope='bn'))
        stack.append(lrelu(stack[-1]))

    n_units_list = [2 * N_FEAT, 2 * N_FEAT, N_FEAT]
    for i, n in enumerate(n_units_list):
        # A distinct variable scope per block keeps the 'fc'/'bn' names unique.
        with tf.variable_scope("block_" + str(i)):
            dense_block(n)

    stack.append(layers.fc(
        input=stack[-1],
        units=N_FEAT,
        activation='linear',
        name='fc_final',
    )[0])
    return stack

In [None]:
'''
Model Setup
'''
# Set the graph-level seed BEFORE any op is created: random ops (including
# variable initializers) derive their seed from the graph seed that is in
# effect when they are constructed, so seeding later does not affect them.
tf.set_random_seed(RANDOM_SEED)

# Network input (two feature vectors side by side) and regression target.
x = tf.placeholder(dtype=tf.float32, shape=(None, 2 * N_FEAT))
y = tf.placeholder(dtype=tf.float32, shape=(None, N_FEAT))
# Switches batch normalisation between training and inference behaviour.
is_training = tf.placeholder(dtype=tf.bool)

global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False, initializer=0) #incremented everytime optimizer runs
# Kept as a variable so it can be decayed during training.
lr = tf.get_variable('learning_rate', dtype=tf.float32, trainable=False, initializer=LEARNING_RATE)

# NOTE: this rebinds the name `net` from the builder function to the list of
# layer outputs it returns, so this cell cannot be re-run without first
# re-running the cell that defines `net`.
net = net(x, is_training)

'''
Loss, Metrics, and Optimization Setup
'''
# The last element of the layer stack is the network prediction.
pred = net[-1]
# Cosine distance expects unit-length inputs, so both sides are L2-normalised
# per example.
# NOTE(review): an all-zero row makes the norm 0 and the result NaN.
pred_normalized = pred / tf.norm(pred, axis=1, keep_dims=True)
y_normalized = y / tf.norm(y,axis=1,keep_dims=True)
reduced_loss = tf.losses.cosine_distance(
        labels=y_normalized,
        predictions=pred_normalized,
        dim=1,
        reduction=tf.losses.Reduction.MEAN,
        )
train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)

optimizer = tf.train.AdamOptimizer(
        learning_rate=lr,
        beta1=BETA1,
        beta2=BETA2,
        name='AdamOptimizer')
tvars = tf.trainable_variables()
# Clip the global gradient norm to 5.0 before applying the update.
grads, _ = tf.clip_by_global_norm(tf.gradients(reduced_loss, tvars), 5.0)
# tf.contrib.layers.batch_norm registers its moving-average updates in the
# UPDATE_OPS collection; they only run if the train op depends on them.
# Without this dependency the moving mean/variance keep their initial values
# and inference-mode (is_training=False) outputs are wrong.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

'''
TensorBoard Setup
'''
# Single op that evaluates every summary registered in the graph so far.
all_train_summary = tf.summary.merge_all()

# Event files (and the graph definition) are written next to the snapshots.
summary_writer = tf.summary.FileWriter(
    logdir=SNAPSHOT_DIR,
    graph=tf.get_default_graph(),
)

'''
Tensorflow Saver Setup
'''
# Checkpoints cover all global variables; at most SNAPSHOT_MAX are retained.
saver = tf.train.Saver(
    max_to_keep=SNAPSHOT_MAX,
    var_list=tf.global_variables(),
)

'''
Tensorflow Session Setup
'''
# NOTE(review): the graph-level seed presumably only influences random ops
# created after this call -- confirm against the TensorFlow documentation.
tf.set_random_seed(RANDOM_SEED)

# Cap the GPU memory share at 80%, allocate it on demand, and let TensorFlow
# fall back to another device when an op has no kernel for the requested one.
config = tf.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.8,
        allow_growth=True,
    ),
)
sess = tf.Session(config=config)

# Initialise both global and local variables in one run call.
init = tf.group(
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
)
sess.run(init)

'''
Primary Loop
'''
partition_types = ['train', 'validation']
best_loss = float('inf')
patience_counter = 0

# Build the learning-rate decay op once. Calling lr.assign(...) inside the
# loop would add a new op to the graph on every decay.
lr_decay_op = tf.assign(lr, lr * LEARNING_RATE_DECAY)

step_v = global_step.eval(session=sess)
for epoch in range(NUM_EPOCH):
    # Sum of per-example validation losses for this epoch.
    overall_loss = 0.0

    print('Training Epoch {}/{}'.format(
            epoch, NUM_EPOCH))

    for partition in partition_types: # Itr through data partitions
        n_exemplars = x_data[partition].shape[0]
        if n_exemplars == 0:
            # Nothing to train/validate on; also avoids dividing by zero below.
            continue

        if partition == 'train':
            # Reshuffle inputs and targets with the same permutation each epoch.
            shuffle_indices = np.arange(n_exemplars)
            np.random.shuffle(shuffle_indices)
            x_data['train'] = x_data['train'][shuffle_indices, ...]
            y_data['train'] = y_data['train'][shuffle_indices, ...]

        step_v = global_step.eval(session=sess)
        for i in range(0, n_exemplars, BATCH_SIZE):
            # The last batch may be smaller than BATCH_SIZE.
            upper_range = min(i + BATCH_SIZE, n_exemplars)
            x_batch = x_data[partition][i:upper_range, ...]
            y_batch = y_data[partition][i:upper_range, ...]

            feed_dict = {
                x: x_batch,
                y: y_batch,
                # Batch norm uses batch statistics only while training.
                is_training: partition == 'train',
            }

            # Run the proper sess run command
            if partition == 'train':
                start_t = time()
                if step_v % SUMMARY_EVERY == 0:
                    _, loss_v, summary_v = sess.run(
                        [train_op, reduced_loss, all_train_summary],
                        feed_dict=feed_dict)
                    summary_writer.add_summary(summary_v, step_v)
                    duration = time() - start_t
                    print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                            step_v, loss_v, duration))
                else: # Vanilla Training
                    sess.run(train_op, feed_dict=feed_dict)
                step_v = global_step.eval(session=sess)
            elif partition == 'validation':
                loss_v = sess.run(reduced_loss, feed_dict=feed_dict)
                # reduced_loss is the mean over the batch; weight it by the
                # batch size so that dividing by the example count below
                # yields the true per-example mean (also for a short last batch).
                overall_loss += loss_v * (upper_range - i)

        # Post-epoch routine for validation set (saving, stat computation, etc)
        if partition == 'validation':
            overall_loss /= n_exemplars
            overall_loss_summary = tf.Summary()
            overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)
            summary_writer.add_summary(overall_loss_summary, step_v)
            # Push buffered events to disk so progress survives an interruption.
            summary_writer.flush()

            if overall_loss < best_loss:
                print('New Best Loss {:.3f} < Old Best {:.3f}.  Saving...'.format(
                        overall_loss, best_loss))
                best_loss = overall_loss
                patience_counter = 0
                save(saver, sess, SNAPSHOT_DIR, step_v)
            else:
                patience_counter += 1

            # The counter only changes after a validation pass, so the
            # patience check belongs here rather than after every partition.
            if patience_counter >= PATIENCE:
                patience_counter = 0
                sess.run(lr_decay_op)

Training Epoch 0/100
step 0 	 loss = 1.004 (3.001 sec/step)
step 10 	 loss = 0.999 (2.756 sec/step)
step 20 	 loss = 0.991 (2.561 sec/step)
step 30 	 loss = 0.972 (2.605 sec/step)
step 40 	 loss = 0.972 (2.527 sec/step)
step 50 	 loss = 0.968 (2.835 sec/step)
step 60 	 loss = 0.955 (2.768 sec/step)
step 70 	 loss = 0.934 (2.706 sec/step)
step 80 	 loss = 0.934 (2.485 sec/step)
step 90 	 loss = 0.935 (2.771 sec/step)
step 100 	 loss = 0.913 (2.771 sec/step)
step 110 	 loss = 0.905 (2.661 sec/step)
step 120 	 loss = 0.847 (2.888 sec/step)
step 130 	 loss = 0.895 (3.087 sec/step)
step 140 	 loss = 0.847 (2.615 sec/step)
step 150 	 loss = 0.858 (2.801 sec/step)
step 160 	 loss = 0.834 (2.875 sec/step)
step 170 	 loss = 0.846 (2.863 sec/step)
step 180 	 loss = 0.855 (2.617 sec/step)
step 190 	 loss = 0.792 (2.575 sec/step)
step 200 	 loss = 0.842 (2.883 sec/step)
step 210 	 loss = 0.839 (2.874 sec/step)
step 220 	 loss = 0.761 (2.938 sec/step)
step 230 	 loss = 0.803 (2.432 sec/step)
step 2

step 1980 	 loss = 0.605 (2.963 sec/step)
step 1990 	 loss = 0.607 (2.845 sec/step)
step 2000 	 loss = 0.633 (3.047 sec/step)
step 2010 	 loss = 0.629 (3.020 sec/step)
step 2020 	 loss = 0.591 (2.615 sec/step)
step 2030 	 loss = 0.610 (2.840 sec/step)
step 2040 	 loss = 0.622 (2.876 sec/step)
step 2050 	 loss = 0.628 (2.714 sec/step)
step 2060 	 loss = 0.607 (2.585 sec/step)
step 2070 	 loss = 0.580 (2.779 sec/step)
step 2080 	 loss = 0.592 (2.912 sec/step)
step 2090 	 loss = 0.646 (2.647 sec/step)
step 2100 	 loss = 0.649 (3.088 sec/step)
step 2110 	 loss = 0.647 (2.554 sec/step)
step 2120 	 loss = 0.600 (2.993 sec/step)
step 2130 	 loss = 0.632 (3.153 sec/step)
step 2140 	 loss = 0.592 (2.909 sec/step)
step 2150 	 loss = 0.630 (2.733 sec/step)
step 2160 	 loss = 0.628 (2.690 sec/step)
step 2170 	 loss = 0.609 (2.653 sec/step)
step 2180 	 loss = 0.616 (3.092 sec/step)
step 2190 	 loss = 0.582 (2.796 sec/step)
step 2200 	 loss = 0.595 (2.627 sec/step)
step 2210 	 loss = 0.603 (2.651 se

step 3910 	 loss = 0.549 (2.914 sec/step)
step 3920 	 loss = 0.525 (3.156 sec/step)
step 3930 	 loss = 0.558 (2.834 sec/step)
step 3940 	 loss = 0.528 (2.991 sec/step)
step 3950 	 loss = 0.555 (2.509 sec/step)
step 3960 	 loss = 0.546 (2.401 sec/step)
step 3970 	 loss = 0.535 (2.419 sec/step)
step 3980 	 loss = 0.550 (2.442 sec/step)
step 3990 	 loss = 0.534 (2.274 sec/step)
step 4000 	 loss = 0.570 (2.442 sec/step)
step 4010 	 loss = 0.534 (2.195 sec/step)
step 4020 	 loss = 0.491 (2.284 sec/step)
step 4030 	 loss = 0.534 (2.228 sec/step)
step 4040 	 loss = 0.491 (2.369 sec/step)
step 4050 	 loss = 0.535 (3.290 sec/step)
step 4060 	 loss = 0.565 (2.593 sec/step)
step 4070 	 loss = 0.553 (2.426 sec/step)
step 4080 	 loss = 0.536 (3.006 sec/step)
step 4090 	 loss = 0.568 (2.724 sec/step)
step 4100 	 loss = 0.563 (2.651 sec/step)
step 4110 	 loss = 0.534 (2.904 sec/step)
step 4120 	 loss = 0.523 (2.988 sec/step)
step 4130 	 loss = 0.535 (3.039 sec/step)
step 4140 	 loss = 0.541 (2.783 se

step 5870 	 loss = 0.550 (3.022 sec/step)
step 5880 	 loss = 0.546 (2.640 sec/step)
step 5890 	 loss = 0.539 (2.657 sec/step)
step 5900 	 loss = 0.514 (2.629 sec/step)
step 5910 	 loss = 0.515 (2.581 sec/step)
step 5920 	 loss = 0.526 (2.769 sec/step)
step 5930 	 loss = 0.554 (2.980 sec/step)
step 5940 	 loss = 0.557 (2.604 sec/step)
step 5950 	 loss = 0.543 (2.747 sec/step)
step 5960 	 loss = 0.491 (2.650 sec/step)
step 5970 	 loss = 0.498 (2.662 sec/step)
step 5980 	 loss = 0.579 (3.110 sec/step)
step 5990 	 loss = 0.535 (2.922 sec/step)
step 6000 	 loss = 0.530 (2.812 sec/step)
step 6010 	 loss = 0.571 (2.761 sec/step)
step 6020 	 loss = 0.544 (2.597 sec/step)
step 6030 	 loss = 0.499 (2.826 sec/step)
step 6040 	 loss = 0.502 (2.850 sec/step)
step 6050 	 loss = 0.542 (2.944 sec/step)
step 6060 	 loss = 0.535 (2.961 sec/step)
step 6070 	 loss = 0.518 (3.014 sec/step)
step 6080 	 loss = 0.530 (2.780 sec/step)
step 6090 	 loss = 0.537 (2.689 sec/step)
step 6100 	 loss = 0.474 (3.041 se

step 7800 	 loss = 0.483 (3.378 sec/step)
step 7810 	 loss = 0.505 (3.132 sec/step)
step 7820 	 loss = 0.477 (2.602 sec/step)
step 7830 	 loss = 0.516 (3.031 sec/step)
step 7840 	 loss = 0.444 (2.549 sec/step)
step 7850 	 loss = 0.463 (2.533 sec/step)
step 7860 	 loss = 0.463 (3.166 sec/step)
step 7870 	 loss = 0.445 (2.739 sec/step)
step 7880 	 loss = 0.475 (3.064 sec/step)
step 7890 	 loss = 0.490 (2.673 sec/step)
step 7900 	 loss = 0.451 (2.809 sec/step)
step 7910 	 loss = 0.455 (2.558 sec/step)
step 7920 	 loss = 0.443 (2.840 sec/step)
step 7930 	 loss = 0.436 (2.913 sec/step)
step 7940 	 loss = 0.472 (2.809 sec/step)
step 7950 	 loss = 0.496 (3.052 sec/step)
step 7960 	 loss = 0.493 (2.882 sec/step)
step 7970 	 loss = 0.477 (2.861 sec/step)
step 7980 	 loss = 0.496 (2.621 sec/step)
step 7990 	 loss = 0.443 (2.790 sec/step)
step 8000 	 loss = 0.452 (2.791 sec/step)
step 8010 	 loss = 0.478 (2.902 sec/step)
step 8020 	 loss = 0.431 (2.822 sec/step)
step 8030 	 loss = 0.480 (2.889 se

step 9760 	 loss = 0.473 (2.161 sec/step)
step 9770 	 loss = 0.487 (2.266 sec/step)
step 9780 	 loss = 0.491 (2.279 sec/step)
step 9790 	 loss = 0.489 (2.291 sec/step)
step 9800 	 loss = 0.476 (3.174 sec/step)
step 9810 	 loss = 0.475 (2.655 sec/step)
step 9820 	 loss = 0.477 (2.467 sec/step)
step 9830 	 loss = 0.483 (2.697 sec/step)
step 9840 	 loss = 0.482 (2.845 sec/step)
step 9850 	 loss = 0.468 (2.524 sec/step)
step 9860 	 loss = 0.477 (3.226 sec/step)
step 9870 	 loss = 0.478 (2.980 sec/step)
step 9880 	 loss = 0.463 (2.865 sec/step)
step 9890 	 loss = 0.490 (2.874 sec/step)
step 9900 	 loss = 0.475 (2.866 sec/step)
step 9910 	 loss = 0.474 (2.603 sec/step)
step 9920 	 loss = 0.471 (2.535 sec/step)
step 9930 	 loss = 0.511 (2.784 sec/step)
step 9940 	 loss = 0.485 (2.750 sec/step)
step 9950 	 loss = 0.496 (2.687 sec/step)
step 9960 	 loss = 0.482 (2.884 sec/step)
step 9970 	 loss = 0.463 (3.032 sec/step)
step 9980 	 loss = 0.500 (2.961 sec/step)
step 9990 	 loss = 0.487 (2.829 se

step 11650 	 loss = 0.429 (2.769 sec/step)
step 11660 	 loss = 0.419 (2.855 sec/step)
step 11670 	 loss = 0.417 (3.041 sec/step)
step 11680 	 loss = 0.431 (2.659 sec/step)
step 11690 	 loss = 0.448 (2.886 sec/step)
step 11700 	 loss = 0.469 (2.876 sec/step)
step 11710 	 loss = 0.411 (2.882 sec/step)
step 11720 	 loss = 0.403 (2.705 sec/step)
step 11730 	 loss = 0.454 (2.861 sec/step)
step 11740 	 loss = 0.427 (3.070 sec/step)
step 11750 	 loss = 0.450 (2.768 sec/step)
step 11760 	 loss = 0.411 (2.692 sec/step)
step 11770 	 loss = 0.442 (2.895 sec/step)
step 11780 	 loss = 0.421 (2.650 sec/step)
step 11790 	 loss = 0.419 (2.759 sec/step)
step 11800 	 loss = 0.442 (2.532 sec/step)
step 11810 	 loss = 0.433 (3.072 sec/step)
step 11820 	 loss = 0.463 (2.990 sec/step)
step 11830 	 loss = 0.419 (2.633 sec/step)
step 11840 	 loss = 0.440 (2.759 sec/step)
step 11850 	 loss = 0.437 (2.700 sec/step)
step 11860 	 loss = 0.427 (2.920 sec/step)
step 11870 	 loss = 0.423 (2.502 sec/step)
step 11880 

step 13560 	 loss = 0.451 (2.625 sec/step)
step 13570 	 loss = 0.458 (2.714 sec/step)
step 13580 	 loss = 0.420 (3.233 sec/step)
step 13590 	 loss = 0.428 (2.760 sec/step)
step 13600 	 loss = 0.424 (2.949 sec/step)
step 13610 	 loss = 0.447 (2.712 sec/step)
step 13620 	 loss = 0.410 (2.801 sec/step)
step 13630 	 loss = 0.429 (2.759 sec/step)
step 13640 	 loss = 0.440 (2.607 sec/step)
step 13650 	 loss = 0.419 (2.830 sec/step)
step 13660 	 loss = 0.433 (2.740 sec/step)
step 13670 	 loss = 0.428 (2.994 sec/step)
step 13680 	 loss = 0.432 (2.887 sec/step)
step 13690 	 loss = 0.423 (2.835 sec/step)
step 13700 	 loss = 0.427 (2.995 sec/step)
step 13710 	 loss = 0.463 (3.132 sec/step)
step 13720 	 loss = 0.440 (2.679 sec/step)
step 13730 	 loss = 0.394 (3.014 sec/step)
step 13740 	 loss = 0.425 (2.775 sec/step)
step 13750 	 loss = 0.437 (2.852 sec/step)
step 13760 	 loss = 0.466 (2.712 sec/step)
step 13770 	 loss = 0.414 (2.626 sec/step)
step 13780 	 loss = 0.411 (2.570 sec/step)
step 13790 

step 15450 	 loss = 0.386 (2.350 sec/step)
step 15460 	 loss = 0.397 (2.331 sec/step)
step 15470 	 loss = 0.380 (2.294 sec/step)
step 15480 	 loss = 0.371 (2.317 sec/step)
step 15490 	 loss = 0.403 (2.515 sec/step)
step 15500 	 loss = 0.381 (2.280 sec/step)
step 15510 	 loss = 0.396 (2.384 sec/step)
step 15520 	 loss = 0.382 (3.465 sec/step)
step 15530 	 loss = 0.386 (2.645 sec/step)
step 15540 	 loss = 0.393 (2.435 sec/step)
step 15550 	 loss = 0.400 (3.332 sec/step)
step 15560 	 loss = 0.407 (2.936 sec/step)
step 15570 	 loss = 0.402 (2.871 sec/step)
step 15580 	 loss = 0.399 (3.149 sec/step)
step 15590 	 loss = 0.390 (3.211 sec/step)
step 15600 	 loss = 0.391 (2.923 sec/step)
step 15610 	 loss = 0.394 (2.562 sec/step)
step 15620 	 loss = 0.377 (3.032 sec/step)
step 15630 	 loss = 0.384 (2.665 sec/step)
step 15640 	 loss = 0.392 (2.960 sec/step)
step 15650 	 loss = 0.377 (2.846 sec/step)
step 15660 	 loss = 0.394 (2.751 sec/step)
step 15670 	 loss = 0.387 (3.093 sec/step)
step 15680 

step 17360 	 loss = 0.414 (3.004 sec/step)
step 17370 	 loss = 0.426 (3.229 sec/step)
step 17380 	 loss = 0.406 (2.861 sec/step)
step 17390 	 loss = 0.399 (2.677 sec/step)
step 17400 	 loss = 0.431 (2.901 sec/step)
step 17410 	 loss = 0.396 (2.779 sec/step)
step 17420 	 loss = 0.399 (3.005 sec/step)
step 17430 	 loss = 0.384 (2.868 sec/step)
step 17440 	 loss = 0.399 (2.605 sec/step)
step 17450 	 loss = 0.399 (2.648 sec/step)
step 17460 	 loss = 0.391 (2.972 sec/step)
step 17470 	 loss = 0.383 (3.241 sec/step)
step 17480 	 loss = 0.389 (2.992 sec/step)
step 17490 	 loss = 0.402 (3.021 sec/step)
step 17500 	 loss = 0.400 (2.752 sec/step)
step 17510 	 loss = 0.419 (2.846 sec/step)
step 17520 	 loss = 0.434 (2.682 sec/step)
step 17530 	 loss = 0.408 (2.835 sec/step)
step 17540 	 loss = 0.406 (2.892 sec/step)
step 17550 	 loss = 0.415 (2.760 sec/step)
step 17560 	 loss = 0.408 (2.763 sec/step)
step 17570 	 loss = 0.407 (2.620 sec/step)
step 17580 	 loss = 0.418 (2.841 sec/step)
step 17590 

step 19270 	 loss = 0.370 (2.904 sec/step)
step 19280 	 loss = 0.371 (2.475 sec/step)
step 19290 	 loss = 0.364 (3.138 sec/step)
step 19300 	 loss = 0.374 (2.933 sec/step)
step 19310 	 loss = 0.360 (2.792 sec/step)
step 19320 	 loss = 0.364 (2.466 sec/step)
step 19330 	 loss = 0.370 (2.721 sec/step)
step 19340 	 loss = 0.358 (2.937 sec/step)
step 19350 	 loss = 0.383 (2.828 sec/step)
step 19360 	 loss = 0.361 (2.803 sec/step)
step 19370 	 loss = 0.382 (2.634 sec/step)
step 19380 	 loss = 0.384 (2.814 sec/step)
step 19390 	 loss = 0.372 (3.049 sec/step)
step 19400 	 loss = 0.365 (2.907 sec/step)
step 19410 	 loss = 0.364 (2.755 sec/step)
step 19420 	 loss = 0.354 (3.126 sec/step)
step 19430 	 loss = 0.356 (2.995 sec/step)
step 19440 	 loss = 0.353 (2.641 sec/step)
step 19450 	 loss = 0.351 (3.155 sec/step)
step 19460 	 loss = 0.376 (2.774 sec/step)
step 19470 	 loss = 0.380 (2.626 sec/step)
step 19480 	 loss = 0.355 (2.746 sec/step)
step 19490 	 loss = 0.363 (2.654 sec/step)
step 19500 

step 21180 	 loss = 0.380 (2.288 sec/step)
step 21190 	 loss = 0.376 (2.352 sec/step)
step 21200 	 loss = 0.377 (2.254 sec/step)
step 21210 	 loss = 0.380 (2.469 sec/step)
step 21220 	 loss = 0.371 (2.339 sec/step)
step 21230 	 loss = 0.365 (2.301 sec/step)
step 21240 	 loss = 0.359 (2.402 sec/step)
step 21250 	 loss = 0.370 (2.406 sec/step)
step 21260 	 loss = 0.378 (2.460 sec/step)
step 21270 	 loss = 0.340 (3.990 sec/step)
step 21280 	 loss = 0.385 (2.546 sec/step)
step 21290 	 loss = 0.386 (2.454 sec/step)
step 21300 	 loss = 0.367 (2.901 sec/step)
step 21310 	 loss = 0.376 (2.724 sec/step)
step 21320 	 loss = 0.360 (2.699 sec/step)
step 21330 	 loss = 0.360 (3.021 sec/step)
step 21340 	 loss = 0.370 (2.714 sec/step)
step 21350 	 loss = 0.370 (2.783 sec/step)
step 21360 	 loss = 0.399 (3.120 sec/step)
step 21370 	 loss = 0.364 (2.804 sec/step)
step 21380 	 loss = 0.378 (3.239 sec/step)
step 21390 	 loss = 0.367 (2.625 sec/step)
step 21400 	 loss = 0.369 (2.924 sec/step)
step 21410 

step 23090 	 loss = 0.331 (3.045 sec/step)
step 23100 	 loss = 0.339 (2.541 sec/step)
step 23110 	 loss = 0.313 (2.764 sec/step)
step 23120 	 loss = 0.326 (2.770 sec/step)
step 23130 	 loss = 0.346 (2.933 sec/step)
step 23140 	 loss = 0.327 (2.871 sec/step)
step 23150 	 loss = 0.336 (2.625 sec/step)
step 23160 	 loss = 0.330 (2.804 sec/step)
step 23170 	 loss = 0.310 (2.696 sec/step)
step 23180 	 loss = 0.321 (2.979 sec/step)
step 23190 	 loss = 0.329 (3.042 sec/step)
step 23200 	 loss = 0.324 (3.010 sec/step)
step 23210 	 loss = 0.344 (2.820 sec/step)
step 23220 	 loss = 0.342 (3.045 sec/step)
step 23230 	 loss = 0.339 (2.907 sec/step)
step 23240 	 loss = 0.309 (3.032 sec/step)
step 23250 	 loss = 0.319 (2.873 sec/step)
step 23260 	 loss = 0.326 (2.875 sec/step)
step 23270 	 loss = 0.340 (2.627 sec/step)
step 23280 	 loss = 0.341 (2.634 sec/step)
step 23290 	 loss = 0.356 (2.691 sec/step)
step 23300 	 loss = 0.329 (2.855 sec/step)
step 23310 	 loss = 0.331 (2.924 sec/step)
step 23320 

step 25000 	 loss = 0.319 (3.183 sec/step)
step 25010 	 loss = 0.324 (2.881 sec/step)
step 25020 	 loss = 0.335 (2.779 sec/step)
step 25030 	 loss = 0.340 (3.257 sec/step)
step 25040 	 loss = 0.336 (2.911 sec/step)
step 25050 	 loss = 0.318 (3.095 sec/step)
step 25060 	 loss = 0.343 (2.675 sec/step)
step 25070 	 loss = 0.345 (2.722 sec/step)
step 25080 	 loss = 0.327 (2.819 sec/step)
step 25090 	 loss = 0.328 (2.979 sec/step)
step 25100 	 loss = 0.326 (2.838 sec/step)
step 25110 	 loss = 0.328 (2.884 sec/step)
step 25120 	 loss = 0.326 (2.924 sec/step)
step 25130 	 loss = 0.339 (3.014 sec/step)
step 25140 	 loss = 0.336 (3.174 sec/step)
step 25150 	 loss = 0.325 (2.568 sec/step)
step 25160 	 loss = 0.328 (2.942 sec/step)
step 25170 	 loss = 0.332 (2.987 sec/step)
step 25180 	 loss = 0.317 (2.787 sec/step)
step 25190 	 loss = 0.342 (2.629 sec/step)
step 25200 	 loss = 0.329 (3.108 sec/step)
step 25210 	 loss = 0.352 (3.002 sec/step)
step 25220 	 loss = 0.338 (2.967 sec/step)
step 25230 

step 26910 	 loss = 0.295 (2.904 sec/step)
step 26920 	 loss = 0.289 (2.796 sec/step)
step 26930 	 loss = 0.302 (2.478 sec/step)
step 26940 	 loss = 0.304 (2.530 sec/step)
step 26950 	 loss = 0.298 (2.277 sec/step)
step 26960 	 loss = 0.308 (2.456 sec/step)
step 26970 	 loss = 0.298 (2.416 sec/step)
step 26980 	 loss = 0.324 (2.457 sec/step)
step 26990 	 loss = 0.292 (2.285 sec/step)
step 27000 	 loss = 0.302 (2.286 sec/step)
step 27010 	 loss = 0.287 (2.276 sec/step)
step 27020 	 loss = 0.294 (2.335 sec/step)
step 27030 	 loss = 0.304 (2.230 sec/step)
step 27040 	 loss = 0.302 (2.267 sec/step)
step 27050 	 loss = 0.291 (4.097 sec/step)
step 27060 	 loss = 0.317 (2.388 sec/step)
step 27070 	 loss = 0.314 (2.649 sec/step)
step 27080 	 loss = 0.304 (3.269 sec/step)
step 27090 	 loss = 0.312 (2.723 sec/step)
step 27100 	 loss = 0.294 (2.599 sec/step)
step 27110 	 loss = 0.327 (3.247 sec/step)
step 27120 	 loss = 0.288 (2.744 sec/step)
step 27130 	 loss = 0.299 (2.920 sec/step)
step 27140 

step 28820 	 loss = 0.307 (2.765 sec/step)
step 28830 	 loss = 0.313 (3.075 sec/step)
step 28840 	 loss = 0.312 (3.161 sec/step)
step 28850 	 loss = 0.301 (2.830 sec/step)
step 28860 	 loss = 0.316 (2.786 sec/step)
step 28870 	 loss = 0.287 (2.852 sec/step)
step 28880 	 loss = 0.306 (3.120 sec/step)
step 28890 	 loss = 0.320 (2.797 sec/step)
step 28900 	 loss = 0.311 (2.799 sec/step)
step 28910 	 loss = 0.319 (2.740 sec/step)
step 28920 	 loss = 0.306 (3.066 sec/step)
step 28930 	 loss = 0.305 (2.856 sec/step)
step 28940 	 loss = 0.312 (2.572 sec/step)
step 28950 	 loss = 0.322 (2.926 sec/step)
step 28960 	 loss = 0.316 (2.761 sec/step)
step 28970 	 loss = 0.309 (2.631 sec/step)
step 28980 	 loss = 0.320 (2.787 sec/step)
step 28990 	 loss = 0.302 (2.998 sec/step)
step 29000 	 loss = 0.327 (2.782 sec/step)
step 29010 	 loss = 0.327 (3.205 sec/step)
step 29020 	 loss = 0.298 (2.922 sec/step)
step 29030 	 loss = 0.297 (3.109 sec/step)
step 29040 	 loss = 0.284 (3.020 sec/step)
step 29050 

step 30730 	 loss = 0.291 (2.845 sec/step)
step 30740 	 loss = 0.280 (2.842 sec/step)
step 30750 	 loss = 0.264 (2.775 sec/step)
step 30760 	 loss = 0.271 (2.810 sec/step)
step 30770 	 loss = 0.302 (2.916 sec/step)
step 30780 	 loss = 0.287 (2.740 sec/step)
step 30790 	 loss = 0.291 (2.634 sec/step)
step 30800 	 loss = 0.280 (2.918 sec/step)
step 30810 	 loss = 0.274 (2.837 sec/step)
step 30820 	 loss = 0.294 (2.944 sec/step)
step 30830 	 loss = 0.291 (3.023 sec/step)
step 30840 	 loss = 0.277 (2.525 sec/step)
step 30850 	 loss = 0.282 (2.977 sec/step)
step 30860 	 loss = 0.276 (2.778 sec/step)
step 30870 	 loss = 0.273 (2.863 sec/step)
step 30880 	 loss = 0.278 (2.931 sec/step)
step 30890 	 loss = 0.282 (3.210 sec/step)
step 30900 	 loss = 0.297 (3.097 sec/step)
step 30910 	 loss = 0.263 (3.253 sec/step)
step 30920 	 loss = 0.277 (2.827 sec/step)
step 30930 	 loss = 0.273 (2.687 sec/step)
step 30940 	 loss = 0.277 (3.056 sec/step)
step 30950 	 loss = 0.273 (2.954 sec/step)
step 30960 

step 32640 	 loss = 0.269 (2.771 sec/step)
step 32650 	 loss = 0.269 (2.632 sec/step)
step 32660 	 loss = 0.292 (3.230 sec/step)
step 32670 	 loss = 0.283 (2.752 sec/step)
step 32680 	 loss = 0.298 (3.141 sec/step)
step 32690 	 loss = 0.280 (2.412 sec/step)
step 32700 	 loss = 0.279 (2.452 sec/step)
step 32710 	 loss = 0.286 (2.374 sec/step)
step 32720 	 loss = 0.287 (2.271 sec/step)
step 32730 	 loss = 0.298 (2.430 sec/step)
step 32740 	 loss = 0.297 (2.209 sec/step)
step 32750 	 loss = 0.271 (2.383 sec/step)
step 32760 	 loss = 0.286 (2.335 sec/step)
step 32770 	 loss = 0.286 (2.495 sec/step)
step 32780 	 loss = 0.294 (2.452 sec/step)
step 32790 	 loss = 0.278 (2.359 sec/step)
step 32800 	 loss = 0.293 (2.361 sec/step)
step 32810 	 loss = 0.294 (2.216 sec/step)
step 32820 	 loss = 0.269 (2.242 sec/step)
step 32830 	 loss = 0.286 (3.902 sec/step)
step 32840 	 loss = 0.293 (2.621 sec/step)
step 32850 	 loss = 0.281 (3.027 sec/step)
step 32860 	 loss = 0.285 (2.720 sec/step)
step 32870 

step 34550 	 loss = 0.242 (2.815 sec/step)
step 34560 	 loss = 0.268 (2.694 sec/step)
step 34570 	 loss = 0.257 (3.075 sec/step)
step 34580 	 loss = 0.269 (3.090 sec/step)
step 34590 	 loss = 0.270 (2.979 sec/step)
step 34600 	 loss = 0.286 (2.695 sec/step)
step 34610 	 loss = 0.251 (2.858 sec/step)
step 34620 	 loss = 0.268 (2.626 sec/step)
step 34630 	 loss = 0.272 (2.590 sec/step)
step 34640 	 loss = 0.271 (2.734 sec/step)
step 34650 	 loss = 0.257 (2.717 sec/step)
step 34660 	 loss = 0.253 (2.611 sec/step)
step 34670 	 loss = 0.257 (2.993 sec/step)
step 34680 	 loss = 0.273 (2.816 sec/step)
step 34690 	 loss = 0.270 (2.961 sec/step)
step 34700 	 loss = 0.269 (2.842 sec/step)
step 34710 	 loss = 0.275 (2.799 sec/step)
step 34720 	 loss = 0.262 (2.616 sec/step)
step 34730 	 loss = 0.271 (2.841 sec/step)
step 34740 	 loss = 0.296 (2.758 sec/step)
step 34750 	 loss = 0.274 (2.839 sec/step)
step 34760 	 loss = 0.279 (2.951 sec/step)
step 34770 	 loss = 0.291 (2.483 sec/step)
step 34780 

step 36460 	 loss = 0.288 (2.913 sec/step)
step 36470 	 loss = 0.270 (2.493 sec/step)
step 36480 	 loss = 0.277 (2.401 sec/step)
step 36490 	 loss = 0.260 (2.806 sec/step)
step 36500 	 loss = 0.262 (3.123 sec/step)
step 36510 	 loss = 0.275 (3.100 sec/step)
step 36520 	 loss = 0.258 (2.823 sec/step)
step 36530 	 loss = 0.268 (3.017 sec/step)
step 36540 	 loss = 0.264 (2.971 sec/step)
step 36550 	 loss = 0.267 (2.778 sec/step)
step 36560 	 loss = 0.269 (2.820 sec/step)
step 36570 	 loss = 0.279 (2.773 sec/step)
step 36580 	 loss = 0.267 (2.636 sec/step)
step 36590 	 loss = 0.276 (2.999 sec/step)
step 36600 	 loss = 0.277 (2.813 sec/step)
step 36610 	 loss = 0.291 (3.243 sec/step)
step 36620 	 loss = 0.282 (3.002 sec/step)
step 36630 	 loss = 0.272 (2.747 sec/step)
step 36640 	 loss = 0.267 (2.842 sec/step)
step 36650 	 loss = 0.265 (2.548 sec/step)
step 36660 	 loss = 0.281 (2.846 sec/step)
step 36670 	 loss = 0.263 (2.728 sec/step)
step 36680 	 loss = 0.259 (2.846 sec/step)
step 36690 