In [2]:
#!/usr/local/bin/python

# This version of the code trains the attractor connections with a separate
# objective function than the objective function used to train all other weights
# in the network (on the prediction task).

from __future__ import print_function
import itertools
import tensorflow as tf
import numpy as np
import sys
import argparse
import datetime


% load_ext autoreload
% autoreload

from tensorflow_helpers import *
from data_generator import generate_examples, pick_task

from helper_functions import get_batches, load_pretrained_embeddings, \
    get_model_type_str, translate_ids_to_words, \
    save_results, print_into_log, print_some_translated_sentences, \
    get_training_progress_comment
from information_trackers import compute_entropy_fullvec
from graph_init import GRU_attractor


class EarlyStopper():
    """Track a monitored loss and signal when it has stopped improving.

    Args:
        patience_max: number of epochs without improvement that is tolerated.
        disp_epoch: number of epochs between two calls to ``update``; the
            internal counter is multiplied by it so that ``patience_max`` is
            expressed in epochs rather than in evaluations.
        min_delta: a non-improving value only counts against the patience
            when it differs from the best value by more than this amount.
    """

    def __init__(self, patience_max, disp_epoch, min_delta = 0.00):
        self.best = 1e10  # lowest value seen so far (lower is better)
        self.patience = 0  # consecutive evaluations counted as "no improvement"
        self.patience_max = patience_max
        self.display_epoch = disp_epoch
        self.min_delta = min_delta

    def update(self, current):
        """Record a new value of the monitored quantity.

        Any strict improvement becomes the new best and resets the counter.
        Otherwise the counter grows by one, but only when ``current`` is
        further than ``min_delta`` away from the best value.
        """
        if self.best > current:
            self.best = current
            self.patience = 0
        elif abs(self.best - current) > self.min_delta:
            self.patience += 1

    def patience_ran_out(self):
        """Return True once more than ``patience_max`` epochs passed without improvement.

        Always returns a real bool (the negative case used to fall through
        and return ``None``).
        """
        return self.patience * self.display_epoch > self.patience_max
            
def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiation so that
    large scores do not overflow. For a 2-D input every column sums to one.
    """
    shifted_scores = x - np.max(x)
    exp_scores = np.exp(shifted_scores)
    totals = exp_scores.sum(axis=0)
    return exp_scores / totals


# Experiment configuration. The dict is passed to pick_task() (whose first
# return value is re-bound to `ops` below), to generate_examples(), to
# GRU_attractor() and to the logging / result-saving helpers.
# NOTE(review): several "OPTIONS" lists below do not contain the value that is
# currently set ("GRU", "projection2", "bernoilli", "msnbc"); the lists look
# stale. Confirm against the consumers before relying on them.
ops = {
    'model_type': "GRU",  # OPTIONS: vanilla, LSTM_raw, LSTM_tensorflow, LSTM_attractor
    'hid': 50,  # hidden size; also the width of the attractor target placeholder
    'in': None,  # TBD
    'out': 1,
    #         'batch_size':n_examples, #since the sequences are 1-dimensional it's easier to just run them all at once
    # Overwritten by the sweep in the main loop (values 15 and 0).
    'n_attractor_iterations': 15,
    'attractor_dynamics': "projection2",  # OPTIONS:  "" (for no attractor dynamics),
    #           "direct" (simple attractor weights applied to hidden states directly, trained with noise addition)
    #           "projection" (project the hidden state into a separate space via weights, do attraction, project back)
    #           "helper_hidden" (hidden-hidden neurons) - IMPORTANT: don't forget to add h_hid number
    'h_hid': 100,  # helper hidden for "helper hidden" "attractory_dynamics" mode
    'attractor_noise_level': 0.5,
    # NOTE(review): "bernoilli" is spelled this way at runtime; do not correct
    # the spelling without checking how the consumer compares it.
    'attractor_noise_type': "bernoilli",  # OPTIONS: "gaussian", "dropout", "random_drop"

    'training_mode': "",  # 'attractor_on_both',

    'attractor_regularization': "l2_regularization",  # OPTIONS: "l2_regularization", "l2_norm"
    # Rescaled in place inside the replication loop when BATCH_SIZE < len(X_train).
    'attractor_regularization_lambda': 0.0,

    'record_mutual_information': True,
    'problem_type': "msnbc",  # OPTIONS: parity, parity_length, majority, reber, kazakov, pos_brown, ner_german, sentiment_imdb, topic_classification, video_classification
    'masking': True,#"seq", "final"
    'prediction_type': 'seq', #'seq', 'final', 'final_class'
    'seq_len': None,

    'save_best_model': True,  # checkpoint whenever validation accuracy improves
    'reshuffle_data_each_replication': False,  # relevant for POS datasets (since they are loaded from files)
    'test_partition': 0.3,
    # NOTE(review): the prediction optimizer in this script is created with a
    # hard-coded learning_rate=0.008, so this value is not read by the visible code.
    'lrate': 0.001,  # was 0.008

    # NLP related (pos_brown task)
    'bidirectional': False,
    'embedding_size': 100,
    'load_word_embeddings': False,
    'train_word_embeddings': False,
    'trainable_logic_symbols': 0, #make first *N* embeddings trainable(Pad, unknown, start symbols make it a separate matrix and trainable)
    'input_type': "embed",  # embed&prior, embed, prior
    'dropout': 0.2  # in range(0,1)
}

# !!!!!!!!!!!!!!!!!!!!!!
# SEQ_LEN = 12 # number of bits in input sequence
# Module-level shortcuts derived from `ops`, followed by the training-loop
# parameters. N_H_HIDDEN, ARCH, ATTRACTOR_TYPE, TRAIN_ATTR_WEIGHTS_ON_PREDICTION,
# EARLY_STOPPING_THRESH and EARLY_STOPPING_MINIMUM_EPOCH are not referenced
# again in the visible part of this script.
N_HIDDEN = ops['hid']  # number of hidden units
N_H_HIDDEN = ops['h_hid']
TASK = ops['problem_type']
ARCH = ops['model_type']  # hidden layer type: 'GRU' or 'tanh'
NOISE_LEVEL = ops['attractor_noise_level']
# noise in training attractor net
# if >=0, Gaussian with std dev NOISE_LEVEL
# if < 0, Bernoulli dropout proportion -NOISE_LEVEL
# NOTE(review): the sign convention above cannot be confirmed from this script;
# the noise kind is also configured through ops['attractor_noise_type'].

# !!!!!!!!!!!!!!!!!!!!!!
INPUT_NOISE_LEVEL = 0.1  # forwarded to generate_examples()
ATTRACTOR_TYPE = ops['attractor_dynamics']
N_ATTRACTOR_STEPS = ops['n_attractor_iterations']
# number of time steps in attractor dynamics
# if = 0, then no attractor net (the attractor training step is skipped)
# !!!!!!!!!!!!!!!!!!!!!!
# ATTR_WEIGHT_CONSTRAINTS = True
# True: make attractor weights symmetric and have zero diag
# False: unconstrained
TRAIN_ATTR_WEIGHTS_ON_PREDICTION = False
# True: train attractor weights on attractor net _and_ prediction
REPORT_BEST_TRAIN_PERFORMANCE = True
# True: report the metrics of the epoch with the best *validation accuracy*
#       (that is how the training loop selects the "best" epoch)
# False: report the train/test accuracy of the last evaluated epoch
LOSS_SWITCH_FREQ = 1
# how often (in epochs) the prediction-loss training is toggled on/off;
# the attractor loss is trained on every batch whenever N_ATTRACTOR_STEPS > 0.
# 0 disables the toggling (prediction loss is then trained every epoch).

# pick_task() returns the (possibly updated) ops together with the data
# dimensions of the chosen task.
ops, SEQ_LEN, N_INPUT, N_CLASSES, N_TRAIN, N_TEST = pick_task(ops['problem_type'],
                                                              ops)  # task (parity, majority, reber, kazakov)

# Training Parameters

TRAINING_EPOCHS = 500
N_REPLICATIONS = 1
BATCH_SIZE = 8000
DISPLAY_EPOCH = 1  # evaluate / log every DISPLAY_EPOCH epochs
EARLY_STOPPING_THRESH = 0.03 # 1e-3 for POS, 0.03 for Sentiment
EARLY_STOPPING_PATIENCE = 20  # in epochs
EARLY_STOPPING_MINIMUM_EPOCH = 0

# NOTEBOOK CODE

######### MAIN CODE #############################################################
#0.02, 0.05, 0.1, 0.2, 0.35, 0.5, 
# Sweep over the fraction of the training data that is used and over the
# number of attractor iterations (0 skips the attractor training step).
for dataset_part in [0.5, 0.99]:
    for attractor_steps in [15, 0]:
        NOISE_LEVEL = ops['attractor_noise_level']

        # (A sweep over ops['attractor_regularization_lambda'] can be nested here.)

        # Every configuration gets a fresh graph. The tf seed needs to be set
        # within the context of the graph, i.e. after the reset.
        tf.reset_default_graph()
        np.random.seed(11)
        tf.set_random_seed(11)
        ops['n_attractor_iterations'] = attractor_steps
        N_ATTRACTOR_STEPS = ops['n_attractor_iterations']

        #
        # PLACEHOLDERS
        #
        # Tasks whose inputs are token/category ids feed int64 id matrices
        # (looked up in an embedding table or one-hot encoded later on);
        # all remaining tasks feed dense float feature sequences.
        problem_type = ops['problem_type']
        if 'pos' in problem_type or 'msnbc' in problem_type:
            # one integer label per time step
            X = tf.placeholder("int64", [None, SEQ_LEN], name='X')
            Y = tf.placeholder("int64", [None, SEQ_LEN], name='Y')
        elif problem_type in ('sentiment_imdb', 'topic_classification'):
            # one target row per sequence: N_CLASSES columns for sentiment, a single id for topics
            n_target_columns = N_CLASSES if problem_type == 'sentiment_imdb' else 1
            X = tf.placeholder("int64", [None, SEQ_LEN], name='X')
            Y = tf.placeholder("int64", [None, n_target_columns], name='Y')
        else:
            # ner_german is labelled per time step, everything else has a single output
            n_target_columns = SEQ_LEN if problem_type == 'ner_german' else 1
            X = tf.placeholder("float", [None, SEQ_LEN, N_INPUT])
            Y = tf.placeholder("int64", [None, n_target_columns])
        # Receives hidden states fetched from the network when the attractor ops are run.
        attractor_tgt_net = tf.placeholder("float", [None, N_HIDDEN], name='attractor_tgt')

        # Embedding matrix initialization
        if 'pos' in ops['problem_type'] or 'sentiment' in ops['problem_type'] or ops['problem_type'] == "topic_classification":
            # Only the vocabulary / prior maps are used here; the data arrays
            # are generated again inside the replication loop.
            [_, _, _, _, _, _, maps] = generate_examples(SEQ_LEN, N_TRAIN, N_TEST,
                                                         INPUT_NOISE_LEVEL, TASK, ops)

            # NOTE(review): the "embedding" variable is created outside the
            # "TASK_WEIGHTS" scope, while the prediction optimizer only updates
            # variables collected under that scope - confirm that
            # ops['train_word_embeddings'] has the intended effect.
            if ops['load_word_embeddings']:
                embeddings_loaded, _ = load_pretrained_embeddings('data/glove.6B.{}d.txt'.format(ops['embedding_size']),
                                                               maps, ops)
                if ops['trainable_logic_symbols'] > 0:
                    # Separate, always-trainable rows that are stacked in
                    # front of the pretrained word embeddings below.
                    with tf.variable_scope("TASK_WEIGHTS"):
                        symbols_embedding = tf.get_variable("symb_embedding",
                                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                                shape=[ops['trainable_logic_symbols'], ops['embedding_size']],
                                                dtype=tf.float32,
                                                trainable=True)

                word_embedding = tf.get_variable("embedding",
                                            initializer=embeddings_loaded,
                                            dtype=tf.float32,
                                            trainable=ops['train_word_embeddings'])
                if ops['trainable_logic_symbols'] > 0:
                    embedding = tf.concat([symbols_embedding, word_embedding], axis=0)
                else:
                    embedding = word_embedding
            else:  # initialize randomly
                embedding = tf.get_variable("embedding",
                                            initializer=tf.truncated_normal_initializer(stddev=0.05),
                                            shape=[ops['vocab_size'], ops['embedding_size']],
                                            dtype=tf.float32,
                                            trainable=ops['train_word_embeddings'])
            embed_lookup = tf.nn.embedding_lookup(embedding, X)

            # load priors information
            if ops['input_type'] in ('prior', 'embed&prior'):
                id2prior = maps['id2prior']
                word2id = maps['word2id']
                priors = np.zeros([len(id2prior), len(id2prior[0])]).astype("float32")
                # `token_id` rather than `id`: this runs at notebook scope, so
                # a loop variable named `id` would clobber the builtin.
                for token_id, prior in id2prior.items():
                    priors[token_id] = prior
                priors_op = tf.get_variable("priors",
                                            initializer=priors,
                                            dtype=tf.float32,
                                            trainable=False)
                prior_lookup = tf.nn.embedding_lookup(priors_op, X)

            # Select what is fed to the recurrent network.
            if ops['input_type'] == 'embed':
                embed = embed_lookup
            elif ops['input_type'] == 'prior':
                embed = prior_lookup
            elif ops['input_type'] == 'embed&prior':
                embed = tf.concat([embed_lookup, prior_lookup], axis=2)
            else:
                # Fail here instead of with a NameError on `embed` further down.
                raise ValueError("Unknown ops['input_type']: {!r}".format(ops['input_type']))

        # Graph + all the training variables
        # Pick the tensor fed to the network and the tensor handed over as
        # 'mask'; the attractor target placeholder is shared by every task.
        if 'pos' in ops['problem_type']:
            net_features, net_mask = embed, Y
        elif ops['problem_type'] in ('sentiment_imdb', 'topic_classification'):
            net_features, net_mask = embed, X
        elif ops['problem_type'] == 'msnbc':
            # category ids are fed as one-hot vectors instead of embeddings
            net_features, net_mask = tf.one_hot(X, depth=N_CLASSES), X
        else:
            net_features, net_mask = X, X
        net_inputs = {'X': net_features, 'mask': net_mask, 'attractor_tgt_net': attractor_tgt_net}

        # Build one attractor-augmented GRU per direction, merge the outputs,
        # then attach the output projection, the prediction loss and its optimizer.
        #
        # The direction suffixes are spelled out explicitly instead of being
        # taken from `G_attractors.keys()`: the key order of a dict is not
        # guaranteed on Python 2 and a keys view cannot be indexed on Python 3.
        names = ['forw', 'back'] if ops['bidirectional'] else ['forw']

        # Forward:
        G_forw = GRU_attractor(ops, inputs=net_inputs, direction='forward', suffix=names[0])
        attr_loss_op_forw = G_forw.attr_loss_op
        attr_train_op_forw = G_forw.attr_train_op
        h_clean_seq_flat_forw = G_forw.h_clean_seq_flat  # for computing entropy of states
        h_net_seq_flat_forw = G_forw.h_net_seq_flat  # -> attractor_tgt_net placeholder
        G_attractors = {'forw': {'attr_loss_op': attr_loss_op_forw, "attr_train_op": attr_train_op_forw,
                                 'h_clean_seq_flat': h_clean_seq_flat_forw, 'h_net_seq_flat': h_net_seq_flat_forw}}
        G_forw_output = G_forw.output

        if ops['bidirectional']:
            # Backward:
            G_back = GRU_attractor(ops, inputs=net_inputs, direction='backward', suffix=names[1])
            attr_loss_op_back = G_back.attr_loss_op
            attr_train_op_back = G_back.attr_train_op
            h_clean_seq_flat_back = G_back.h_clean_seq_flat  # for computing entropy of states
            h_net_seq_flat_back = G_back.h_net_seq_flat  # -> attractor_tgt_net placeholder
            G_attractors['back'] = {'attr_loss_op': attr_loss_op_back, "attr_train_op": attr_train_op_back,
                                    'h_clean_seq_flat': h_clean_seq_flat_back, 'h_net_seq_flat': h_net_seq_flat_back}
            G_back_output = G_back.output

            # Merge: [seq_len, batch_size, n_hid*2]
            # Note that we reverse the backward cell's output to align with original direction
            # note in "final" only prediction, one less dimension
            merge_index = 1 if 'final' in ops['prediction_type'] else 2
            output = tf.concat([G_forw_output, tf.reverse(G_back_output, axis=[0])], axis=merge_index)
            input_size_final_projection = 2 * ops['hid']
        else:
            output = G_forw_output
            input_size_final_projection = ops['hid']

        if ops['dropout'] > 0.0:
            # note keep_prob = 1.0 - drop_probability
            # tensorflow implementation scales by 1/keep_prob automatically
            output = tf.nn.dropout(output, keep_prob=1.0 - ops['dropout'])

        Y_ = project_into_output(Y, output, input_size_final_projection, ops['out'], ops)

        # LOSS, ACC, & TRAIN OPS
        pred_loss_op = task_loss(Y, Y_, ops)
        # NOTE(review): the learning rate is hard-coded; ops['lrate'] is not used here.
        optimizer_pred = tf.train.AdamOptimizer(learning_rate=0.008)
        # Only the variables created under the "TASK_WEIGHTS" scope are trained on the prediction loss.
        prediction_parameters = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "TASK_WEIGHTS")
        pred_train_op = optimizer_pred.minimize(pred_loss_op, var_list=prediction_parameters)
        accuracy = task_accuracy(Y, Y_, ops)

            
        # Sign of the targets as float: 0.0 wherever Y == 0.
        mask_op = tf.cast(tf.sign(Y), dtype=tf.float32)
        # Op that assigns every variable its initial value.
        init = tf.global_variables_initializer()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        session_config = tf.ConfigProto(gpu_options=gpu_options)
        with tf.Session(config=session_config) as sess:
            # TODO: make a class for all "best" quantities (a lot of space)
            # One entry per replication is appended to each of these lists.
            saved_train_acc, saved_test_acc = [], []
            saved_epoch, saved_att_loss = [], []
            saved_entropy_final = []
            saved_val_acc, saved_val_loss = [], []
            saved_traini_loss = []
            saver = tf.train.Saver()

            # Start training
            for replication in range(N_REPLICATIONS):
                print("********** replication ", replication, " **********")
                early_stopper = EarlyStopper(EARLY_STOPPING_PATIENCE, DISPLAY_EPOCH)
                [X_full_train, Y_full_train, X_test, Y_test, X_val, Y_val, maps] = generate_examples(SEQ_LEN, N_TRAIN, N_TEST,
                                                                                           INPUT_NOISE_LEVEL, TASK, ops)
                # Take only part of the dataset: the first `dataset_part` of a
                # random permutation is used for training. Validation examples
                # are cut from the remainder (20% of the training size, or
                # whatever is left); they replace the X_val / Y_val returned above.
                # A real list is needed: np.random.shuffle works in place and
                # cannot shuffle a Python 3 `range`.
                all_ids = list(range(len(X_full_train)))
                np.random.shuffle(all_ids)
                train_part = int(dataset_part * len(X_full_train))
                ids_to_take = all_ids[0:train_part]
                ids_for_val = all_ids[train_part:int(train_part + 0.2*train_part)]
                X_train = X_full_train[ids_to_take]
                Y_train = Y_full_train[ids_to_take]

                if BATCH_SIZE < len(X_train):
                    # NOTE(review): this rescales the shared `ops` entry in place,
                    # after the graph for this configuration has already been built,
                    # and the division compounds over replications / configurations.
                    ops['attractor_regularization_lambda'] = ops['attractor_regularization_lambda']/(len(X_train)*1.0/BATCH_SIZE)
                    print(ops['attractor_regularization_lambda'])

                X_val, Y_val = X_full_train[ids_for_val], Y_full_train[ids_for_val,:]

                # NOTE(review): N_TRAIN is overwritten globally, so later calls to
                # generate_examples() receive the reduced size - confirm this is intended.
                N_TRAIN = len(X_train)
                print(X_train.shape, Y_train.shape, X_val.shape, Y_val.shape)

                # Log Path init-n:
                COMMENT = 'dataset_starvation_experiment'
                MODEL_NAME_FILE = '{}_(att_iter{}__bidir{}__drop{})_{}.txt'.format(ops['problem_type'],
                                                                                   ops['n_attractor_iterations'],
                                                                                   ops['bidirectional'],
                                                                                   ops['dropout'],
                                                                                   COMMENT)
                LOG_DIRECTORY = 'experiments/logs/{}'.format(MODEL_NAME_FILE)
                # Also used as the checkpoint path when the best model is saved.
                MODEL_DIRECTORY = 'experiments/logs/{}_{}'.format(datetime.date.today(), MODEL_NAME_FILE)
                print_into_log(LOG_DIRECTORY, get_model_type_str(ops, N_TRAIN, N_TEST, SEQ_LEN))
                print_into_log(MODEL_DIRECTORY, get_model_type_str(ops, N_TRAIN, N_TEST, SEQ_LEN), supress=True)

                sess.run(init)  # Run the initializer (fresh weights for every replication)

                # Prediction training starts switched on; see LOSS_SWITCH_FREQ.
                train_prediction_loss = True
                # Metrics of the epoch with the best validation accuracy so far.
                best_train_acc = -1000.
                best_test_acc = 0
                best_entropy = 0.0
                best_att_loss = 0
                best_train_loss = 0
                best_val_loss = 0.0
                best_val_acc = 0.0
                best_epoch = 0
                for epoch in range(1, TRAINING_EPOCHS + 2):
                    # Evaluation happens at the start of the epoch, i.e. before
                    # this epoch's weight updates.
                    if (epoch - 1) % DISPLAY_EPOCH == 0:
                        # TRAIN set:
                        ploss, train_acc = batch_tensor_collect(sess, [pred_loss_op, accuracy],
                                                                X, Y, X_train, Y_train, BATCH_SIZE)
                        # TEST set:
                        test_acc = batch_tensor_collect(sess, [accuracy], X, Y, X_test, Y_test, BATCH_SIZE)[0]

                        # Validation set & Early stopping:
                        ploss_val, val_acc = batch_tensor_collect(sess, [pred_loss_op, accuracy],
                                                                  X, Y, X_val, Y_val, BATCH_SIZE)

                        # Precision/Recall:
                        if ops['problem_type'] == 'ner_german':
                            y_pred, y_true, mask_val = batch_tensor_collect(sess, [Y_, Y, mask_op],
                                                                X, Y, X_test, Y_test, BATCH_SIZE)
                            y_pred = np.argmax(y_pred, axis=2)

                            # keep only the positions where the mask is non-zero
                            Y_pred_flat = np.extract(mask_val.astype(bool), y_pred)
                            Y_test_flat = np.extract(mask_val.astype(bool), y_true)
                            print("PRECISION:",compute_f1(Y_pred_flat, Y_test_flat, maps['id2tag']))

                        # Early stopping monitors the validation loss.
                        print(early_stopper.patience, early_stopper.best, ploss_val)
                        early_stopper.update(ploss_val)
                        if early_stopper.patience_ran_out():
                            print_into_log(LOG_DIRECTORY, "STOPPED EARLY AT {}".format(epoch))
                            break

                        # ATTRACTOR(s) LOSS
                        # One fixed name order so that the fetched arrays line up with the names.
                        attractor_names = list(G_attractors.keys())
                        aloss = {}
                        entropy = {}
                        hid_vals_arr = batch_tensor_collect(sess, [G_attractors[att_name]['h_net_seq_flat']
                                                                   for att_name in attractor_names],
                                                            X, Y, X_train, Y_train, BATCH_SIZE)
                        h_clean_val_arr = batch_tensor_collect(sess, [G_attractors[att_name]['h_clean_seq_flat']
                                                                      for att_name in attractor_names],
                                                               X, Y, X_train, Y_train, BATCH_SIZE)
                        # The attractor loss is evaluated chunk-wise and averaged over the chunks.
                        n_splits = np.max([1, int(len(X_train) / BATCH_SIZE)])
                        for i, attractor_name in enumerate(attractor_names):
                            A = G_attractors[attractor_name]
                            a_loss_val = [sess.run(A['attr_loss_op'], feed_dict={attractor_tgt_net: batch_hid_vals})
                                          for batch_hid_vals in np.array_split(hid_vals_arr[i], n_splits)]
                            aloss[attractor_name] = "{:.4f}".format(np.mean(a_loss_val))

                            entropy[attractor_name] = "{:.4f}".format(
                                compute_entropy_fullvec(h_clean_val_arr[i], ops, n_bins=8))

                        # Print training information:
                        print_into_log(LOG_DIRECTORY, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + get_training_progress_comment(epoch, ploss, aloss, ploss_val, val_acc, train_acc,
                                                                     test_acc, entropy))

                        # TODO: record mutual information when ops['record_mutual_information']
                        # is set (the reshaping / masking of the attractor states is still open).

                        # Remember the metrics (and optionally the weights) of the
                        # epoch with the highest validation accuracy.
                        if (val_acc > best_val_acc):
                            best_train_acc = train_acc
                            best_test_acc = test_acc
                            best_att_loss = aloss
                            best_epoch = epoch
                            best_val_acc = val_acc

                            best_val_loss = ploss_val
                            best_train_loss = ploss
                            if ops['save_best_model']:
                                save_path = saver.save(sess, MODEL_DIRECTORY)
                            best_entropy = entropy

                    # Toggle prediction training every LOSS_SWITCH_FREQ epochs
                    # (never before the first epoch; a frequency of 0 disables the toggle).
                    if epoch > 1 and LOSS_SWITCH_FREQ > 0:
                        if (epoch - 1) % LOSS_SWITCH_FREQ == 0:
                            train_prediction_loss = not train_prediction_loss

                    # MODEL TRAINING
                    for batch_x, batch_y in get_batches(BATCH_SIZE, X_train, Y_train):
                        batch_feed = {X: batch_x, Y: batch_y}
                        if LOSS_SWITCH_FREQ == 0 or train_prediction_loss:
                            # Prediction step: optimize all parameters except for attractor weights
                            sess.run([pred_train_op], feed_dict=batch_feed)
                        if N_ATTRACTOR_STEPS <= 0:
                            continue  # no attractor net in this configuration
                        # Attractor: fetch the hidden states for this batch, then feed
                        # them back in as targets of each attractor's own train op.
                        attractor_order = list(G_attractors.keys())
                        batch_hid_vals = sess.run([G_attractors[att_name]['h_net_seq_flat']
                                                   for att_name in attractor_order],
                                                  feed_dict=batch_feed)
                        for i, attractor_name in enumerate(attractor_order):
                            A = G_attractors[attractor_name]
                            sess.run(A['attr_train_op'], feed_dict={attractor_tgt_net: batch_hid_vals[i]})
                print("Optimization Finished!")

                if (REPORT_BEST_TRAIN_PERFORMANCE):
                    saved_train_acc.append(best_train_acc)
                    saved_test_acc.append(best_test_acc)
                    saved_att_loss.append(best_att_loss)
                    saved_entropy_final.append(best_entropy)
                    saved_epoch.append(best_epoch)

                    saved_val_acc.append(best_val_acc)
                    saved_val_loss.append(best_val_loss)
                    saved_traini_loss.append(best_train_loss)
                else:
                    saved_train_acc.append(train_acc)
                    saved_test_acc.append(test_acc)
                    #             saved_att_loss.append(aloss)

            save_results(ops, saved_epoch, saved_train_acc, saved_test_acc, saved_att_loss, saved_entropy_final, saved_val_acc,
                 saved_val_loss, saved_traini_loss, N_TRAIN, N_TEST, SEQ_LEN, comment=COMMENT)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
L2 reg-n
********** replication  0  **********
msnbc
(3689, 40) (3689, 40) (737, 40) (737, 40)
Logged Successfully: 

    model_type: 		GRU bidir(False), task: msnbc
    hid: 			50,
    h_hid: 			100
    n_attractor_iterations: 	15,
    attractor_dynamics: 	projection2
    attractor_noise_level: 	0.5
    attractor_noise_type: 	bernoilli
    attractor_regu-n: 		l2_regularization(lambda:0.0)
    word_embedding: size	(100), train(False)
    dropout: 			0.2
    TRAIN/TEST_SIZE: 	3689/0, SEQ_LEN: 40
Logged Successfully: 
0 10000000000.0 2.8903298
Logged Successfully: 
2018-05-17 18:59:54epoch=0; Loss Pred=2.8901; Val Loss=2.8903; Val Acc=0.0724; Loss Att={'forw': '1.0264'}; Train Acc=0.072; Test Acc=0.0707; Entropy={'forw': '7.0835'}; Entropy_Test=

0 2.8903298 2.7956798
Logged Successfully: 
2018-05-17 18:59:56epoch=1; Loss Pred=2.7979; Val Loss=2.7957; Val Acc=0.0974; Loss Att={'forw': '1.0168'}; Train

Logged Successfully: 
2018-05-17 19:00:47epoch=33; Loss Pred=1.9218; Val Loss=1.9081; Val Acc=0.4875; Loss Att={'forw': '0.7977'}; Train Acc=0.480; Test Acc=0.4743; Entropy={'forw': '7.9465'}; Entropy_Test=

0 1.9080912 1.9108592
Logged Successfully: 
2018-05-17 19:00:49epoch=34; Loss Pred=1.9272; Val Loss=1.9109; Val Acc=0.4925; Loss Att={'forw': '0.7889'}; Train Acc=0.484; Test Acc=0.4755; Entropy={'forw': '7.9289'}; Entropy_Test=

1 1.9080912 1.8530815
Logged Successfully: 
2018-05-17 19:00:51epoch=35; Loss Pred=1.8686; Val Loss=1.8531; Val Acc=0.5290; Loss Att={'forw': '0.7830'}; Train Acc=0.526; Test Acc=0.5180; Entropy={'forw': '8.1270'}; Entropy_Test=

0 1.8530815 1.8574303
Logged Successfully: 
2018-05-17 19:00:52epoch=36; Loss Pred=1.8696; Val Loss=1.8574; Val Acc=0.5345; Loss Att={'forw': '0.7737'}; Train Acc=0.532; Test Acc=0.5239; Entropy={'forw': '8.1343'}; Entropy_Test=

1 1.8530815 1.7968547
Logged Successfully: 
2018-05-17 19:00:54epoch=37; Loss Pred=1.8075; Val Loss=1.

0 1.4031008 1.3969809
Logged Successfully: 
2018-05-17 19:01:45epoch=69; Loss Pred=1.3987; Val Loss=1.3970; Val Acc=0.6407; Loss Att={'forw': '0.4698'}; Train Acc=0.637; Test Acc=0.6350; Entropy={'forw': '8.3148'}; Entropy_Test=

0 1.3969809 1.398012
Logged Successfully: 
2018-05-17 19:01:47epoch=70; Loss Pred=1.4005; Val Loss=1.3980; Val Acc=0.6399; Loss Att={'forw': '0.4614'}; Train Acc=0.638; Test Acc=0.6337; Entropy={'forw': '8.2819'}; Entropy_Test=

1 1.3969809 1.3826414
Logged Successfully: 
2018-05-17 19:01:48epoch=71; Loss Pred=1.3906; Val Loss=1.3826; Val Acc=0.6418; Loss Att={'forw': '0.4525'}; Train Acc=0.640; Test Acc=0.6355; Entropy={'forw': '8.2980'}; Entropy_Test=

0 1.3826414 1.3904243
Logged Successfully: 
2018-05-17 19:01:50epoch=72; Loss Pred=1.3929; Val Loss=1.3904; Val Acc=0.6424; Loss Att={'forw': '0.4444'}; Train Acc=0.640; Test Acc=0.6359; Entropy={'forw': '8.2929'}; Entropy_Test=

1 1.3826414 1.3803707
Logged Successfully: 
2018-05-17 19:01:51epoch=73; Loss Pre

0 1.3023099 1.2978954
Logged Successfully: 
2018-05-17 19:02:42epoch=105; Loss Pred=1.3006; Val Loss=1.2979; Val Acc=0.6567; Loss Att={'forw': '0.2552'}; Train Acc=0.657; Test Acc=0.6545; Entropy={'forw': '7.8425'}; Entropy_Test=

0 1.2978954 1.3003764
Logged Successfully: 
2018-05-17 19:02:44epoch=106; Loss Pred=1.3011; Val Loss=1.3004; Val Acc=0.6554; Loss Att={'forw': '0.2519'}; Train Acc=0.657; Test Acc=0.6515; Entropy={'forw': '7.8256'}; Entropy_Test=

1 1.2978954 1.2971768
Logged Successfully: 
2018-05-17 19:02:46epoch=107; Loss Pred=1.2971; Val Loss=1.2972; Val Acc=0.6569; Loss Att={'forw': '0.2493'}; Train Acc=0.658; Test Acc=0.6491; Entropy={'forw': '7.7605'}; Entropy_Test=

0 1.2971768 1.2975742
Logged Successfully: 
2018-05-17 19:02:47epoch=108; Loss Pred=1.2960; Val Loss=1.2976; Val Acc=0.6569; Loss Att={'forw': '0.2467'}; Train Acc=0.656; Test Acc=0.6504; Entropy={'forw': '7.7305'}; Entropy_Test=

1 1.2971768 1.2951092
Logged Successfully: 
2018-05-17 19:02:49epoch=109; Lo

1 1.2620379 1.2584991
Logged Successfully: 
2018-05-17 19:03:40epoch=141; Loss Pred=1.2573; Val Loss=1.2585; Val Acc=0.6644; Loss Att={'forw': '0.2120'}; Train Acc=0.667; Test Acc=0.6639; Entropy={'forw': '7.7114'}; Entropy_Test=

0 1.2584991 1.261543
Logged Successfully: 
2018-05-17 19:03:41epoch=142; Loss Pred=1.2599; Val Loss=1.2615; Val Acc=0.6643; Loss Att={'forw': '0.2119'}; Train Acc=0.665; Test Acc=0.6629; Entropy={'forw': '7.7078'}; Entropy_Test=

1 1.2584991 1.2576172
Logged Successfully: 
2018-05-17 19:03:43epoch=143; Loss Pred=1.2570; Val Loss=1.2576; Val Acc=0.6657; Loss Att={'forw': '0.2117'}; Train Acc=0.668; Test Acc=0.6645; Entropy={'forw': '7.6970'}; Entropy_Test=

0 1.2576172 1.2579716
Logged Successfully: 
2018-05-17 19:03:45epoch=144; Loss Pred=1.2566; Val Loss=1.2580; Val Acc=0.6659; Loss Att={'forw': '0.2116'}; Train Acc=0.667; Test Acc=0.6629; Entropy={'forw': '7.7081'}; Entropy_Test=

1 1.2576172 1.2611715
Logged Successfully: 
2018-05-17 19:03:46epoch=145; Los

1 1.2418728 1.2399603
Logged Successfully: 
2018-05-17 19:04:37epoch=177; Loss Pred=1.2369; Val Loss=1.2400; Val Acc=0.6680; Loss Att={'forw': '0.2078'}; Train Acc=0.671; Test Acc=0.6668; Entropy={'forw': '7.9490'}; Entropy_Test=

0 1.2399603 1.2416017
Logged Successfully: 
2018-05-17 19:04:38epoch=178; Loss Pred=1.2360; Val Loss=1.2416; Val Acc=0.6682; Loss Att={'forw': '0.2074'}; Train Acc=0.672; Test Acc=0.6659; Entropy={'forw': '7.9532'}; Entropy_Test=

1 1.2399603 1.2416692
Logged Successfully: 
2018-05-17 19:04:40epoch=179; Loss Pred=1.2357; Val Loss=1.2417; Val Acc=0.6682; Loss Att={'forw': '0.2079'}; Train Acc=0.671; Test Acc=0.6675; Entropy={'forw': '7.9628'}; Entropy_Test=

2 1.2399603 1.2428632
Logged Successfully: 
2018-05-17 19:04:41epoch=180; Loss Pred=1.2355; Val Loss=1.2429; Val Acc=0.6684; Loss Att={'forw': '0.2081'}; Train Acc=0.672; Test Acc=0.6671; Entropy={'forw': '7.9352'}; Entropy_Test=

3 1.2399603 1.2382977
Logged Successfully: 
2018-05-17 19:04:43epoch=181; Lo

1 1.2265942 1.2301618
Logged Successfully: 
2018-05-17 19:05:33epoch=213; Loss Pred=1.2247; Val Loss=1.2302; Val Acc=0.6686; Loss Att={'forw': '0.2067'}; Train Acc=0.673; Test Acc=0.6671; Entropy={'forw': '8.0874'}; Entropy_Test=

2 1.2265942 1.2291988
Logged Successfully: 
2018-05-17 19:05:34epoch=214; Loss Pred=1.2237; Val Loss=1.2292; Val Acc=0.6687; Loss Att={'forw': '0.2064'}; Train Acc=0.672; Test Acc=0.6681; Entropy={'forw': '8.0456'}; Entropy_Test=

3 1.2265942 1.2286267
Logged Successfully: 
2018-05-17 19:05:36epoch=215; Loss Pred=1.2243; Val Loss=1.2286; Val Acc=0.6680; Loss Att={'forw': '0.2062'}; Train Acc=0.672; Test Acc=0.6686; Entropy={'forw': '8.0627'}; Entropy_Test=

4 1.2265942 1.226015
Logged Successfully: 
2018-05-17 19:05:38epoch=216; Loss Pred=1.2247; Val Loss=1.2260; Val Acc=0.6682; Loss Att={'forw': '0.2058'}; Train Acc=0.672; Test Acc=0.6669; Entropy={'forw': '8.0500'}; Entropy_Test=

0 1.226015 1.2249581
Logged Successfully: 
2018-05-17 19:05:39epoch=217; Loss

2 1.2186439 1.2201103
Logged Successfully: 
2018-05-17 19:06:29epoch=249; Loss Pred=1.2213; Val Loss=1.2201; Val Acc=0.6682; Loss Att={'forw': '0.2001'}; Train Acc=0.674; Test Acc=0.6683; Entropy={'forw': '8.0491'}; Entropy_Test=

3 1.2186439 1.2242802
Logged Successfully: 
2018-05-17 19:06:30epoch=250; Loss Pred=1.2220; Val Loss=1.2243; Val Acc=0.6665; Loss Att={'forw': '0.2005'}; Train Acc=0.673; Test Acc=0.6682; Entropy={'forw': '8.0151'}; Entropy_Test=

4 1.2186439 1.2249353
Logged Successfully: 
2018-05-17 19:06:32epoch=251; Loss Pred=1.2211; Val Loss=1.2249; Val Acc=0.6678; Loss Att={'forw': '0.2005'}; Train Acc=0.673; Test Acc=0.6670; Entropy={'forw': '7.9976'}; Entropy_Test=

5 1.2186439 1.2261243
Logged Successfully: 
2018-05-17 19:06:33epoch=252; Loss Pred=1.2229; Val Loss=1.2261; Val Acc=0.6664; Loss Att={'forw': '0.1993'}; Train Acc=0.674; Test Acc=0.6668; Entropy={'forw': '7.9470'}; Entropy_Test=

6 1.2186439 1.2195371
Logged Successfully: 
2018-05-17 19:06:35epoch=253; Lo

0 10000000000.0 2.8820734
Logged Successfully: 
2018-05-17 19:07:25epoch=0; Loss Pred=2.8867; Val Loss=2.8821; Val Acc=0.1198; Loss Att={'forw': '1.0000'}; Train Acc=0.119; Test Acc=0.1211; Entropy={'forw': '6.6938'}; Entropy_Test=

0 2.8820734 2.815235
Logged Successfully: 
2018-05-17 19:07:26epoch=1; Loss Pred=2.8199; Val Loss=2.8152; Val Acc=0.1828; Loss Att={'forw': '1.0000'}; Train Acc=0.178; Test Acc=0.1770; Entropy={'forw': '6.7012'}; Entropy_Test=

0 2.815235 2.8151965
Logged Successfully: 
2018-05-17 19:07:27epoch=2; Loss Pred=2.8200; Val Loss=2.8152; Val Acc=0.1824; Loss Att={'forw': '1.0000'}; Train Acc=0.178; Test Acc=0.1784; Entropy={'forw': '6.7012'}; Entropy_Test=

0 2.8151965 2.748392
Logged Successfully: 
2018-05-17 19:07:28epoch=3; Loss Pred=2.7538; Val Loss=2.7484; Val Acc=0.2184; Loss Att={'forw': '1.0000'}; Train Acc=0.212; Test Acc=0.2095; Entropy={'forw': '6.7567'}; Entropy_Test=

0 2.748392 2.749419
Logged Successfully: 
2018-05-17 19:07:29epoch=4; Loss Pred=2.7

0 1.625855 1.6260668
Logged Successfully: 
2018-05-17 19:07:56epoch=36; Loss Pred=1.6462; Val Loss=1.6261; Val Acc=0.6008; Loss Att={'forw': '1.0000'}; Train Acc=0.595; Test Acc=0.5888; Entropy={'forw': '8.6164'}; Entropy_Test=

1 1.625855 1.5809948
Logged Successfully: 
2018-05-17 19:07:57epoch=37; Loss Pred=1.6011; Val Loss=1.5810; Val Acc=0.6026; Loss Att={'forw': '1.0000'}; Train Acc=0.599; Test Acc=0.5943; Entropy={'forw': '8.6751'}; Entropy_Test=

0 1.5809948 1.5800012
Logged Successfully: 
2018-05-17 19:07:58epoch=38; Loss Pred=1.6003; Val Loss=1.5800; Val Acc=0.6046; Loss Att={'forw': '1.0000'}; Train Acc=0.600; Test Acc=0.5942; Entropy={'forw': '8.6751'}; Entropy_Test=

0 1.5800012 1.5468185
Logged Successfully: 
2018-05-17 19:07:59epoch=39; Loss Pred=1.5676; Val Loss=1.5468; Val Acc=0.6078; Loss Att={'forw': '1.0000'}; Train Acc=0.604; Test Acc=0.5963; Entropy={'forw': '8.7100'}; Entropy_Test=

0 1.5468185 1.5449989
Logged Successfully: 
2018-05-17 19:08:00epoch=40; Loss Pred

0 1.289938 1.2936041
Logged Successfully: 
2018-05-17 19:08:27epoch=72; Loss Pred=1.2925; Val Loss=1.2936; Val Acc=0.6589; Loss Att={'forw': '1.0000'}; Train Acc=0.664; Test Acc=0.6585; Entropy={'forw': '8.2535'}; Entropy_Test=

1 1.289938 1.2835417
Logged Successfully: 
2018-05-17 19:08:27epoch=73; Loss Pred=1.2883; Val Loss=1.2835; Val Acc=0.6596; Loss Att={'forw': '1.0000'}; Train Acc=0.665; Test Acc=0.6595; Entropy={'forw': '8.1836'}; Entropy_Test=

0 1.2835417 1.2877133
Logged Successfully: 
2018-05-17 19:08:28epoch=74; Loss Pred=1.2884; Val Loss=1.2877; Val Acc=0.6593; Loss Att={'forw': '1.0000'}; Train Acc=0.665; Test Acc=0.6580; Entropy={'forw': '8.1836'}; Entropy_Test=

1 1.2835417 1.2783679
Logged Successfully: 
2018-05-17 19:08:29epoch=75; Loss Pred=1.2840; Val Loss=1.2784; Val Acc=0.6605; Loss Att={'forw': '1.0000'}; Train Acc=0.666; Test Acc=0.6595; Entropy={'forw': '8.1824'}; Entropy_Test=

0 1.2783679 1.2815919
Logged Successfully: 
2018-05-17 19:08:30epoch=76; Loss Pred

4 1.231703 1.2305459
Logged Successfully: 
2018-05-17 19:08:56epoch=108; Loss Pred=1.2299; Val Loss=1.2305; Val Acc=0.6665; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6641; Entropy={'forw': '8.4476'}; Entropy_Test=

0 1.2305459 1.229234
Logged Successfully: 
2018-05-17 19:08:57epoch=109; Loss Pred=1.2282; Val Loss=1.2292; Val Acc=0.6678; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6669; Entropy={'forw': '8.4687'}; Entropy_Test=

0 1.229234 1.2315247
Logged Successfully: 
2018-05-17 19:08:58epoch=110; Loss Pred=1.2271; Val Loss=1.2315; Val Acc=0.6691; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6636; Entropy={'forw': '8.4687'}; Entropy_Test=

1 1.229234 1.2242539
Logged Successfully: 
2018-05-17 19:08:59epoch=111; Loss Pred=1.2259; Val Loss=1.2243; Val Acc=0.6667; Loss Att={'forw': '1.0000'}; Train Acc=0.670; Test Acc=0.6659; Entropy={'forw': '8.4738'}; Entropy_Test=

0 1.2242539 1.2268114
Logged Successfully: 
2018-05-17 19:09:00epoch=112; Loss P

3 1.1994011 1.2017353
Logged Successfully: 
2018-05-17 19:09:25epoch=144; Loss Pred=1.1987; Val Loss=1.2017; Val Acc=0.6679; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6664; Entropy={'forw': '8.8671'}; Entropy_Test=

4 1.1994011 1.1988779
Logged Successfully: 
2018-05-17 19:09:25epoch=145; Loss Pred=1.1988; Val Loss=1.1989; Val Acc=0.6708; Loss Att={'forw': '1.0000'}; Train Acc=0.672; Test Acc=0.6677; Entropy={'forw': '8.8801'}; Entropy_Test=

0 1.1988779 1.1998618
Logged Successfully: 
2018-05-17 19:09:26epoch=146; Loss Pred=1.1967; Val Loss=1.1999; Val Acc=0.6677; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6671; Entropy={'forw': '8.8801'}; Entropy_Test=

1 1.1988779 1.198002
Logged Successfully: 
2018-05-17 19:09:27epoch=147; Loss Pred=1.1953; Val Loss=1.1980; Val Acc=0.6695; Loss Att={'forw': '1.0000'}; Train Acc=0.672; Test Acc=0.6680; Entropy={'forw': '8.8846'}; Entropy_Test=

0 1.198002 1.1955723
Logged Successfully: 
2018-05-17 19:09:28epoch=148; Loss

6 1.1794456 1.1849756
Logged Successfully: 
2018-05-17 19:09:52epoch=180; Loss Pred=1.1820; Val Loss=1.1850; Val Acc=0.6717; Loss Att={'forw': '1.0000'}; Train Acc=0.673; Test Acc=0.6691; Entropy={'forw': '9.0356'}; Entropy_Test=

7 1.1794456 1.1793892
Logged Successfully: 
2018-05-17 19:09:53epoch=181; Loss Pred=1.1811; Val Loss=1.1794; Val Acc=0.6727; Loss Att={'forw': '1.0000'}; Train Acc=0.673; Test Acc=0.6694; Entropy={'forw': '9.0398'}; Entropy_Test=

0 1.1793892 1.1803254
Logged Successfully: 
2018-05-17 19:09:54epoch=182; Loss Pred=1.1800; Val Loss=1.1803; Val Acc=0.6724; Loss Att={'forw': '1.0000'}; Train Acc=0.673; Test Acc=0.6677; Entropy={'forw': '9.0398'}; Entropy_Test=

1 1.1793892 1.1837202
Logged Successfully: 
2018-05-17 19:09:55epoch=183; Loss Pred=1.1801; Val Loss=1.1837; Val Acc=0.6710; Loss Att={'forw': '1.0000'}; Train Acc=0.673; Test Acc=0.6686; Entropy={'forw': '9.0542'}; Entropy_Test=

2 1.1793892 1.1805818
Logged Successfully: 
2018-05-17 19:09:56epoch=184; Lo

1 1.1674829 1.1677581
Logged Successfully: 
2018-05-17 19:10:20epoch=216; Loss Pred=1.1678; Val Loss=1.1678; Val Acc=0.6751; Loss Att={'forw': '1.0000'}; Train Acc=0.674; Test Acc=0.6690; Entropy={'forw': '9.1531'}; Entropy_Test=

2 1.1674829 1.1662028
Logged Successfully: 
2018-05-17 19:10:21epoch=217; Loss Pred=1.1648; Val Loss=1.1662; Val Acc=0.6740; Loss Att={'forw': '1.0000'}; Train Acc=0.675; Test Acc=0.6717; Entropy={'forw': '9.1628'}; Entropy_Test=

0 1.1662028 1.1686077
Logged Successfully: 
2018-05-17 19:10:22epoch=218; Loss Pred=1.1671; Val Loss=1.1686; Val Acc=0.6743; Loss Att={'forw': '1.0000'}; Train Acc=0.674; Test Acc=0.6697; Entropy={'forw': '9.1628'}; Entropy_Test=

1 1.1662028 1.1639657
Logged Successfully: 
2018-05-17 19:10:23epoch=219; Loss Pred=1.1660; Val Loss=1.1640; Val Acc=0.6711; Loss Att={'forw': '1.0000'}; Train Acc=0.674; Test Acc=0.6691; Entropy={'forw': '9.1512'}; Entropy_Test=

0 1.1639657 1.1677965
Logged Successfully: 
2018-05-17 19:10:23epoch=220; Lo

2 1.1572131 1.1589288
Logged Successfully: 
2018-05-17 19:10:47epoch=252; Loss Pred=1.1554; Val Loss=1.1589; Val Acc=0.6749; Loss Att={'forw': '1.0000'}; Train Acc=0.675; Test Acc=0.6700; Entropy={'forw': '9.2352'}; Entropy_Test=

3 1.1572131 1.1577281
Logged Successfully: 
2018-05-17 19:10:48epoch=253; Loss Pred=1.1578; Val Loss=1.1577; Val Acc=0.6740; Loss Att={'forw': '1.0000'}; Train Acc=0.676; Test Acc=0.6697; Entropy={'forw': '9.2528'}; Entropy_Test=

4 1.1572131 1.162537
Logged Successfully: 
2018-05-17 19:10:49epoch=254; Loss Pred=1.1562; Val Loss=1.1625; Val Acc=0.6723; Loss Att={'forw': '1.0000'}; Train Acc=0.676; Test Acc=0.6712; Entropy={'forw': '9.2528'}; Entropy_Test=

5 1.1572131 1.1568966
Logged Successfully: 
2018-05-17 19:10:50epoch=255; Loss Pred=1.1551; Val Loss=1.1569; Val Acc=0.6746; Loss Att={'forw': '1.0000'}; Train Acc=0.676; Test Acc=0.6711; Entropy={'forw': '9.2605'}; Entropy_Test=

0 1.1568966 1.1582212
Logged Successfully: 
2018-05-17 19:10:50epoch=256; Los

0 1.1507319 1.1535752
Logged Successfully: 
2018-05-17 19:11:14epoch=288; Loss Pred=1.1472; Val Loss=1.1536; Val Acc=0.6762; Loss Att={'forw': '1.0000'}; Train Acc=0.678; Test Acc=0.6730; Entropy={'forw': '9.2895'}; Entropy_Test=

1 1.1507319 1.1562792
Logged Successfully: 
2018-05-17 19:11:15epoch=289; Loss Pred=1.1485; Val Loss=1.1563; Val Acc=0.6765; Loss Att={'forw': '1.0000'}; Train Acc=0.677; Test Acc=0.6720; Entropy={'forw': '9.2962'}; Entropy_Test=

2 1.1507319 1.1561946
Logged Successfully: 
2018-05-17 19:11:16epoch=290; Loss Pred=1.1477; Val Loss=1.1562; Val Acc=0.6737; Loss Att={'forw': '1.0000'}; Train Acc=0.677; Test Acc=0.6709; Entropy={'forw': '9.2962'}; Entropy_Test=

3 1.1507319 1.1541377
Logged Successfully: 
2018-05-17 19:11:17epoch=291; Loss Pred=1.1474; Val Loss=1.1541; Val Acc=0.6765; Loss Att={'forw': '1.0000'}; Train Acc=0.678; Test Acc=0.6732; Entropy={'forw': '9.2875'}; Entropy_Test=

4 1.1507319 1.1526461
Logged Successfully: 
2018-05-17 19:11:17epoch=292; Lo

1 1.1469462 1.1499126
Logged Successfully: 
2018-05-17 19:11:41epoch=324; Loss Pred=1.1409; Val Loss=1.1499; Val Acc=0.6760; Loss Att={'forw': '1.0000'}; Train Acc=0.677; Test Acc=0.6725; Entropy={'forw': '9.3220'}; Entropy_Test=

2 1.1469462 1.1486777
Logged Successfully: 
2018-05-17 19:11:42epoch=325; Loss Pred=1.1396; Val Loss=1.1487; Val Acc=0.6752; Loss Att={'forw': '1.0000'}; Train Acc=0.678; Test Acc=0.6728; Entropy={'forw': '9.3126'}; Entropy_Test=

3 1.1469462 1.1527519
Logged Successfully: 
2018-05-17 19:11:43epoch=326; Loss Pred=1.1381; Val Loss=1.1528; Val Acc=0.6721; Loss Att={'forw': '1.0000'}; Train Acc=0.679; Test Acc=0.6728; Entropy={'forw': '9.3126'}; Entropy_Test=

4 1.1469462 1.1518779
Logged Successfully: 
2018-05-17 19:11:43epoch=327; Loss Pred=1.1382; Val Loss=1.1519; Val Acc=0.6758; Loss Att={'forw': '1.0000'}; Train Acc=0.678; Test Acc=0.6749; Entropy={'forw': '9.3317'}; Entropy_Test=

5 1.1469462 1.1477429
Logged Successfully: 
2018-05-17 19:11:44epoch=328; Lo

2 1.1424524 1.1491466
Logged Successfully: 
2018-05-17 19:12:08epoch=360; Loss Pred=1.1332; Val Loss=1.1491; Val Acc=0.6762; Loss Att={'forw': '1.0000'}; Train Acc=0.680; Test Acc=0.6736; Entropy={'forw': '9.3371'}; Entropy_Test=

3 1.1424524 1.1436782
Logged Successfully: 
2018-05-17 19:12:09epoch=361; Loss Pred=1.1331; Val Loss=1.1437; Val Acc=0.6735; Loss Att={'forw': '1.0000'}; Train Acc=0.680; Test Acc=0.6744; Entropy={'forw': '9.3441'}; Entropy_Test=

4 1.1424524 1.1452274
Logged Successfully: 
2018-05-17 19:12:09epoch=362; Loss Pred=1.1332; Val Loss=1.1452; Val Acc=0.6745; Loss Att={'forw': '1.0000'}; Train Acc=0.679; Test Acc=0.6718; Entropy={'forw': '9.3441'}; Entropy_Test=

5 1.1424524 1.1449375
Logged Successfully: 
2018-05-17 19:12:10epoch=363; Loss Pred=1.1339; Val Loss=1.1449; Val Acc=0.6749; Loss Att={'forw': '1.0000'}; Train Acc=0.679; Test Acc=0.6760; Entropy={'forw': '9.3489'}; Entropy_Test=

6 1.1424524 1.144459
Logged Successfully: 
2018-05-17 19:12:11epoch=364; Los

8 1.138956 1.1427295
Logged Successfully: 
2018-05-17 19:12:35epoch=396; Loss Pred=1.1283; Val Loss=1.1427; Val Acc=0.6755; Loss Att={'forw': '1.0000'}; Train Acc=0.681; Test Acc=0.6739; Entropy={'forw': '9.3512'}; Entropy_Test=

9 1.138956 1.1426214
Logged Successfully: 
2018-05-17 19:12:35epoch=397; Loss Pred=1.1299; Val Loss=1.1426; Val Acc=0.6743; Loss Att={'forw': '1.0000'}; Train Acc=0.680; Test Acc=0.6737; Entropy={'forw': '9.3447'}; Entropy_Test=

10 1.138956 1.1419479
Logged Successfully: 
2018-05-17 19:12:36epoch=398; Loss Pred=1.1270; Val Loss=1.1419; Val Acc=0.6734; Loss Att={'forw': '1.0000'}; Train Acc=0.681; Test Acc=0.6751; Entropy={'forw': '9.3447'}; Entropy_Test=

11 1.138956 1.1368746
Logged Successfully: 
2018-05-17 19:12:37epoch=399; Loss Pred=1.1278; Val Loss=1.1369; Val Acc=0.6774; Loss Att={'forw': '1.0000'}; Train Acc=0.680; Test Acc=0.6722; Entropy={'forw': '9.3409'}; Entropy_Test=

0 1.1368746 1.1425322
Logged Successfully: 
2018-05-17 19:12:38epoch=400; Loss

0 2.5064924 2.4756331
Logged Successfully: 
2018-05-17 19:13:20epoch=9; Loss Pred=2.4780; Val Loss=2.4756; Val Acc=0.1702; Loss Att={'forw': '0.8263'}; Train Acc=0.208; Test Acc=0.2074; Entropy={'forw': '6.8215'}; Entropy_Test=

0 2.4756331 2.4823046
Logged Successfully: 
2018-05-17 19:13:23epoch=10; Loss Pred=2.4772; Val Loss=2.4823; Val Acc=0.1762; Loss Att={'forw': '0.8152'}; Train Acc=0.209; Test Acc=0.2032; Entropy={'forw': '6.8180'}; Entropy_Test=

1 2.4756331 2.4306183
Logged Successfully: 
2018-05-17 19:13:25epoch=11; Loss Pred=2.4329; Val Loss=2.4306; Val Acc=0.2007; Loss Att={'forw': '0.8240'}; Train Acc=0.233; Test Acc=0.2300; Entropy={'forw': '6.9703'}; Entropy_Test=

0 2.4306183 2.4231114
Logged Successfully: 
2018-05-17 19:13:27epoch=12; Loss Pred=2.4271; Val Loss=2.4231; Val Acc=0.2078; Loss Att={'forw': '0.8210'}; Train Acc=0.244; Test Acc=0.2397; Entropy={'forw': '7.0602'}; Entropy_Test=

0 2.4231114 2.3513768
Logged Successfully: 
2018-05-17 19:13:29epoch=13; Loss Pre

1 1.7159512 1.6471499
Logged Successfully: 
2018-05-17 19:14:41epoch=45; Loss Pred=1.6608; Val Loss=1.6471; Val Acc=0.5836; Loss Att={'forw': '0.6991'}; Train Acc=0.580; Test Acc=0.5747; Entropy={'forw': '8.7859'}; Entropy_Test=

0 1.6471499 1.6464299
Logged Successfully: 
2018-05-17 19:14:43epoch=46; Loss Pred=1.6618; Val Loss=1.6464; Val Acc=0.5804; Loss Att={'forw': '0.6915'}; Train Acc=0.581; Test Acc=0.5731; Entropy={'forw': '8.7797'}; Entropy_Test=

0 1.6464299 1.5800053
Logged Successfully: 
2018-05-17 19:14:46epoch=47; Loss Pred=1.6025; Val Loss=1.5800; Val Acc=0.5994; Loss Att={'forw': '0.6833'}; Train Acc=0.589; Test Acc=0.5810; Entropy={'forw': '8.7165'}; Entropy_Test=

0 1.5800053 1.5726824
Logged Successfully: 
2018-05-17 19:14:48epoch=48; Loss Pred=1.6018; Val Loss=1.5727; Val Acc=0.5967; Loss Att={'forw': '0.6744'}; Train Acc=0.590; Test Acc=0.5811; Entropy={'forw': '8.7270'}; Entropy_Test=

0 1.5726824 1.5337846
Logged Successfully: 
2018-05-17 19:14:50epoch=49; Loss Pr

1 1.3384604 1.3370736
Logged Successfully: 
2018-05-17 19:16:01epoch=81; Loss Pred=1.3686; Val Loss=1.3371; Val Acc=0.6475; Loss Att={'forw': '0.3720'}; Train Acc=0.648; Test Acc=0.6446; Entropy={'forw': '8.2977'}; Entropy_Test=

0 1.3370736 1.344105
Logged Successfully: 
2018-05-17 19:16:03epoch=82; Loss Pred=1.3718; Val Loss=1.3441; Val Acc=0.6490; Loss Att={'forw': '0.3646'}; Train Acc=0.647; Test Acc=0.6444; Entropy={'forw': '8.2150'}; Entropy_Test=

1 1.3370736 1.3351202
Logged Successfully: 
2018-05-17 19:16:05epoch=83; Loss Pred=1.3628; Val Loss=1.3351; Val Acc=0.6454; Loss Att={'forw': '0.3581'}; Train Acc=0.649; Test Acc=0.6458; Entropy={'forw': '8.2423'}; Entropy_Test=

0 1.3351202 1.3342882
Logged Successfully: 
2018-05-17 19:16:07epoch=84; Loss Pred=1.3652; Val Loss=1.3343; Val Acc=0.6480; Loss Att={'forw': '0.3508'}; Train Acc=0.649; Test Acc=0.6454; Entropy={'forw': '8.2481'}; Entropy_Test=

0 1.3342882 1.3350003
Logged Successfully: 
2018-05-17 19:16:09epoch=85; Loss Pre

1 1.2621844 1.2603536
Logged Successfully: 
2018-05-17 19:17:21epoch=117; Loss Pred=1.2949; Val Loss=1.2604; Val Acc=0.6658; Loss Att={'forw': '0.2289'}; Train Acc=0.656; Test Acc=0.6531; Entropy={'forw': '7.9889'}; Entropy_Test=

0 1.2603536 1.2716793
Logged Successfully: 
2018-05-17 19:17:23epoch=118; Loss Pred=1.2934; Val Loss=1.2717; Val Acc=0.6632; Loss Att={'forw': '0.2273'}; Train Acc=0.657; Test Acc=0.6558; Entropy={'forw': '8.0327'}; Entropy_Test=

1 1.2603536 1.2597599
Logged Successfully: 
2018-05-17 19:17:25epoch=119; Loss Pred=1.2924; Val Loss=1.2598; Val Acc=0.6673; Loss Att={'forw': '0.2265'}; Train Acc=0.657; Test Acc=0.6552; Entropy={'forw': '8.0820'}; Entropy_Test=

0 1.2597599 1.2669641
Logged Successfully: 
2018-05-17 19:17:28epoch=120; Loss Pred=1.2922; Val Loss=1.2670; Val Acc=0.6712; Loss Att={'forw': '0.2245'}; Train Acc=0.657; Test Acc=0.6559; Entropy={'forw': '8.0433'}; Entropy_Test=

1 1.2597599 1.2570099
Logged Successfully: 
2018-05-17 19:17:30epoch=121; Lo

3 1.2259083 1.2394836
Logged Successfully: 
2018-05-17 19:18:41epoch=153; Loss Pred=1.2622; Val Loss=1.2395; Val Acc=0.6740; Loss Att={'forw': '0.2086'}; Train Acc=0.665; Test Acc=0.6643; Entropy={'forw': '7.9069'}; Entropy_Test=

4 1.2259083 1.2400951
Logged Successfully: 
2018-05-17 19:18:43epoch=154; Loss Pred=1.2620; Val Loss=1.2401; Val Acc=0.6804; Loss Att={'forw': '0.2086'}; Train Acc=0.665; Test Acc=0.6630; Entropy={'forw': '7.8869'}; Entropy_Test=

5 1.2259083 1.2327898
Logged Successfully: 
2018-05-17 19:18:45epoch=155; Loss Pred=1.2602; Val Loss=1.2328; Val Acc=0.6785; Loss Att={'forw': '0.2084'}; Train Acc=0.665; Test Acc=0.6636; Entropy={'forw': '7.8834'}; Entropy_Test=

6 1.2259083 1.2330201
Logged Successfully: 
2018-05-17 19:18:47epoch=156; Loss Pred=1.2605; Val Loss=1.2330; Val Acc=0.6755; Loss Att={'forw': '0.2077'}; Train Acc=0.665; Test Acc=0.6633; Entropy={'forw': '7.8823'}; Entropy_Test=

7 1.2259083 1.2236193
Logged Successfully: 
2018-05-17 19:18:50epoch=157; Lo

6 1.2032382 1.2226559
Logged Successfully: 
2018-05-17 19:20:00epoch=189; Loss Pred=1.2441; Val Loss=1.2227; Val Acc=0.6909; Loss Att={'forw': '0.2061'}; Train Acc=0.670; Test Acc=0.6660; Entropy={'forw': '8.1021'}; Entropy_Test=

7 1.2032382 1.204002
Logged Successfully: 
2018-05-17 19:20:03epoch=190; Loss Pred=1.2434; Val Loss=1.2040; Val Acc=0.6857; Loss Att={'forw': '0.2060'}; Train Acc=0.670; Test Acc=0.6679; Entropy={'forw': '8.1091'}; Entropy_Test=

8 1.2032382 1.2025892
Logged Successfully: 
2018-05-17 19:20:05epoch=191; Loss Pred=1.2423; Val Loss=1.2026; Val Acc=0.6833; Loss Att={'forw': '0.2059'}; Train Acc=0.670; Test Acc=0.6651; Entropy={'forw': '8.1441'}; Entropy_Test=

0 1.2025892 1.2015061
Logged Successfully: 
2018-05-17 19:20:07epoch=192; Loss Pred=1.2424; Val Loss=1.2015; Val Acc=0.6879; Loss Att={'forw': '0.2061'}; Train Acc=0.670; Test Acc=0.6682; Entropy={'forw': '8.1644'}; Entropy_Test=

0 1.2015061 1.2032636
Logged Successfully: 
2018-05-17 19:20:09epoch=193; Los

3 1.1910586 1.2059202
Logged Successfully: 
2018-05-17 19:21:19epoch=225; Loss Pred=1.2319; Val Loss=1.2059; Val Acc=0.6850; Loss Att={'forw': '0.2038'}; Train Acc=0.670; Test Acc=0.6691; Entropy={'forw': '8.3114'}; Entropy_Test=

4 1.1910586 1.1946644
Logged Successfully: 
2018-05-17 19:21:22epoch=226; Loss Pred=1.2326; Val Loss=1.1947; Val Acc=0.6837; Loss Att={'forw': '0.2039'}; Train Acc=0.670; Test Acc=0.6701; Entropy={'forw': '8.3187'}; Entropy_Test=

5 1.1910586 1.203246
Logged Successfully: 
2018-05-17 19:21:24epoch=227; Loss Pred=1.2300; Val Loss=1.2032; Val Acc=0.6833; Loss Att={'forw': '0.2032'}; Train Acc=0.670; Test Acc=0.6670; Entropy={'forw': '8.3148'}; Entropy_Test=

6 1.1910586 1.2026118
Logged Successfully: 
2018-05-17 19:21:26epoch=228; Loss Pred=1.2317; Val Loss=1.2026; Val Acc=0.6829; Loss Att={'forw': '0.2034'}; Train Acc=0.670; Test Acc=0.6687; Entropy={'forw': '8.3185'}; Entropy_Test=

7 1.1910586 1.1984127
Logged Successfully: 
2018-05-17 19:21:28epoch=229; Los

12 1.1850512 1.1846607
Logged Successfully: 
2018-05-17 19:22:38epoch=261; Loss Pred=1.2266; Val Loss=1.1847; Val Acc=0.6844; Loss Att={'forw': '0.1960'}; Train Acc=0.670; Test Acc=0.6701; Entropy={'forw': '8.3726'}; Entropy_Test=

0 1.1846607 1.1886638
Logged Successfully: 
2018-05-17 19:22:40epoch=262; Loss Pred=1.2281; Val Loss=1.1887; Val Acc=0.6888; Loss Att={'forw': '0.1953'}; Train Acc=0.671; Test Acc=0.6703; Entropy={'forw': '8.3401'}; Entropy_Test=

1 1.1846607 1.1919572
Logged Successfully: 
2018-05-17 19:22:42epoch=263; Loss Pred=1.2272; Val Loss=1.1920; Val Acc=0.6838; Loss Att={'forw': '0.1957'}; Train Acc=0.671; Test Acc=0.6689; Entropy={'forw': '8.3292'}; Entropy_Test=

2 1.1846607 1.198666
Logged Successfully: 
2018-05-17 19:22:44epoch=264; Loss Pred=1.2259; Val Loss=1.1987; Val Acc=0.6834; Loss Att={'forw': '0.1948'}; Train Acc=0.671; Test Acc=0.6695; Entropy={'forw': '8.3667'}; Entropy_Test=

3 1.1846607 1.187403
Logged Successfully: 
2018-05-17 19:22:47epoch=265; Los

0 2.4140093 2.4121416
Logged Successfully: 
2018-05-17 19:23:45epoch=12; Loss Pred=2.4269; Val Loss=2.4121; Val Acc=0.2729; Loss Att={'forw': '1.0000'}; Train Acc=0.287; Test Acc=0.2806; Entropy={'forw': '7.8468'}; Entropy_Test=

0 2.4121416 2.317695
Logged Successfully: 
2018-05-17 19:23:47epoch=13; Loss Pred=2.3352; Val Loss=2.3177; Val Acc=0.2566; Loss Att={'forw': '1.0000'}; Train Acc=0.276; Test Acc=0.2697; Entropy={'forw': '8.3363'}; Entropy_Test=

0 2.317695 2.3175666
Logged Successfully: 
2018-05-17 19:23:48epoch=14; Loss Pred=2.3352; Val Loss=2.3176; Val Acc=0.2599; Loss Att={'forw': '1.0000'}; Train Acc=0.276; Test Acc=0.2695; Entropy={'forw': '8.3363'}; Entropy_Test=

0 2.3175666 2.2711031
Logged Successfully: 
2018-05-17 19:23:49epoch=15; Loss Pred=2.2899; Val Loss=2.2711; Val Acc=0.2508; Loss Att={'forw': '1.0000'}; Train Acc=0.274; Test Acc=0.2681; Entropy={'forw': '8.6055'}; Entropy_Test=

0 2.2711031 2.2741215
Logged Successfully: 
2018-05-17 19:23:50epoch=16; Loss Pred

0 1.4474546 1.4536229
Logged Successfully: 
2018-05-17 19:24:30epoch=48; Loss Pred=1.4662; Val Loss=1.4536; Val Acc=0.6224; Loss Att={'forw': '1.0000'}; Train Acc=0.625; Test Acc=0.6209; Entropy={'forw': '8.8591'}; Entropy_Test=

1 1.4474546 1.419526
Logged Successfully: 
2018-05-17 19:24:31epoch=49; Loss Pred=1.4404; Val Loss=1.4195; Val Acc=0.6315; Loss Att={'forw': '1.0000'}; Train Acc=0.632; Test Acc=0.6277; Entropy={'forw': '8.9071'}; Entropy_Test=

0 1.419526 1.4117947
Logged Successfully: 
2018-05-17 19:24:33epoch=50; Loss Pred=1.4403; Val Loss=1.4118; Val Acc=0.6280; Loss Att={'forw': '1.0000'}; Train Acc=0.631; Test Acc=0.6279; Entropy={'forw': '8.9071'}; Entropy_Test=

0 1.4117947 1.3893272
Logged Successfully: 
2018-05-17 19:24:34epoch=51; Loss Pred=1.4197; Val Loss=1.3893; Val Acc=0.6377; Loss Att={'forw': '1.0000'}; Train Acc=0.637; Test Acc=0.6337; Entropy={'forw': '8.8179'}; Entropy_Test=

0 1.3893272 1.3965511
Logged Successfully: 
2018-05-17 19:24:35epoch=52; Loss Pred

3 1.256437 1.2559024
Logged Successfully: 
2018-05-17 19:25:15epoch=84; Loss Pred=1.2741; Val Loss=1.2559; Val Acc=0.6783; Loss Att={'forw': '1.0000'}; Train Acc=0.665; Test Acc=0.6632; Entropy={'forw': '8.3715'}; Entropy_Test=

0 1.2559024 1.2574518
Logged Successfully: 
2018-05-17 19:25:17epoch=85; Loss Pred=1.2694; Val Loss=1.2575; Val Acc=0.6767; Loss Att={'forw': '1.0000'}; Train Acc=0.665; Test Acc=0.6661; Entropy={'forw': '8.4042'}; Entropy_Test=

1 1.2559024 1.2515104
Logged Successfully: 
2018-05-17 19:25:18epoch=86; Loss Pred=1.2707; Val Loss=1.2515; Val Acc=0.6796; Loss Att={'forw': '1.0000'}; Train Acc=0.665; Test Acc=0.6631; Entropy={'forw': '8.4042'}; Entropy_Test=

0 1.2515104 1.253444
Logged Successfully: 
2018-05-17 19:25:19epoch=87; Loss Pred=1.2665; Val Loss=1.2534; Val Acc=0.6762; Loss Att={'forw': '1.0000'}; Train Acc=0.666; Test Acc=0.6629; Entropy={'forw': '8.4277'}; Entropy_Test=

1 1.2515104 1.2549784
Logged Successfully: 
2018-05-17 19:25:20epoch=88; Loss Pred

4 1.1965779 1.2080276
Logged Successfully: 
2018-05-17 19:26:00epoch=120; Loss Pred=1.2257; Val Loss=1.2080; Val Acc=0.6861; Loss Att={'forw': '1.0000'}; Train Acc=0.669; Test Acc=0.6678; Entropy={'forw': '8.8211'}; Entropy_Test=

5 1.1965779 1.2096974
Logged Successfully: 
2018-05-17 19:26:01epoch=121; Loss Pred=1.2238; Val Loss=1.2097; Val Acc=0.6859; Loss Att={'forw': '1.0000'}; Train Acc=0.669; Test Acc=0.6644; Entropy={'forw': '8.8654'}; Entropy_Test=

6 1.1965779 1.1913226
Logged Successfully: 
2018-05-17 19:26:02epoch=122; Loss Pred=1.2228; Val Loss=1.1913; Val Acc=0.6827; Loss Att={'forw': '1.0000'}; Train Acc=0.669; Test Acc=0.6675; Entropy={'forw': '8.8654'}; Entropy_Test=

0 1.1913226 1.196545
Logged Successfully: 
2018-05-17 19:26:04epoch=123; Loss Pred=1.2211; Val Loss=1.1965; Val Acc=0.6854; Loss Att={'forw': '1.0000'}; Train Acc=0.670; Test Acc=0.6656; Entropy={'forw': '8.9132'}; Entropy_Test=

1 1.1913226 1.2027333
Logged Successfully: 
2018-05-17 19:26:05epoch=124; Los

2 1.1677967 1.1733073
Logged Successfully: 
2018-05-17 19:26:42epoch=156; Loss Pred=1.1995; Val Loss=1.1733; Val Acc=0.6847; Loss Att={'forw': '1.0000'}; Train Acc=0.670; Test Acc=0.6679; Entropy={'forw': '9.2709'}; Entropy_Test=

3 1.1677967 1.1613163
Logged Successfully: 
2018-05-17 19:26:44epoch=157; Loss Pred=1.1991; Val Loss=1.1613; Val Acc=0.6858; Loss Att={'forw': '1.0000'}; Train Acc=0.670; Test Acc=0.6681; Entropy={'forw': '9.3022'}; Entropy_Test=

0 1.1613163 1.184357
Logged Successfully: 
2018-05-17 19:26:45epoch=158; Loss Pred=1.1987; Val Loss=1.1844; Val Acc=0.6840; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6658; Entropy={'forw': '9.3022'}; Entropy_Test=

1 1.1613163 1.1782836
Logged Successfully: 
2018-05-17 19:26:46epoch=159; Loss Pred=1.1986; Val Loss=1.1783; Val Acc=0.6882; Loss Att={'forw': '1.0000'}; Train Acc=0.671; Test Acc=0.6691; Entropy={'forw': '9.3510'}; Entropy_Test=

2 1.1613163 1.1694119
Logged Successfully: 
2018-05-17 19:26:47epoch=160; Los