In [None]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"
#!/usr/local/bin/python

# This version of the code trains the attractor connections with a separate
# objective function than the objective function used to train all other weights
# in the network (on the prediction task).

from __future__ import print_function
import itertools
import tensorflow as tf
import numpy as np
import sys
import argparse
import datetime


% load_ext autoreload
% autoreload

from tensorflow_helpers import *
from data_generator import generate_examples, pick_task

from helper_functions import get_batches, load_pretrained_embeddings, \
    get_model_type_str, translate_ids_to_words, \
    save_results, print_into_log, print_some_translated_sentences, \
    get_training_progress_comment
from graph_init import GRU_attractor, TANH_attractor


class EarlyStopper(object):
    """Track the best (lowest) monitored value and count non-improving checks.

    Args:
        patience_max: budget, in epochs, of non-improving evaluation before
            ``patience_ran_out`` reports True.
        disp_epoch: number of epochs between two consecutive ``update`` calls;
            used to convert the check counter into epochs.
        min_delta: a check that is not a strict improvement only counts
            against the patience when it differs from the best value by more
            than this amount.
    """

    def __init__(self, patience_max, disp_epoch, min_delta=0.00):
        self.best = 1e10  # sentinel "worse than anything" starting value
        self.patience = 0  # number of counted non-improving checks so far
        self.patience_max = patience_max
        self.display_epoch = disp_epoch
        self.min_delta = min_delta

    def update(self, current):
        """Record a new monitored value (lower is better).

        Any strict improvement replaces ``best`` and resets the counter.
        Otherwise the counter grows only if ``current`` is more than
        ``min_delta`` away from ``best``.
        """
        if self.best > current:
            self.best = current
            self.patience = 0
        elif abs(self.best - current) > self.min_delta:
            self.patience += 1

    def patience_ran_out(self):
        """Return True once the counted checks, in epochs, exceed ``patience_max``.

        Always returns a real bool; the previous version fell through and
        returned None on the negative path.
        """
        return self.patience * self.display_epoch > self.patience_max
            
def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiation so the
    exponentials cannot overflow; the shift cancels out in the ratio.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    column_totals = exponentials.sum(axis=0)  # normalise over the first axis
    return exponentials / column_totals


# Experiment configuration. The dict is handed to pick_task, generate_examples,
# the attractor cell constructors and the logging/saving helpers, and a few
# entries are rewritten by the main loop below.
ops = {
    # Recurrent cell type. This script only dispatches on "TANH" (TANH_attractor)
    # and "GRU" (GRU_attractor); the older option names vanilla, LSTM_raw,
    # LSTM_tensorflow, LSTM_attractor are not handled here.
    'model_type': "TANH",
    'hid': 150,  # hidden size; also the width of the attractor target placeholder
    'in': None,  # TBD
    'out': 1,
    #         'batch_size':n_examples, #since the sequences are 1-dimensional it's easier to just run them all at once
    # Overwritten by the main sweep (set to 15 and then 0 for every dataset fraction).
    'n_attractor_iterations': 15,
    'attractor_dynamics': "projection2",  # OPTIONS:  "" (for no attractor dynamics),
    #           "direct" (simple attractor weights applied to hidden states directly, trained with noise addition)
    #           "projection" (project the hidden state into a separate space via weights, do attraction, project back)
    #           "helper_hidden" (hidden-hidden neurons) - IMPORTANT: don't forget to add h_hid number
    # NOTE(review): "projection2" is not in the option list above; confirm it against graph_init.
    'h_hid': 300,  # helper hidden size for the "helper_hidden" attractor_dynamics mode
    'attractor_noise_level': 0.5,
    # NOTE(review): the value is spelled "bernoilli" and is not one of the listed
    # options; confirm how graph_init matches this string before correcting it.
    'attractor_noise_type': "bernoilli",  # OPTIONS: "gaussian", "dropout", "random_drop"

    # When True, the ATTRACTOR_WEIGHTS variables are added to the prediction optimizer's var_list.
    'train_attr_weights_on_pred': False,  

    'attractor_regularization': "l2_regularization",  # OPTIONS: "l2_regularization", "l2_norm"
    # Rescaled in place by the main loop whenever the training subset is larger than one batch.
    'attractor_regularization_lambda': 0.0,

    'record_mutual_information': True,
    'problem_type': "topic_classification",  # OPTIONS: parity, parity_length, majority, reber, kazakov, pos_brown, ner_german, sentiment_imdb, topic_classification
    'masking': True,#"seq", "final"
    'prediction_type': 'final_class', #'seq', 'final', 'final_class'
    'seq_len': None,

    'save_best_model': True,  # checkpoint whenever validation accuracy improves
    'reshuffle_data_each_replication': False,  # relevant for POS datasets (since they are loaded from files)
    'test_partition': 0.3,
    # NOTE(review): the prediction optimizer in this script uses a hard-coded
    # learning rate of 0.008 and does not read this value.
    'lrate': 0.0001,  # was 0.008

    # NLP related (pos_brown task)
    'bidirectional': False,
    'embedding_size': 100,  # also selects the GloVe file data/glove.6B.<size>d.txt
    'load_word_embeddings': True,
    'train_word_embeddings': False,
    'trainable_logic_symbols': 2, #make first *N* embeddings trainable(Pad, unknown, start symbols make it a separate matrix and trainable)
    'input_type': "embed",  # embed&prior, embed, prior
    'dropout': 0.2  # drop probability in range(0,1), applied to the recurrent output; 0 disables it
}

# Module-level shortcuts derived from `ops`, plus the training-loop settings.
# N_H_HIDDEN, ARCH, NOISE_LEVEL, ATTRACTOR_TYPE, TRAIN_ATTR_WEIGHTS_ON_PREDICTION,
# EARLY_STOPPING_THRESH and EARLY_STOPPING_MINIMUM_EPOCH are not read anywhere
# else in this cell.
# !!!!!!!!!!!!!!!!!!!!!!
# SEQ_LEN = 12 # number of bits in input sequence
N_HIDDEN = ops['hid']  # number of hidden units
N_H_HIDDEN = ops['h_hid']
TASK = ops['problem_type']
ARCH = ops['model_type']  # hidden layer type: "GRU" or "TANH"
NOISE_LEVEL = ops['attractor_noise_level']
# noise in training attractor net
# if >=0, Gaussian with std dev NOISE_LEVEL
# if < 0, Bernoulli dropout proportion -NOISE_LEVEL
# NOTE(review): the sign convention above is inherited from an earlier version; confirm it still holds.

# !!!!!!!!!!!!!!!!!!!!!!
INPUT_NOISE_LEVEL = 0.1  # passed through to generate_examples
ATTRACTOR_TYPE = ops['attractor_dynamics']
N_ATTRACTOR_STEPS = ops['n_attractor_iterations']
# number of time steps in attractor dynamics
# if = 0, the training loop skips the attractor update step
# (re-assigned by the main sweep for every configuration)
# !!!!!!!!!!!!!!!!!!!!!!
# ATTR_WEIGHT_CONSTRAINTS = True
# True: make attractor weights symmetric and have zero diag
# False: unconstrained
TRAIN_ATTR_WEIGHTS_ON_PREDICTION = False
# True: train attractor weights on attractor net _and_ prediction
# NOTE(review): the graph code reads ops['train_attr_weights_on_pred'], not this constant.
REPORT_BEST_TRAIN_PERFORMANCE = True
# True: report the metrics captured at the epoch with the best validation accuracy
# False: report the train/test accuracy of the last evaluated epoch
LOSS_SWITCH_FREQ = 1
# how often (in epochs) to toggle whether the prediction loss is trained;
# 0 disables toggling, so the prediction loss is trained every epoch

# pick_task returns the (possibly updated) ops together with the task dimensions.
ops, SEQ_LEN, N_INPUT, N_CLASSES, N_TRAIN, N_TEST = pick_task(ops['problem_type'],
                                                              ops)  # task (parity, majority, reber, kazakov)

# Training Parameters

TRAINING_EPOCHS = 5000
N_REPLICATIONS = 20
BATCH_SIZE = 500
DISPLAY_EPOCH = 1  # evaluate and log every DISPLAY_EPOCH epochs
EARLY_STOPPING_THRESH = 0. # 1e-3 for POS, 0.03 for Sentiment
EARLY_STOPPING_PATIENCE = 50  # in epochs
EARLY_STOPPING_MINIMUM_EPOCH = 0

# NOTEBOOK CODE

######### MAIN CODE #############################################################
# Sweep: for every training-set fraction, build and train one graph with the
# attractor steps enabled (15) and one with them disabled (0).
#0.02, 0.05, 0.1, 0.2, 0.35, 0.5, 
for dataset_part in [0.25, 0.5, 0.75, 0.99]:
    for attractor_steps in [15,0]:
#     for att_reg in [0.0]:
        # the tf seed needs to be within the context of the graph.
        # A fresh graph is created per configuration and both RNGs are re-seeded,
        # so every configuration starts from the same seeds.
        tf.reset_default_graph()
        np.random.seed(13)
        tf.set_random_seed(13)
        ops['n_attractor_iterations'] = attractor_steps
        N_ATTRACTOR_STEPS = ops['n_attractor_iterations']

        #
        # PLACEHOLDERS
        #
        if 'pos' in ops['problem_type']:
            # X will be looked up in the embedding table, so the last dimension is just a number
            X = tf.placeholder("int64", [None, SEQ_LEN], name='X')
            # last dimension is left singular, tensorflow will expect it to be an id number, not 1-hot embed
            Y = tf.placeholder("int64", [None, SEQ_LEN], name='Y')
        elif ops['problem_type'] == 'sentiment_imdb':
             # X will be looked up in the embedding table, so the last dimension is just a number
            X = tf.placeholder("int64", [None, SEQ_LEN], name='X')
            Y = tf.placeholder("int64", [None, N_CLASSES], name='Y')
        elif ops['problem_type'] == 'topic_classification':
             # X will be looked up in the embedding table, so the last dimension is just a number
            X = tf.placeholder("int64", [None, SEQ_LEN], name='X')
            # one integer label per example
            Y = tf.placeholder("int64", [None, 1], name='Y')
        elif ops['problem_type'] == 'ner_german':
            # dense (not id-encoded) inputs with one integer tag per time step
            X = tf.placeholder("float", [None, SEQ_LEN, N_INPUT])
            Y = tf.placeholder("int64", [None, SEQ_LEN])
        else:  # single output 
            X = tf.placeholder("float", [None, SEQ_LEN, N_INPUT])
            Y = tf.placeholder("int64", [None, N_CLASSES])
        # Fed with the hidden values fetched from each cell's h_net_seq_flat
        # when the attractor loss / train ops are run.
        attractor_tgt_net = tf.placeholder("float", [None, N_HIDDEN], name='attractor_tgt')

        # Embedding matrix initialization
        # Only the id-encoded text tasks go through an embedding lookup; for
        # every other task `embed` is never defined and X is used directly.
        if 'pos' in ops['problem_type'] or 'sentiment' in ops['problem_type'] or ops['problem_type'] == "topic_classification":
            # The dataset is generated here only to obtain the vocabulary maps;
            # the examples themselves are discarded.
            [_, _, _, _, _, _, maps] = generate_examples(SEQ_LEN, N_TRAIN, N_TEST,
                                                         INPUT_NOISE_LEVEL, TASK, ops)

            if ops['load_word_embeddings']:
                embeddings_loaded, _ = load_pretrained_embeddings('data/glove.6B.{}d.txt'.format(ops['embedding_size']),
                                                               maps, ops)
                if ops['trainable_logic_symbols'] > 0:
                    # Small always-trainable table, created under TASK_WEIGHTS so
                    # the prediction optimizer picks it up.
                    with tf.variable_scope("TASK_WEIGHTS"):
                        symbols_embedding = tf.get_variable("symb_embedding",
                                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                                shape=[ops['trainable_logic_symbols'], ops['embedding_size']],
                                                dtype=tf.float32,
                                                trainable=True)
                    
                # Pretrained table; trainable only if ops['train_word_embeddings'] is set.
                word_embedding = tf.get_variable("embedding",
                                            initializer=embeddings_loaded,
                                            dtype=tf.float32,
                                            trainable=ops['train_word_embeddings'])
                if ops['trainable_logic_symbols'] > 0:
                    # Symbol rows come first, so ids below trainable_logic_symbols hit
                    # the trainable table.
                    # NOTE(review): presumably embeddings_loaded already excludes those
                    # rows so later ids stay aligned - confirm in load_pretrained_embeddings.
                    embedding = tf.concat([symbols_embedding, word_embedding], axis=0)
                else:
                    embedding = word_embedding
            else:  # initialize randomly
                embedding = tf.get_variable("embedding",
                                            initializer=tf.truncated_normal_initializer(stddev=0.05),
                                            shape=[ops['vocab_size'], ops['embedding_size']],
                                            dtype=tf.float32,
                                            trainable=ops['train_word_embeddings'])
            embed_lookup = tf.nn.embedding_lookup(embedding, X)

            # load priors information
            if ops['input_type'] == 'prior' or ops['input_type'] == 'embed&prior':
                id2prior = maps['id2prior']
                word2id = maps['word2id']  # not used in this cell
                # One row of priors per id; the row width is taken from the entry for id 0.
                priors = np.zeros([len(id2prior), len(id2prior[0])]).astype("float32")
                # NOTE: the loop variable `id` shadows the builtin of the same name.
                for id, prior in id2prior.items():
                    priors[id] = prior
                priors_op = tf.get_variable("priors",
                                            initializer=priors,
                                            dtype=tf.float32,
                                            trainable=False)
                prior_lookup = tf.nn.embedding_lookup(priors_op, X)

            # Select the network input. Any other input_type leaves `embed`
            # undefined and fails later when net_inputs is built.
            if ops['input_type'] == 'embed':
                embed = embed_lookup
            elif ops['input_type'] == 'prior':
                embed = prior_lookup
            elif ops['input_type'] == 'embed&prior':
                embed = tf.concat([embed_lookup, prior_lookup], axis=2)

        # Graph + all the training variables
        # Bundle the tensors handed to the recurrent cell. The 'mask' entry is Y
        # for the POS tasks and the generic tasks, and the raw id tensor X for
        # sentiment / topic classification.
        # NOTE(review): presumably the cell derives padding positions from the
        # mask tensor - confirm in graph_init.
        if 'pos' in ops['problem_type']:
            net_inputs = {'X': embed, 'mask': Y, 'attractor_tgt_net': attractor_tgt_net}
        elif ops['problem_type'] == 'sentiment_imdb' or ops['problem_type'] == 'topic_classification':
            net_inputs = {'X': embed, 'mask': X, 'attractor_tgt_net': attractor_tgt_net}
        else:
            net_inputs = {'X': X, 'mask': Y, 'attractor_tgt_net': attractor_tgt_net}

        # Pick the cell constructor. An unsupported model_type used to leave
        # `cell` unbound and surface later as a NameError; fail here with a
        # message that names the offending value instead.
        if ops['model_type'] == "TANH":
            cell = TANH_attractor
        elif ops['model_type'] == "GRU":
            cell = GRU_attractor
        else:
            raise ValueError(
                "Unsupported ops['model_type']: {!r} (expected \"TANH\" or \"GRU\")".format(ops['model_type']))
        # Build the recurrent cell(s), the output projection and the prediction
        # loss / train / accuracy ops.
        #
        # The direction suffixes are listed explicitly. They used to be read back
        # with `G_attractors.keys()` and indexed, which depends on arbitrary dict
        # ordering on Python 2 and raises TypeError on Python 3 (dict views are
        # not indexable).
        names = ['forw', 'back'] if ops['bidirectional'] else ['forw']
        G_attractors = {}

        # Forward:
        G_forw = cell(ops, inputs=net_inputs, direction='forward', suffix=names[0])
        attr_loss_op_forw = G_forw.attr_loss_op
        attr_train_op_forw = G_forw.attr_train_op
        h_clean_seq_flat_forw = G_forw.h_clean_seq_flat  # for computing entropy of states
        h_net_seq_flat_forw = G_forw.h_net_seq_flat  # -> attractor_tgt_net placeholder
        G_attractors['forw'] = {'attr_loss_op': attr_loss_op_forw, "attr_train_op": attr_train_op_forw,
                                'h_clean_seq_flat': h_clean_seq_flat_forw, 'h_net_seq_flat': h_net_seq_flat_forw}
        G_forw_output = G_forw.output

        if ops['bidirectional']:
            # Backward:
            G_back = cell(ops, inputs=net_inputs, direction='backward', suffix=names[1])
            attr_loss_op_back = G_back.attr_loss_op
            attr_train_op_back = G_back.attr_train_op
            h_clean_seq_flat_back = G_back.h_clean_seq_flat  # for computing entropy of states
            h_net_seq_flat_back = G_back.h_net_seq_flat  # -> attractor_tgt_net placeholder
            G_attractors['back'] = {'attr_loss_op': attr_loss_op_back, "attr_train_op": attr_train_op_back,
                                    'h_clean_seq_flat': h_clean_seq_flat_back, 'h_net_seq_flat': h_net_seq_flat_back}
            G_back_output = G_back.output

            # Merge: [seq_len, batch_size, n_hid*2]
            # Note that we reverse the backward cell's output to align with original direction
            # note in "final" only prediction, one less dimension
            if 'final' in ops['prediction_type']:
                merge_index = 1
            else:
                merge_index = 2
            output = tf.concat([G_forw_output, tf.reverse(G_back_output, axis=[0])], axis=merge_index)
            input_size_final_projection = 2 * ops['hid']
        else:
            output = G_forw_output
            input_size_final_projection = ops['hid']

        if ops['dropout'] > 0.0:
            # tf.nn.dropout takes the *keep* probability and rescales the kept
            # units by 1/keep_prob.
            # NOTE(review): keep_prob is a Python constant, so this dropout op is
            # also part of every evaluation fetch that depends on `output`.
            output = tf.nn.dropout(output, keep_prob=1.0 - ops['dropout'])

        Y_ = project_into_output(Y, output, input_size_final_projection, ops['out'], ops)

        # LOSS, ACC, & TRAIN OPS
        pred_loss_op = task_loss(Y, Y_, ops)
        # NOTE(review): the learning rate is hard-coded; ops['lrate'] is not used here.
        optimizer_pred = tf.train.AdamOptimizer(learning_rate=0.008)
        # The prediction optimizer only updates variables created under TASK_WEIGHTS ...
        prediction_parameters = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "TASK_WEIGHTS")

        # ... plus the attractor weights when explicitly requested.
        if ops['train_attr_weights_on_pred']:
            prediction_parameters += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "ATTRACTOR_WEIGHTS")

        pred_train_op = optimizer_pred.minimize(pred_loss_op, var_list=prediction_parameters)
        accuracy = task_accuracy(Y, Y_, ops)

            
        # Sign of the label tensor as float: 1.0 for positive ids, 0.0 for zeros.
        # Only fetched by the ner_german precision/recall report.
        mask_op = tf.cast(tf.sign(Y), dtype=tf.float32)
        # Initialize the variables (i.e. assign their default value)
        # The op is re-run at the start of every replication.
        init = tf.global_variables_initializer()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        # One session per (dataset_part, attractor_steps) configuration; all
        # replications of that configuration share it.
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            # TODO: make a class for all "best" quantities (a lot of space)
            # Per-replication results, one entry appended after each replication
            # and handed to save_results at the end.
            saved_train_acc = []
            saved_test_acc = []
            saved_epoch = []
            saved_att_loss = []
            saved_entropy_final = []
            saved_val_acc = []
            saved_val_loss = []
            saved_traini_loss = []  # training loss (name kept as spelled)
            saver = tf.train.Saver()

            # Start training
            for replication in range(N_REPLICATIONS):
                print("********** replication ", replication, " **********")
                early_stopper = EarlyStopper(EARLY_STOPPING_PATIENCE, DISPLAY_EPOCH)
                # The validation split returned here is replaced below by one
                # carved out of the full training set.
                [X_full_train, Y_full_train, X_test, Y_test, X_val, Y_val, maps] = generate_examples(SEQ_LEN, N_TRAIN, N_TEST,
                                                                                           INPUT_NOISE_LEVEL, TASK, ops)
                # Take Only part of dataset:
                # Materialise the ids as a list so the in-place shuffle also works
                # on Python 3, where range() is immutable.
                all_ids = list(range(len(X_full_train)))
                np.random.shuffle(all_ids)
                train_part = int(dataset_part * len(X_full_train))
                # The first `train_part` shuffled ids form the training subset. The
                # slice can never exceed the dataset, so the old "too many ids"
                # guard was unreachable and has been dropped.
                ids_to_take = all_ids[:train_part]
                # The validation ids follow directly and amount to 20% of the
                # training subset, truncated if the shuffled ids run out.
                ids_for_val = all_ids[train_part:int(train_part + 0.2 * train_part)]
                X_train = X_full_train[ids_to_take, :]
                Y_train = Y_full_train[ids_to_take, :]

                if BATCH_SIZE < len(X_train):
                    # Scale the regularisation strength by the number of batches.
                    # NOTE(review): this divides the value already stored in `ops`,
                    # so it compounds across replications and sweep configurations,
                    # and it runs after the graph for this configuration was built.
                    # Harmless while lambda is 0.0; confirm the intent before using
                    # a non-zero value.
                    ops['attractor_regularization_lambda'] = ops['attractor_regularization_lambda']/(len(X_train)*1.0/BATCH_SIZE)
                    print(ops['attractor_regularization_lambda'])

                X_val, Y_val = X_full_train[ids_for_val, :], Y_full_train[ids_for_val, :]

                # NOTE(review): this overwrites the module-level N_TRAIN, which is
                # also what the next generate_examples call receives.
                N_TRAIN = len(X_train)
                print(X_train.shape, Y_train.shape, X_val.shape, Y_val.shape)

                # Log Path init-n:
                # Despite their names, both "directories" are file paths built from
                # the same .txt file name. MODEL_DIRECTORY is additionally prefixed
                # with today's date and is also used as the checkpoint path for
                # saver.save.
                COMMENT = 'dataset_starvation_experiment'
                MODEL_NAME_FILE = '{}_(att_iter{}__bidir{}__drop{})_{}.txt'.format(ops['problem_type'],
                                                                                   ops['n_attractor_iterations'],
                                                                                   ops['bidirectional'],
                                                                                   ops['dropout'],
                                                                                   COMMENT)
                LOG_DIRECTORY = 'experiments/logs/{}'.format(MODEL_NAME_FILE)
                MODEL_DIRECTORY = 'experiments/logs/{}_{}'.format(datetime.date.today(), MODEL_NAME_FILE)
                # Write the configuration summary to both targets.
                print_into_log(LOG_DIRECTORY, get_model_type_str(ops, N_TRAIN, N_TEST, SEQ_LEN))
                print_into_log(MODEL_DIRECTORY, get_model_type_str(ops, N_TRAIN, N_TEST, SEQ_LEN), supress=True)

                # Re-initialise all variables so each replication starts from scratch.
                sess.run(init)  # Run the initializer

                # Start each replication on the prediction loss; toggled per
                # LOSS_SWITCH_FREQ inside the epoch loop.
                train_prediction_loss = True
                # Metrics captured at the epoch with the best validation accuracy so far.
                best_train_acc = -1000.
                best_test_acc = 0
                best_entropy = 0.0
                best_att_loss = 0
                best_train_loss = 0
                best_val_loss = 0.0
                best_val_acc = 0.0
                best_epoch = 0
                # Epoch loop: TRAINING_EPOCHS + 1 iterations. Each iteration first
                # (every DISPLAY_EPOCH epochs) evaluates the current weights and
                # then trains, so the first report describes the freshly
                # initialised model.
                for epoch in range(1, TRAINING_EPOCHS + 2):
                    if (epoch - 1) % DISPLAY_EPOCH == 0:
                        # TRAIN set:
                        ploss, train_acc = batch_tensor_collect(sess, [pred_loss_op, accuracy],
                                                                X, Y, X_train, Y_train, BATCH_SIZE)
                        # TEST set:
                        test_acc = batch_tensor_collect(sess, [accuracy], X, Y, X_test, Y_test, BATCH_SIZE)[0]

                        # Validation set & Early stopping:
                        ploss_val, val_acc = batch_tensor_collect(sess, [pred_loss_op, accuracy],
                                                                  X, Y, X_val, Y_val, BATCH_SIZE)

                        # Precision/Recall (sequence tagging only):
                        if ops['problem_type'] == 'ner_german':
                            y_pred, y_true, mask_val = batch_tensor_collect(sess, [Y_, Y, mask_op],
                                                                X, Y, X_test, Y_test, BATCH_SIZE)
                            y_pred = np.argmax(y_pred, axis=2)

                            # Keep only the positions where the mask is non-zero.
                            Y_pred_flat = np.extract(mask_val.astype(bool), y_pred)
                            Y_test_flat = np.extract(mask_val.astype(bool), y_true)
                            print("PRECISION:",compute_f1(Y_pred_flat, Y_test_flat, maps['id2tag']))

                        # Early stopping monitors the validation loss. The check runs
                        # before the best-metrics bookkeeping below, so the epoch that
                        # triggers the stop is never recorded as "best".
                        print(early_stopper.patience, early_stopper.best, ploss_val)
                        early_stopper.update(ploss_val)
                        if early_stopper.patience_ran_out():
                            print_into_log(LOG_DIRECTORY, "STOPPED EARLY AT {}".format(epoch))
                            break

                        # ATTRACTOR(s) LOSS
                        aloss = {}
                        entropy = 0  # entropy reporting is disabled; logged as a constant
                        hid_vals_arr = batch_tensor_collect(sess, [A['h_net_seq_flat'] for att_name, A in
                                                                   G_attractors.items()],
                                                            X, Y, X_train, Y_train, BATCH_SIZE)
                        # NOTE(review): the clean hidden states are no longer consumed
                        # since the entropy computation was disabled. The fetch is kept
                        # in case running it has side effects in the graph; confirm and
                        # drop it to save a full pass over the training set.
                        h_clean_val_arr = batch_tensor_collect(sess, [A['h_clean_seq_flat'] for att_name, A in
                                                                      G_attractors.items()],
                                                               X, Y, X_train, Y_train, BATCH_SIZE)
                        # Evaluate the attractor loss in roughly batch-sized chunks of the
                        # collected hidden values and report the mean over chunks.
                        n_splits = max(1, len(X_train) // BATCH_SIZE)
                        for attractor_name, attractor_hid_vals in zip(G_attractors.keys(), hid_vals_arr):
                            A = G_attractors[attractor_name]
                            a_loss_val = [
                                sess.run(A['attr_loss_op'], feed_dict={attractor_tgt_net: batch_hid_vals})
                                for batch_hid_vals in np.array_split(attractor_hid_vals, n_splits)
                            ]
                            aloss[attractor_name] = "{:.4f}".format(np.mean(a_loss_val))

                        # Print training information:
                        print_into_log(LOG_DIRECTORY, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + get_training_progress_comment(epoch, ploss, aloss, ploss_val, val_acc, train_acc,
                                                                     test_acc, entropy))

                        # Snapshot the metrics (and optionally the weights) whenever the
                        # validation accuracy reaches a new maximum.
                        if (val_acc > best_val_acc):
                            best_train_acc = train_acc
                            best_test_acc = test_acc
                            best_att_loss = aloss
                            best_epoch = epoch
                            best_val_acc = val_acc

                            best_val_loss = ploss_val
                            best_train_loss = ploss
                            if ops['save_best_model']:
                                save_path = saver.save(sess, MODEL_DIRECTORY)
                            best_entropy = entropy
                        # The former "reached_peak" exit tested the constant expression
                        # (1.0 - 1e-15 < 0.0), which is always False; the unreachable
                        # branch has been removed.

                    # Toggle prediction-loss training every LOSS_SWITCH_FREQ epochs
                    # (never on the first epoch, never when the frequency is 0).
                    switching_enabled = LOSS_SWITCH_FREQ > 0
                    if epoch > 1 and switching_enabled and (epoch - 1) % LOSS_SWITCH_FREQ == 0:
                        train_prediction_loss = not train_prediction_loss

                    # MODEL TRAINING
                    # Neither decision changes while iterating over the batches.
                    run_prediction_step = LOSS_SWITCH_FREQ == 0 or train_prediction_loss
                    run_attractor_step = N_ATTRACTOR_STEPS > 0
                    for batch_x, batch_y in get_batches(BATCH_SIZE, X_train, Y_train):
                        batch_feed = {X: batch_x, Y: batch_y}
                        if run_prediction_step:
                            # Optimize all parameters except for attractor weights
                            _ = sess.run([pred_train_op], feed_dict=batch_feed)

                        # Attractor:
                        if not run_attractor_step:
                            continue
                        # Fetch the current hidden values for this batch, then feed
                        # them back as the attractor training targets.
                        hidden_fetches = [attractor['h_net_seq_flat'] for attractor in G_attractors.values()]
                        batch_hid_vals = sess.run(hidden_fetches, feed_dict=batch_feed)
                        for attractor, hid_vals in zip(G_attractors.values(), batch_hid_vals):
                            _ = sess.run(attractor['attr_train_op'], feed_dict={attractor_tgt_net: hid_vals})

                        # TODO: redo how you did it before in the other version:
                        # keep running attractor updates on the batch until the
                        # attractor loss is below 1.
                            
                print("Optimization Finished!")

                if (REPORT_BEST_TRAIN_PERFORMANCE):
                    saved_train_acc.append(best_train_acc)
                    saved_test_acc.append(best_test_acc)
                    saved_att_loss.append(best_att_loss)
                    saved_entropy_final.append(best_entropy)
                    saved_epoch.append(best_epoch)

                    saved_val_acc.append(best_val_acc)
                    saved_val_loss.append(best_val_loss)
                    saved_traini_loss.append(best_train_loss)
                else:
                    saved_train_acc.append(train_acc)
                    saved_test_acc.append(test_acc)
                    #             saved_att_loss.append(aloss)

            save_results(ops, saved_epoch, saved_train_acc, saved_test_acc, saved_att_loss, saved_entropy_final, saved_val_acc,
                 saved_val_loss, saved_traini_loss, N_TRAIN, N_TEST, SEQ_LEN, comment=COMMENT)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
topic_classification
((11228, 300), (11228, 1))
Loading embeddings...
(400000, ' words loaded!')
10579
3 words not found in pretrained embeddings: ['<PAD>', '<START>', "may's"]
L2 reg-n
********** replication  0  **********
topic_classification
((11228, 300), (11228, 1))
0.0
(1965, 300) (1965, 1) (393, 300) (393, 1)
Logged Successfully: 

    model_type: 		TANH bidir(False), task: topic_classification
    hid: 			150,
    h_hid: 			300
    n_attractor_iterations: 	15,
    attractor_dynamics: 	projection2
    attractor_noise_level: 	0.5
    attractor_noise_type: 	bernoilli
    attractor_regu-n: 		l2_regularization(lambda:0.0)
    word_embedding: size	(100), train(False)
    dropout: 			0.2
    TRAIN/TEST_SIZE: 	1965/2245, SEQ_LEN: 300
Logged Successfully: 
0 10000000000.0 3.8673987
Logged Successfully: 
2018-07-16 13:52:29epoch=0; Loss Pred=3.8723; Val Loss=3.8674; Val Acc=0.0051; Loss Att={'forw': '

1 2.220088 2.2385256
Logged Successfully: 
2018-07-16 13:55:17epoch=35; Loss Pred=2.2170; Val Loss=2.2385; Val Acc=0.3740; Loss Att={'forw': '0.8226'}; Train Acc=0.403; Test Acc=0.3760; Entropy=0; Entropy_Test=

2 2.220088 2.2542531
Logged Successfully: 
2018-07-16 13:55:21epoch=36; Loss Pred=2.2060; Val Loss=2.2543; Val Acc=0.3715; Loss Att={'forw': '0.8001'}; Train Acc=0.407; Test Acc=0.3820; Entropy=0; Entropy_Test=

3 2.220088 2.2075071
Logged Successfully: 
2018-07-16 13:55:26epoch=37; Loss Pred=2.1908; Val Loss=2.2075; Val Acc=0.4071; Loss Att={'forw': '0.8360'}; Train Acc=0.405; Test Acc=0.3941; Entropy=0; Entropy_Test=

0 2.2075071 2.22924
Logged Successfully: 
2018-07-16 13:55:30epoch=38; Loss Pred=2.1791; Val Loss=2.2292; Val Acc=0.4198; Loss Att={'forw': '0.8397'}; Train Acc=0.403; Test Acc=0.3897; Entropy=0; Entropy_Test=

1 2.2075071 2.1857524
Logged Successfully: 
2018-07-16 13:55:36epoch=39; Loss Pred=2.1545; Val Loss=2.1858; Val Acc=0.4224; Loss Att={'forw': '0.8149'}; 

2 2.155002 2.271946
Logged Successfully: 
2018-07-16 13:58:22epoch=74; Loss Pred=2.1644; Val Loss=2.2719; Val Acc=0.3359; Loss Att={'forw': '0.8222'}; Train Acc=0.405; Test Acc=0.3816; Entropy=0; Entropy_Test=

3 2.155002 2.2852104
Logged Successfully: 
2018-07-16 13:58:28epoch=75; Loss Pred=2.1698; Val Loss=2.2852; Val Acc=0.4097; Loss Att={'forw': '0.8803'}; Train Acc=0.398; Test Acc=0.3862; Entropy=0; Entropy_Test=

4 2.155002 2.3181372
Logged Successfully: 
2018-07-16 13:58:32epoch=76; Loss Pred=2.1663; Val Loss=2.3181; Val Acc=0.4071; Loss Att={'forw': '0.8520'}; Train Acc=0.404; Test Acc=0.3835; Entropy=0; Entropy_Test=

5 2.155002 2.2109804
Logged Successfully: 
2018-07-16 13:58:37epoch=77; Loss Pred=2.1023; Val Loss=2.2110; Val Acc=0.3664; Loss Att={'forw': '0.8438'}; Train Acc=0.399; Test Acc=0.3737; Entropy=0; Entropy_Test=

6 2.155002 2.2358713
Logged Successfully: 
2018-07-16 13:58:42epoch=78; Loss Pred=2.1106; Val Loss=2.2359; Val Acc=0.3817; Loss Att={'forw': '0.8297'}; T

1 2.0633092 2.3811915
Logged Successfully: 
2018-07-16 14:01:30epoch=113; Loss Pred=2.2490; Val Loss=2.3812; Val Acc=0.3613; Loss Att={'forw': '0.8468'}; Train Acc=0.371; Test Acc=0.3287; Entropy=0; Entropy_Test=

2 2.0633092 2.3680885
Logged Successfully: 
2018-07-16 14:01:34epoch=114; Loss Pred=2.2623; Val Loss=2.3681; Val Acc=0.3537; Loss Att={'forw': '0.8427'}; Train Acc=0.370; Test Acc=0.3263; Entropy=0; Entropy_Test=

3 2.0633092 2.2592223
Logged Successfully: 
2018-07-16 14:01:39epoch=115; Loss Pred=2.1351; Val Loss=2.2592; Val Acc=0.4071; Loss Att={'forw': '0.8443'}; Train Acc=0.463; Test Acc=0.3918; Entropy=0; Entropy_Test=

4 2.0633092 2.2638187
Logged Successfully: 
2018-07-16 14:01:43epoch=116; Loss Pred=2.1409; Val Loss=2.2638; Val Acc=0.4148; Loss Att={'forw': '0.8458'}; Train Acc=0.454; Test Acc=0.3901; Entropy=0; Entropy_Test=

5 2.0633092 2.2498994
Logged Successfully: 
2018-07-16 14:01:48epoch=117; Loss Pred=2.0992; Val Loss=2.2499; Val Acc=0.4148; Loss Att={'forw': '

40 2.0633092 2.2579348
Logged Successfully: 
2018-07-16 14:04:35epoch=152; Loss Pred=1.9076; Val Loss=2.2579; Val Acc=0.4198; Loss Att={'forw': '0.8261'}; Train Acc=0.493; Test Acc=0.3983; Entropy=0; Entropy_Test=

41 2.0633092 2.298102
Logged Successfully: 
2018-07-16 14:04:41epoch=153; Loss Pred=1.8916; Val Loss=2.2981; Val Acc=0.4148; Loss Att={'forw': '0.8300'}; Train Acc=0.498; Test Acc=0.4000; Entropy=0; Entropy_Test=

42 2.0633092 2.3076806
Logged Successfully: 
2018-07-16 14:04:45epoch=154; Loss Pred=1.9091; Val Loss=2.3077; Val Acc=0.4122; Loss Att={'forw': '0.8290'}; Train Acc=0.496; Test Acc=0.4010; Entropy=0; Entropy_Test=

43 2.0633092 2.32975
Logged Successfully: 
2018-07-16 14:04:50epoch=155; Loss Pred=1.9093; Val Loss=2.3298; Val Acc=0.4275; Loss Att={'forw': '0.8552'}; Train Acc=0.510; Test Acc=0.3975; Entropy=0; Entropy_Test=

44 2.0633092 2.3491457
Logged Successfully: 
2018-07-16 14:04:54epoch=156; Loss Pred=1.9589; Val Loss=2.3491; Val Acc=0.4198; Loss Att={'forw':

4 2.317085 2.3473012
Logged Successfully: 
2018-07-16 14:07:31epoch=26; Loss Pred=2.3003; Val Loss=2.3473; Val Acc=0.3664; Loss Att={'forw': '0.8507'}; Train Acc=0.369; Test Acc=0.3861; Entropy=0; Entropy_Test=

5 2.317085 2.3645434
Logged Successfully: 
2018-07-16 14:07:36epoch=27; Loss Pred=2.3178; Val Loss=2.3645; Val Acc=0.3817; Loss Att={'forw': '0.8670'}; Train Acc=0.353; Test Acc=0.3696; Entropy=0; Entropy_Test=

6 2.317085 2.3607314
Logged Successfully: 
2018-07-16 14:07:40epoch=28; Loss Pred=2.3226; Val Loss=2.3607; Val Acc=0.3842; Loss Att={'forw': '0.8513'}; Train Acc=0.348; Test Acc=0.3702; Entropy=0; Entropy_Test=

7 2.317085 2.3816824
Logged Successfully: 
2018-07-16 14:07:46epoch=29; Loss Pred=2.3159; Val Loss=2.3817; Val Acc=0.3639; Loss Att={'forw': '0.8137'}; Train Acc=0.374; Test Acc=0.3789; Entropy=0; Entropy_Test=

8 2.317085 2.3863828
Logged Successfully: 
2018-07-16 14:07:50epoch=30; Loss Pred=2.3128; Val Loss=2.3864; Val Acc=0.3715; Loss Att={'forw': '0.8192'}; 

13 2.1999896 2.2562218
Logged Successfully: 
2018-07-16 14:10:39epoch=65; Loss Pred=2.0902; Val Loss=2.2562; Val Acc=0.4020; Loss Att={'forw': '0.8356'}; Train Acc=0.443; Test Acc=0.4139; Entropy=0; Entropy_Test=

14 2.1999896 2.243194
Logged Successfully: 
2018-07-16 14:10:44epoch=66; Loss Pred=2.0903; Val Loss=2.2432; Val Acc=0.4020; Loss Att={'forw': '0.8272'}; Train Acc=0.444; Test Acc=0.4189; Entropy=0; Entropy_Test=

15 2.1999896 2.2222507
Logged Successfully: 
2018-07-16 14:10:49epoch=67; Loss Pred=2.0755; Val Loss=2.2223; Val Acc=0.4224; Loss Att={'forw': '0.8270'}; Train Acc=0.444; Test Acc=0.4201; Entropy=0; Entropy_Test=

16 2.1999896 2.2303064
Logged Successfully: 
2018-07-16 14:10:53epoch=68; Loss Pred=2.0853; Val Loss=2.2303; Val Acc=0.4122; Loss Att={'forw': '0.8170'}; Train Acc=0.442; Test Acc=0.4225; Entropy=0; Entropy_Test=

17 2.1999896 2.1768663
Logged Successfully: 
2018-07-16 14:10:58epoch=69; Loss Pred=2.0537; Val Loss=2.1769; Val Acc=0.4020; Loss Att={'forw': '0

9 2.057245 2.2884688
Logged Successfully: 
2018-07-16 14:13:47epoch=104; Loss Pred=2.1604; Val Loss=2.2885; Val Acc=0.3995; Loss Att={'forw': '0.8314'}; Train Acc=0.427; Test Acc=0.4013; Entropy=0; Entropy_Test=

10 2.057245 2.2659914
Logged Successfully: 
2018-07-16 14:13:52epoch=105; Loss Pred=2.1200; Val Loss=2.2660; Val Acc=0.4046; Loss Att={'forw': '0.8367'}; Train Acc=0.437; Test Acc=0.4102; Entropy=0; Entropy_Test=

11 2.057245 2.2853131
Logged Successfully: 
2018-07-16 14:13:57epoch=106; Loss Pred=2.1246; Val Loss=2.2853; Val Acc=0.4097; Loss Att={'forw': '0.8266'}; Train Acc=0.443; Test Acc=0.4071; Entropy=0; Entropy_Test=

12 2.057245 2.2747056
Logged Successfully: 
2018-07-16 14:14:02epoch=107; Loss Pred=2.0801; Val Loss=2.2747; Val Acc=0.4097; Loss Att={'forw': '0.8336'}; Train Acc=0.444; Test Acc=0.3967; Entropy=0; Entropy_Test=

13 2.057245 2.2761168
Logged Successfully: 
2018-07-16 14:14:06epoch=108; Loss Pred=2.0974; Val Loss=2.2761; Val Acc=0.3995; Loss Att={'forw': '0

48 2.057245 2.1930761
Logged Successfully: 
2018-07-16 14:16:55epoch=143; Loss Pred=1.7236; Val Loss=2.1931; Val Acc=0.4198; Loss Att={'forw': '0.8005'}; Train Acc=0.530; Test Acc=0.4078; Entropy=0; Entropy_Test=

49 2.057245 2.178505
Logged Successfully: 
2018-07-16 14:16:59epoch=144; Loss Pred=1.7337; Val Loss=2.1785; Val Acc=0.4224; Loss Att={'forw': '0.8076'}; Train Acc=0.539; Test Acc=0.4091; Entropy=0; Entropy_Test=

50 2.057245 2.200171
Logged Successfully: 
STOPPED EARLY AT 146
Optimization Finished!
********** replication  2  **********
topic_classification
((11228, 300), (11228, 1))
0.0
(1965, 300) (1965, 1) (393, 300) (393, 1)
Logged Successfully: 

    model_type: 		TANH bidir(False), task: topic_classification
    hid: 			150,
    h_hid: 			300
    n_attractor_iterations: 	15,
    attractor_dynamics: 	projection2
    attractor_noise_level: 	0.5
    attractor_noise_type: 	bernoilli
    attractor_regu-n: 		l2_regularization(lambda:0.0)
    word_embedding: size	(100), train(F

4 2.3986452 2.4294178
Logged Successfully: 
2018-07-16 14:19:51epoch=34; Loss Pred=2.2435; Val Loss=2.4294; Val Acc=0.3461; Loss Att={'forw': '0.8590'}; Train Acc=0.387; Test Acc=0.3745; Entropy=0; Entropy_Test=

5 2.3986452 2.4147847
Logged Successfully: 
2018-07-16 14:19:56epoch=35; Loss Pred=2.2145; Val Loss=2.4148; Val Acc=0.3893; Loss Att={'forw': '0.8533'}; Train Acc=0.386; Test Acc=0.3683; Entropy=0; Entropy_Test=

6 2.3986452 2.429225
Logged Successfully: 
2018-07-16 14:20:00epoch=36; Loss Pred=2.2049; Val Loss=2.4292; Val Acc=0.3740; Loss Att={'forw': '0.8524'}; Train Acc=0.385; Test Acc=0.3777; Entropy=0; Entropy_Test=

7 2.3986452 2.357992
Logged Successfully: 
2018-07-16 14:20:06epoch=37; Loss Pred=2.1876; Val Loss=2.3580; Val Acc=0.3817; Loss Att={'forw': '0.8359'}; Train Acc=0.400; Test Acc=0.3894; Entropy=0; Entropy_Test=

0 2.357992 2.3954487
Logged Successfully: 
2018-07-16 14:20:10epoch=38; Loss Pred=2.1913; Val Loss=2.3954; Val Acc=0.3791; Loss Att={'forw': '0.8485'}

7 2.2744553 2.3799462
Logged Successfully: 
2018-07-16 14:22:59epoch=73; Loss Pred=2.1256; Val Loss=2.3799; Val Acc=0.3359; Loss Att={'forw': '0.8419'}; Train Acc=0.403; Test Acc=0.3657; Entropy=0; Entropy_Test=

8 2.2744553 2.356538
Logged Successfully: 
2018-07-16 14:23:03epoch=74; Loss Pred=2.1222; Val Loss=2.3565; Val Acc=0.3461; Loss Att={'forw': '0.8441'}; Train Acc=0.400; Test Acc=0.3679; Entropy=0; Entropy_Test=

9 2.2744553 2.3653414
Logged Successfully: 
2018-07-16 14:23:08epoch=75; Loss Pred=2.0990; Val Loss=2.3653; Val Acc=0.3664; Loss Att={'forw': '0.8348'}; Train Acc=0.403; Test Acc=0.3826; Entropy=0; Entropy_Test=

10 2.2744553 2.3756049
Logged Successfully: 
2018-07-16 14:23:12epoch=76; Loss Pred=2.1232; Val Loss=2.3756; Val Acc=0.3791; Loss Att={'forw': '0.8393'}; Train Acc=0.398; Test Acc=0.3787; Entropy=0; Entropy_Test=

11 2.2744553 2.2810445
Logged Successfully: 
2018-07-16 14:23:18epoch=77; Loss Pred=2.0409; Val Loss=2.2810; Val Acc=0.4224; Loss Att={'forw': '0.84

7 2.2590036 2.4685118
Logged Successfully: 
2018-07-16 14:26:05epoch=112; Loss Pred=2.0057; Val Loss=2.4685; Val Acc=0.3944; Loss Att={'forw': '0.8014'}; Train Acc=0.445; Test Acc=0.3936; Entropy=0; Entropy_Test=

8 2.2590036 2.4265175
Logged Successfully: 
2018-07-16 14:26:10epoch=113; Loss Pred=1.9626; Val Loss=2.4265; Val Acc=0.4046; Loss Att={'forw': '0.8233'}; Train Acc=0.492; Test Acc=0.4061; Entropy=0; Entropy_Test=

9 2.2590036 2.4707708
Logged Successfully: 
2018-07-16 14:26:15epoch=114; Loss Pred=1.9889; Val Loss=2.4708; Val Acc=0.4097; Loss Att={'forw': '0.8130'}; Train Acc=0.488; Test Acc=0.4081; Entropy=0; Entropy_Test=

10 2.2590036 2.2611954
Logged Successfully: 
2018-07-16 14:26:20epoch=115; Loss Pred=1.8565; Val Loss=2.2612; Val Acc=0.4300; Loss Att={'forw': '0.8248'}; Train Acc=0.506; Test Acc=0.4117; Entropy=0; Entropy_Test=

11 2.2590036 2.3027043
Logged Successfully: 
2018-07-16 14:26:24epoch=116; Loss Pred=1.8641; Val Loss=2.3027; Val Acc=0.4122; Loss Att={'forw':

46 2.2590036 2.3669178
Logged Successfully: 
2018-07-16 14:29:12epoch=151; Loss Pred=1.7097; Val Loss=2.3669; Val Acc=0.4020; Loss Att={'forw': '0.7985'}; Train Acc=0.540; Test Acc=0.4206; Entropy=0; Entropy_Test=

47 2.2590036 2.356343
Logged Successfully: 
2018-07-16 14:29:16epoch=152; Loss Pred=1.7221; Val Loss=2.3563; Val Acc=0.4097; Loss Att={'forw': '0.7962'}; Train Acc=0.539; Test Acc=0.4217; Entropy=0; Entropy_Test=

48 2.2590036 2.3797321
Logged Successfully: 
2018-07-16 14:29:22epoch=153; Loss Pred=1.7040; Val Loss=2.3797; Val Acc=0.4097; Loss Att={'forw': '0.7981'}; Train Acc=0.537; Test Acc=0.4206; Entropy=0; Entropy_Test=

49 2.2590036 2.3664753
Logged Successfully: 
2018-07-16 14:29:26epoch=154; Loss Pred=1.7142; Val Loss=2.3665; Val Acc=0.4173; Loss Att={'forw': '0.7926'}; Train Acc=0.536; Test Acc=0.4230; Entropy=0; Entropy_Test=

50 2.2590036 2.4450214
Logged Successfully: 
STOPPED EARLY AT 156
Optimization Finished!
********** replication  3  **********
topic_classifi

0 2.271766 2.2813666
Logged Successfully: 
2018-07-16 14:32:07epoch=32; Loss Pred=2.2669; Val Loss=2.2814; Val Acc=0.3639; Loss Att={'forw': '0.8575'}; Train Acc=0.384; Test Acc=0.3897; Entropy=0; Entropy_Test=

1 2.271766 2.253207
Logged Successfully: 
2018-07-16 14:32:13epoch=33; Loss Pred=2.2407; Val Loss=2.2532; Val Acc=0.3817; Loss Att={'forw': '0.8987'}; Train Acc=0.400; Test Acc=0.3899; Entropy=0; Entropy_Test=

0 2.253207 2.2633793
Logged Successfully: 
2018-07-16 14:32:17epoch=34; Loss Pred=2.2453; Val Loss=2.2634; Val Acc=0.3690; Loss Att={'forw': '0.8415'}; Train Acc=0.403; Test Acc=0.3927; Entropy=0; Entropy_Test=

1 2.253207 2.251007
Logged Successfully: 
2018-07-16 14:32:23epoch=35; Loss Pred=2.2350; Val Loss=2.2510; Val Acc=0.3537; Loss Att={'forw': '0.8538'}; Train Acc=0.407; Test Acc=0.3901; Entropy=0; Entropy_Test=

0 2.251007 2.2266142
Logged Successfully: 
2018-07-16 14:32:26epoch=36; Loss Pred=2.2336; Val Loss=2.2266; Val Acc=0.3562; Loss Att={'forw': '0.8823'}; Tr

16 2.1529522 2.1603975
Logged Successfully: 
2018-07-16 14:35:13epoch=71; Loss Pred=2.1177; Val Loss=2.1604; Val Acc=0.3715; Loss Att={'forw': '0.8268'}; Train Acc=0.401; Test Acc=0.3917; Entropy=0; Entropy_Test=

17 2.1529522 2.1438644
Logged Successfully: 
2018-07-16 14:35:18epoch=72; Loss Pred=2.1164; Val Loss=2.1439; Val Acc=0.4071; Loss Att={'forw': '0.8117'}; Train Acc=0.396; Test Acc=0.3854; Entropy=0; Entropy_Test=

0 2.1438644 2.0634491
Logged Successfully: 
2018-07-16 14:35:23epoch=73; Loss Pred=2.0631; Val Loss=2.0634; Val Acc=0.3969; Loss Att={'forw': '0.8053'}; Train Acc=0.402; Test Acc=0.3958; Entropy=0; Entropy_Test=

0 2.0634491 2.088372
Logged Successfully: 
2018-07-16 14:35:27epoch=74; Loss Pred=2.0736; Val Loss=2.0884; Val Acc=0.3842; Loss Att={'forw': '0.7994'}; Train Acc=0.402; Test Acc=0.3959; Entropy=0; Entropy_Test=

1 2.0634491 2.0408716
Logged Successfully: 
2018-07-16 14:35:32epoch=75; Loss Pred=2.0256; Val Loss=2.0409; Val Acc=0.3511; Loss Att={'forw': '0.80

4 2.0147216 2.7804546
Logged Successfully: 
2018-07-16 14:38:19epoch=110; Loss Pred=2.6170; Val Loss=2.7805; Val Acc=0.2417; Loss Att={'forw': '0.8146'}; Train Acc=0.303; Test Acc=0.2674; Entropy=0; Entropy_Test=

5 2.0147216 2.509386
Logged Successfully: 
2018-07-16 14:38:24epoch=111; Loss Pred=2.4901; Val Loss=2.5094; Val Acc=0.3613; Loss Att={'forw': '0.8600'}; Train Acc=0.374; Test Acc=0.3462; Entropy=0; Entropy_Test=

6 2.0147216 2.5121007
Logged Successfully: 
2018-07-16 14:38:28epoch=112; Loss Pred=2.4926; Val Loss=2.5121; Val Acc=0.3588; Loss Att={'forw': '0.8664'}; Train Acc=0.366; Test Acc=0.3514; Entropy=0; Entropy_Test=

7 2.0147216 2.3782008
Logged Successfully: 
2018-07-16 14:38:34epoch=113; Loss Pred=2.3152; Val Loss=2.3782; Val Acc=0.3562; Loss Att={'forw': '0.8895'}; Train Acc=0.349; Test Acc=0.3577; Entropy=0; Entropy_Test=

8 2.0147216 2.4288065
Logged Successfully: 
2018-07-16 14:38:38epoch=114; Loss Pred=2.3149; Val Loss=2.4288; Val Acc=0.3206; Loss Att={'forw': '0

43 2.0147216 2.2411437
Logged Successfully: 
2018-07-16 14:41:24epoch=149; Loss Pred=1.9978; Val Loss=2.2411; Val Acc=0.3766; Loss Att={'forw': '0.8225'}; Train Acc=0.453; Test Acc=0.3613; Entropy=0; Entropy_Test=

44 2.0147216 2.2268734
Logged Successfully: 
2018-07-16 14:41:28epoch=150; Loss Pred=1.9858; Val Loss=2.2269; Val Acc=0.3715; Loss Att={'forw': '0.8308'}; Train Acc=0.466; Test Acc=0.3701; Entropy=0; Entropy_Test=

45 2.0147216 2.1660917
Logged Successfully: 
2018-07-16 14:41:33epoch=151; Loss Pred=1.8707; Val Loss=2.1661; Val Acc=0.3944; Loss Att={'forw': '0.8202'}; Train Acc=0.503; Test Acc=0.3908; Entropy=0; Entropy_Test=

46 2.0147216 2.177804
Logged Successfully: 
2018-07-16 14:41:37epoch=152; Loss Pred=1.8785; Val Loss=2.1778; Val Acc=0.4046; Loss Att={'forw': '0.8305'}; Train Acc=0.496; Test Acc=0.3950; Entropy=0; Entropy_Test=

47 2.0147216 2.1382914
Logged Successfully: 
2018-07-16 14:41:42epoch=153; Loss Pred=1.7995; Val Loss=2.1383; Val Acc=0.4326; Loss Att={'forw

0 2.4223216 2.4208555
Logged Successfully: 
2018-07-16 14:44:20epoch=29; Loss Pred=2.2414; Val Loss=2.4209; Val Acc=0.3817; Loss Att={'forw': '0.7949'}; Train Acc=0.388; Test Acc=0.3895; Entropy=0; Entropy_Test=

0 2.4208555 2.4183574
Logged Successfully: 
2018-07-16 14:44:24epoch=30; Loss Pred=2.2438; Val Loss=2.4184; Val Acc=0.3766; Loss Att={'forw': '0.7881'}; Train Acc=0.393; Test Acc=0.3928; Entropy=0; Entropy_Test=

0 2.4183574 2.4179933
Logged Successfully: 
2018-07-16 14:44:30epoch=31; Loss Pred=2.2234; Val Loss=2.4180; Val Acc=0.3969; Loss Att={'forw': '0.8081'}; Train Acc=0.395; Test Acc=0.3920; Entropy=0; Entropy_Test=

0 2.4179933 2.4119966
Logged Successfully: 
2018-07-16 14:44:34epoch=32; Loss Pred=2.2148; Val Loss=2.4120; Val Acc=0.3817; Loss Att={'forw': '0.7998'}; Train Acc=0.399; Test Acc=0.3958; Entropy=0; Entropy_Test=

0 2.4119966 2.3763633
Logged Successfully: 
2018-07-16 14:44:40epoch=33; Loss Pred=2.1959; Val Loss=2.3764; Val Acc=0.3791; Loss Att={'forw': '0.822

8 2.3091075 2.31367
Logged Successfully: 
2018-07-16 14:47:27epoch=68; Loss Pred=1.9932; Val Loss=2.3137; Val Acc=0.3791; Loss Att={'forw': '0.8130'}; Train Acc=0.454; Test Acc=0.4036; Entropy=0; Entropy_Test=

9 2.3091075 2.3708847
Logged Successfully: 
2018-07-16 14:47:32epoch=69; Loss Pred=2.0642; Val Loss=2.3709; Val Acc=0.3537; Loss Att={'forw': '0.8240'}; Train Acc=0.416; Test Acc=0.3775; Entropy=0; Entropy_Test=

10 2.3091075 2.3767502
Logged Successfully: 
2018-07-16 14:47:36epoch=70; Loss Pred=2.0423; Val Loss=2.3768; Val Acc=0.3511; Loss Att={'forw': '0.8186'}; Train Acc=0.428; Test Acc=0.3809; Entropy=0; Entropy_Test=

11 2.3091075 2.4011106
Logged Successfully: 
2018-07-16 14:47:42epoch=71; Loss Pred=2.0584; Val Loss=2.4011; Val Acc=0.3766; Loss Att={'forw': '0.8574'}; Train Acc=0.429; Test Acc=0.3769; Entropy=0; Entropy_Test=

12 2.3091075 2.3987913
Logged Successfully: 
2018-07-16 14:47:46epoch=72; Loss Pred=2.0804; Val Loss=2.3988; Val Acc=0.3486; Loss Att={'forw': '0.84

26 2.252908 2.6561353
Logged Successfully: 
2018-07-16 14:50:35epoch=107; Loss Pred=2.5282; Val Loss=2.6561; Val Acc=0.3333; Loss Att={'forw': '0.8031'}; Train Acc=0.315; Test Acc=0.3450; Entropy=0; Entropy_Test=

27 2.252908 2.618278
Logged Successfully: 
2018-07-16 14:50:40epoch=108; Loss Pred=2.5216; Val Loss=2.6183; Val Acc=0.3104; Loss Att={'forw': '0.7879'}; Train Acc=0.328; Test Acc=0.3556; Entropy=0; Entropy_Test=

28 2.252908 2.6244223
Logged Successfully: 
2018-07-16 14:50:45epoch=109; Loss Pred=2.5349; Val Loss=2.6244; Val Acc=0.3104; Loss Att={'forw': '0.7881'}; Train Acc=0.315; Test Acc=0.3497; Entropy=0; Entropy_Test=

29 2.252908 2.6135159
Logged Successfully: 
2018-07-16 14:50:49epoch=110; Loss Pred=2.5240; Val Loss=2.6135; Val Acc=0.3232; Loss Att={'forw': '0.7793'}; Train Acc=0.319; Test Acc=0.3499; Entropy=0; Entropy_Test=

30 2.252908 2.6501052
Logged Successfully: 
2018-07-16 14:50:55epoch=111; Loss Pred=2.5383; Val Loss=2.6501; Val Acc=0.3079; Loss Att={'forw': '0

0 2.3630998 2.3556256
Logged Successfully: 
2018-07-16 14:53:30epoch=12; Loss Pred=2.4136; Val Loss=2.3556; Val Acc=0.3359; Loss Att={'forw': '0.8589'}; Train Acc=0.348; Test Acc=0.3371; Entropy=0; Entropy_Test=

0 2.3556256 2.3748574
Logged Successfully: 
2018-07-16 14:53:35epoch=13; Loss Pred=2.4100; Val Loss=2.3749; Val Acc=0.3461; Loss Att={'forw': '0.8555'}; Train Acc=0.358; Test Acc=0.3571; Entropy=0; Entropy_Test=

1 2.3556256 2.3817902
Logged Successfully: 
2018-07-16 14:53:40epoch=14; Loss Pred=2.4016; Val Loss=2.3818; Val Acc=0.3461; Loss Att={'forw': '0.8706'}; Train Acc=0.358; Test Acc=0.3556; Entropy=0; Entropy_Test=

2 2.3556256 2.3315544
Logged Successfully: 
2018-07-16 14:53:45epoch=15; Loss Pred=2.4047; Val Loss=2.3316; Val Acc=0.3511; Loss Att={'forw': '0.8237'}; Train Acc=0.359; Test Acc=0.3580; Entropy=0; Entropy_Test=

0 2.3315544 2.358902
Logged Successfully: 
2018-07-16 14:53:49epoch=16; Loss Pred=2.4016; Val Loss=2.3589; Val Acc=0.3410; Loss Att={'forw': '0.8365

2 2.2266545 2.285494
Logged Successfully: 
2018-07-16 14:56:38epoch=51; Loss Pred=2.1829; Val Loss=2.2855; Val Acc=0.3639; Loss Att={'forw': '0.8731'}; Train Acc=0.398; Test Acc=0.3738; Entropy=0; Entropy_Test=

3 2.2266545 2.2835412
Logged Successfully: 
2018-07-16 14:56:42epoch=52; Loss Pred=2.1903; Val Loss=2.2835; Val Acc=0.3690; Loss Att={'forw': '0.8530'}; Train Acc=0.385; Test Acc=0.3740; Entropy=0; Entropy_Test=

4 2.2266545 2.273694
Logged Successfully: 
2018-07-16 14:56:48epoch=53; Loss Pred=2.1649; Val Loss=2.2737; Val Acc=0.3613; Loss Att={'forw': '0.8698'}; Train Acc=0.395; Test Acc=0.3848; Entropy=0; Entropy_Test=

5 2.2266545 2.2514653
Logged Successfully: 
2018-07-16 14:56:52epoch=54; Loss Pred=2.1568; Val Loss=2.2515; Val Acc=0.3562; Loss Att={'forw': '0.8453'}; Train Acc=0.404; Test Acc=0.3870; Entropy=0; Entropy_Test=

6 2.2266545 2.256545
Logged Successfully: 
2018-07-16 14:56:57epoch=55; Loss Pred=2.1794; Val Loss=2.2565; Val Acc=0.3791; Loss Att={'forw': '0.8207'}

14 2.1823514 2.2351425
Logged Successfully: 
2018-07-16 14:59:45epoch=90; Loss Pred=1.8961; Val Loss=2.2351; Val Acc=0.4249; Loss Att={'forw': '0.8310'}; Train Acc=0.474; Test Acc=0.4086; Entropy=0; Entropy_Test=

15 2.1823514 2.2237444
Logged Successfully: 
2018-07-16 14:59:50epoch=91; Loss Pred=1.9262; Val Loss=2.2237; Val Acc=0.3868; Loss Att={'forw': '0.8481'}; Train Acc=0.457; Test Acc=0.3722; Entropy=0; Entropy_Test=

16 2.1823514 2.241009
Logged Successfully: 
2018-07-16 14:59:55epoch=92; Loss Pred=1.9331; Val Loss=2.2410; Val Acc=0.3969; Loss Att={'forw': '0.8383'}; Train Acc=0.453; Test Acc=0.3832; Entropy=0; Entropy_Test=

17 2.1823514 2.285837
Logged Successfully: 
2018-07-16 15:00:00epoch=93; Loss Pred=1.9641; Val Loss=2.2858; Val Acc=0.3893; Loss Att={'forw': '0.8439'}; Train Acc=0.445; Test Acc=0.3672; Entropy=0; Entropy_Test=

18 2.1823514 2.2718945
Logged Successfully: 
2018-07-16 15:00:04epoch=94; Loss Pred=1.9582; Val Loss=2.2719; Val Acc=0.4046; Loss Att={'forw': '0.

0 10000000000.0 3.796451
Logged Successfully: 
2018-07-16 15:02:41epoch=0; Loss Pred=3.7841; Val Loss=3.7965; Val Acc=0.0051; Loss Att={'forw': '1.1077'}; Train Acc=0.008; Test Acc=0.0081; Entropy=0; Entropy_Test=

0 3.796451 2.59911
Logged Successfully: 
2018-07-16 15:02:46epoch=1; Loss Pred=2.5909; Val Loss=2.5991; Val Acc=0.3155; Loss Att={'forw': '1.0987'}; Train Acc=0.360; Test Acc=0.3566; Entropy=0; Entropy_Test=

0 2.59911 2.6157205
Logged Successfully: 
2018-07-16 15:02:50epoch=2; Loss Pred=2.6085; Val Loss=2.6157; Val Acc=0.3155; Loss Att={'forw': '0.8493'}; Train Acc=0.359; Test Acc=0.3562; Entropy=0; Entropy_Test=

1 2.59911 2.5292048
Logged Successfully: 
2018-07-16 15:02:56epoch=3; Loss Pred=2.4711; Val Loss=2.5292; Val Acc=0.3155; Loss Att={'forw': '1.0700'}; Train Acc=0.358; Test Acc=0.3577; Entropy=0; Entropy_Test=

0 2.5292048 2.5164459
Logged Successfully: 
2018-07-16 15:03:00epoch=4; Loss Pred=2.4649; Val Loss=2.5164; Val Acc=0.3155; Loss Att={'forw': '0.8497'}; Trai

In [3]:
cell = GRU_attractor

In [5]:
cell(1, 1, 1, 1)

UnboundLocalError: local variable 'X' referenced before assignment