In [1]:
import numpy as np
import tensorflow as tf
import json
from datetime import datetime
import os
import sys
sys.path.append('../')
import retriever
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  



In [2]:
trainset_file = '../data/training/order_train_data.bin'
testset_file = '../data/training/order_test_data.bin'
vocab_file =  '../data/metadata/w2v_vocab.json'
params_dir_tmp = '../data/training/models/All/'
embed_path =  '../data/metadata/w2v.bin'

<h3>Each entry in this list has the following structure:</h3>
<ul>
<li>entry[0]: query indexes </li>
<li>entry[1:n]: n items where each item is [bounding box vector, bounding box spaital features]. Note that different enteries might have different 'n' </li>
<li>entry[n+1]: integer, entry[ 1 + entry[n+1]] is the ture bbox </li>
</ul>

In [3]:
trainset = np.load(open(trainset_file, 'rb'))
trainset = [item for item in trainset if len(item)>2 and len(item[0])>0]
print('trainset barches #:', len(trainset))

test_N_val_set = np.load(open(testset_file, 'rb'))
test_N_val_set = [item for item in test_N_val_set if len(item)>2 and len(item[0])>0]
np.random.shuffle(test_N_val_set)
np.random.shuffle(test_N_val_set)
testset = test_N_val_set[:int(0.5*len(test_N_val_set))]
valset = test_N_val_set[int(0.5*len(test_N_val_set)):]

print('test barches #:', len(test_N_val_set))
print('val barches #:', len(valset))

with open(vocab_file, 'r') as f:
    vocab = json.loads(f.read())

trainset barches #: 297
test barches #: 297
val barches #: 149


In [4]:
vocab['<unk>'] = len(vocab)

In [5]:
len(vocab), vocab['<unk>']

(8242, 8241)

In [6]:
embed_vecs = np.load(open(embed_path, 'rb')).astype(np.float32)

<h3> augment_data function </h3>
<br>We try sevral regularization methods. One of the things I've tried is to add data points where for each data I pick a query from a random data point and a set of bbox from a different random point. We build the labels (bboxes) distribution by giving an equal probability to each label. <br><br>
The augment_data function does just that but the label of each added poind is writen as -1*number of bboxes. When we build the batch it self (function build_data in class Model), if we see a negative label index, we know its an added point and we know the number of bboxes so we can build the correct distribution.  

In [7]:
def augment_data(data, ratio=0.5, addNoise=False):
        '''
        The function add data points. 
        We pick a query from a random data point,
        and a set of bbox from a different random point and we join them
        to build a new data point. The label distribution of the new data point will 
        uniform, that is, all labels will have equal probability. 
        
        
        Params:
            data: a list of data entries
                                                
        Returns: a list of augmented data
            
                        
        '''
                          
        q_idx = np.random.choice(range(len(data)), int(len(data)*ratio), replace=False)
        im_idx = np.random.choice([i for i in range(len(data)) if i not in q_idx], int(len(data)*ratio))
        for i in range(len(q_idx)):
            q, im = data[q_idx[i]][0], data[im_idx[i]][1:-1]
            item = [q]
            for im_tmp in im:
                item.append(im_tmp)
            item.append(-len(im))
            data.append(item)

In [8]:
def stats(test, train, ephocs=100, title=None, params=[50, 100, 150, 200]):
    '''
    Plot metrics graphs and print some stats.
    
    Params:
        test: list. 
              Each item is a tuple, [test accuracy, test IOU, test loss]
        
        train: list. 
               Each item is a tuple, [train accuracy, train IOU, train loss, 0]
               For now we can ignore the last part in the tuple (zero)
               
        params: The hyper-parameters to iterate over, defult to number of rnn's hidden units.
    '''
    
    ephocs = range(ephocs)
    test_res = np.array(test)
    train_res = np.array(train)
    test_Glabels = ['test accuracy', 'test IOU', 'test loss']
    train_Glabels = ['train accuracy', 'train IOU', 'train loss']

    for j, param in enumerate(params):
        print('num_hidden:', param)
        print('='*(len('num_hidden:')+3))
        for i in range(len(train_Glabels)):
            plt.plot(ephocs, test_res[j][:,i])
            plt.plot(ephocs, train_res[j][:,i])
            plt.legend([test_Glabels[i], train_Glabels[i]], loc='upper left')
            if title is not None:
                plt.title('%s: %d'%(title, param))
            plt.show()

            metric = ''.join(train_Glabels[i][len('train')+1:])
            if metric=='loss':
                print('Train min %s:%.3f'%(metric, min(train_res[j][:,i])))
                print('Test min %s:%.3f'%(metric, min(test_res[j][:,i])))
            else:
                print('Train max %s:%.3f'%(metric, max(train_res[j][:,i])))
                print('Test max %s:%.3f'%(metric, max(test_res[j][:,i])))
        print('-'*100,'\n')

# ALSTM

This RNN cell has two LSTM cells, Bcell (for player B) and Acell (for Player A) and work as follow:
<ol> 
<li>We run B's cells with the true query input word, getting the un-edited state. If use_worsAttn=True we add attention on the images vectors for each word.</li>
<li>We run B's cells with the edited input word - 'unk', getting the edited state. If use_worsAttn=True we add attention on the images vectors for each word.</li>
<li>We feed the un-edited state to A's cell.</li>
<li>We run A's cells. A's input are:
<ul><li>B's un-edited state</li><li>The reward for editing a word</li><li>B's loss having no edited words.</li></ul></li>
<li>A's output is then goes throw a transformation which yields two values, one for editing a word and another for not.<br>If the the value for edit the word is higher, we pass B's edited state to the next time step, else we pass B's un_edit state.</li><br>
</ol>

In [48]:
class ALSTM(tf.nn.rnn_cell.LSTMCell):
    def __init__(self, 
                 batch_size, 
                 num_units, 
                 
                 # Size of A's attention vector.
                 words_attn_dim, 
                 # B's outputs (for each time step).
                 words_attn_states, 
                 # Inicates whether attentionvec is a padding (0) or not (1).
                 words_attn_idx, 
                 
                 # Size of B's attention vector ([image vector, spital features] size) .
                 img_attn_dim, 
                 # B's outputs (for each time step).
                 img_attn_states, 
                 
                 
                 # Inicates whether attentionvec is a padding (0) or not (1).
                 img_attn_idx, 
                 
                 unk, #'unk' word vector
                 
                 # Whehter to edit the query or not.
                 isEdit, 
                 
                 # Whether B uses words levlel attention or not.
                 use_wordAttn,
                 
                 # Probabilty for choosing an action (edit or not) randomly, 
                 editRandomlyProb, 
                 # If the action is chosen randomly. edit with probability of rnn_editProb
                 rnn_editProb,
                 
                 # Dropin and dropout ratio.
                 dropout_in, 
                 dropout_out,
                 # If True add noise instead of using 'unk'
                 editByNoise=False,
                 
                 # If useNoise is true word vec = alpha*word_vector+(1-alpha)*noise
                 useNoise=False,
                 alpha=.3,
                 
                 #this holds A's rewards and B's losses to
                 # be add to A's feature vectors.
                 reward_loss=None,
                 state_is_tuple=True
                ):
        
        
        # When useing A, the cell state will contain the concatenation 
        # of both B and A states. Therefore we set the unit number to be
        # 2*(A and B unit size).
        super().__init__(2*num_units, state_is_tuple=state_is_tuple)
    
        self.words_attn_states = words_attn_states
        self.words_attn_idx = words_attn_idx
        self.words_attn_dim = words_attn_dim
        
        self.img_attn_states = img_attn_states
        self.img_attn_idx = img_attn_idx
        self.img_attn_dim = img_attn_dim
        
        self.num_units = num_units
        self.batch_size = batch_size
        self.unk = unk 
        self.isEdit = isEdit
        self.use_wordAttn=use_wordAttn
        
        self.Bcell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(self.num_units, state_is_tuple=True), input_keep_prob=dropout_in, output_keep_prob=dropout_out)
        
        self.Acell = tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(self.num_units, state_is_tuple=True), output_keep_prob=dropout_out)
        
        
        self.editRandomlyProb = editRandomlyProb
        self.rnn_editProb = rnn_editProb
        self.reward_loss = reward_loss
        self.useNoise=useNoise
        self.alpha=alpha
        

    def call(self, inputs, state):
        '''
        Params:
            inputs: word embadding.
            state:  [B's state form privious state, A's state form privious state]
        '''
        # takse B's state from state[:self.num_units]
        Bstate_c = tf.slice(state[0], [0, 0], [-1, self.num_units])
        Bstate_h = tf.slice(state[1], [0, 0], [-1, self.num_units])
        self.Bstate =  tf.nn.rnn_cell.LSTMStateTuple(c=Bstate_c, h=Bstate_h)
         
        # If B's cell uses attention
        if self.use_wordAttn:
            words_attn = self.attention(Bstate_h, self.img_attn_states, self.img_attn_dim, self.img_attn_idx)
            new_input = tf.concat([inputs, words_attn], -1)
            Boutputs, Bnew_state =  self.Bcell(new_input, self.Bstate, 'Bcell')
        else:
            Boutputs, Bnew_state =  self.Bcell(inputs, self.Bstate, 'Bcell')

        
        def f1(): # If isEdit==True
            # takse A's state from state[self.num_units: 2*self.num_units]
            Astate_c = tf.slice(state[0], [0, self.num_units], [-1, self.num_units])
            Astate_h = tf.slice(state[1], [0, self.num_units], [-1, self.num_units])
            self.Astate =  tf.nn.rnn_cell.LSTMStateTuple(c=Astate_c, h=Astate_h)
            
            if self.useNoise: # just add noise to the edited words
                new_unk_vecs = self.alpha*inputs + (1-alpha)*tf.random_normal(shape=inputs.get_shape(), stddev=0.1)
            else: # change the edited words by 'unk'
                unk_vecs = tf.concat([self.unk for _ in range(self.batch_size)], 0) # shape: self.batch_size x 1 x embed_size
                new_unk_vecs = tf.squeeze(unk_vecs) # shape: self.batch_size x embed_size
            
            # run B's cell with unk_batch
            if self.use_wordAttn:
                new_unk = tf.concat([new_unk_vecs, words_attn], -1)
            else:
                new_unk = new_unk_vecs
            edit_output, edit_new_state = self.Bcell(new_unk, self.Bstate, 'Bcell')  
            out1, state1 = self.runCell(Boutputs, Bnew_state, edit_new_state)
            return out1, state1
        
        def f2(): 
            outs = tf.concat([Boutputs, tf.zeros((self.batch_size, self.num_units))], 1)
            new_state_c = tf.concat([Bnew_state[0], tf.zeros_like(Bnew_state[0])], 1)
            new_state_h = tf.concat([Bnew_state[1], tf.zeros_like(Bnew_state[1])], 1)
            return outs, tf.nn.rnn_cell.LSTMStateTuple(c=new_state_c, h=new_state_h)
        
        new_output, new_state = tf.cond(self.isEdit, f1, f2)
        
        return new_output, new_state
    
    def runCell(self, Boutputs, Bnew_state, edit_new_state):
        '''
        Run B's cell after editing.
        
        params:
            Boutputs: output vector without editing.
            Bnew_state: state vector without editing.
            edit_new_state: state vector after editing the input word to 'unk'. 
        '''
        
        with tf.variable_scope('runcell'):
            # get action values according to B's hidden state
            Aout, Anew_state, actions_vals = self.action_vals(Boutputs) 
            
            # We can't use a tf.cond with batch_size  conditions so....
            
            # A choose to edit a word if actions_vals[0]<actions_vals[1].
            # Note: if we edit the word cond=1, else cond=0.
            a1, a2 = tf.split(value=actions_vals, num_or_size_splits=2, axis=1)
            A_choice = tf.cast(tf.less(a1, a2), tf.float32)
            
            # If action is chosen randomly, edit word with probability self.rnn_editProb
            # Note: if we edit the word cond=1, else cond=0.
            rand = tf.multinomial(tf.log([[self.rnn_editProb, 1-self.rnn_editProb]]), self.batch_size)
            rand_choice = tf.cast(tf.less(tf.transpose(rand), 1), tf.float32) 
            
            # Choose whether to edit a word randomly with probability of editRandomlyProb
            editRandomly = tf.multinomial(tf.log([[self.editRandomlyProb, 1-self.editRandomlyProb]]), self.batch_size)
            editRandomly_choice = tf.cast(tf.less(tf.transpose(editRandomly), 1), tf.float32)
            
            # A list of decisions for each word in the batch. actions_idx[i] = 1->edit word in query i (at this time step), 0->do not edit.
            actions_idx = editRandomly_choice*rand_choice + (1-editRandomly_choice)*A_choice

            # We'd like to know the action values and decision for each word,
            # therefore theses info are placed on the first 3 dimensions of the 
            # output vector. Note that this vector is not passed to the 
            # next tiee step so it won't affect the model. 
            outs = tf.concat([actions_vals,  actions_idx], 1)

            # B's i state is replaced by the edited state if actions_idx[i]=1 (i =1, 2, ..., batch_size)
            new_edit_state_c = (1-actions_idx)*Bnew_state[0] + actions_idx*edit_new_state[0]
            new_edit_state_h = (1-actions_idx)*Bnew_state[1] + actions_idx*edit_new_state[1]


            new_outs = tf.concat([outs, tf.zeros((self.batch_size, 2*self.num_units-3))], 1)
            new_state_c = tf.concat([new_edit_state_c, Anew_state[0]], 1)
            new_state_h = tf.concat([new_edit_state_h, Anew_state[1]], 1)
            return new_outs, tf.nn.rnn_cell.LSTMStateTuple(c=new_state_c, h=new_state_h)
    
    
    def action_vals(self, Boutputs):
        '''
        Get values for editing/not editing the input word.
        
        Params:
            Boutputs: output vector without editing.
            
        Returns vals where:
            Aout: A's cell output
            Anew_state: A's cell state
            vals: Tensor where vals[0] is the value for not editing the word 
                    and vals[1] is the value for editing the word.
        '''
        
        with tf.variable_scope('action_vals') as scope:
            Aattn = self.attention(self.Astate[1], self.words_attn_states, self.words_attn_dim, self.words_attn_idx)
            
            # A's input: [input, B's output, attntion state, reward, B's loss with no edits]
            Anew_inputs = tf.concat([Boutputs, Aattn, self.reward_loss], 1)
            
            Aout, Anew_state = self.Acell(Anew_inputs, self.Astate, 'Acell')
            vals = tf.nn.relu(self.linear(Aout, 2))

            # Save the variables that only A uses. 
            # These variables will be trained separately from B's model.
            self.Avars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
            
            return Aout, Anew_state, vals
            
        
    def linear(self, inputs, output_dim, scope='linear', bias=True, reuse=False):
        with tf.variable_scope(scope, reuse=False):
            W = tf.get_variable('W', initializer=tf.contrib.layers.xavier_initializer(),
                                shape=(inputs.get_shape()[-1], output_dim))
            if bias:
                b = tf.get_variable('b', initializer=tf.constant_initializer(0.1),
                               shape=[1, output_dim])
                return tf.matmul(inputs, W) + b

        return tf.matmul(inputs, W)
    
    
    def attention(self, state, attn_states, attn_dim, attn_idx, relu=False):
        '''
        Attention mechanism (see https://arxiv.org/pdf/1409.0473.pdf)
        
        state: State from previous time step.
        attn_states: Attetntion states. 
                     Tensor of shape (batch_size x max([len(attention_vectors[i]) for i in range(bach_size)]) x attn_dim)
        attn_dim: Attention vector size.
        attn_idx,: Tensor used for masking of shape (batch_size x max([len(attention_vectors[i]) for i in range(bach_size)]). 
                   attn_idx[i, j]=1 if the j's attention vcctior of sample i  is not padding, else its equat to 0.
        '''
        
        self.attn_length = tf.shape(attn_states)[1]  
        
        # Computing... hidden_attn = W*v_att (use tf.nn.conv2d for efficiency)
        attn_vecs = tf.reshape(attn_states, [self.batch_size, self.attn_length, 1, attn_dim])
        W = tf.get_variable("attn_W", [1, 1, attn_dim, self.num_units])
        hidden_attn = tf.nn.conv2d(attn_vecs, W, [1, 1, 1, 1], "SAME")

        # Computing... hidden_s = U*v_state
        hidden_s = tf.reshape(
            self.linear(
                tf.cast(state, tf.float32), output_dim=self.num_units, scope='hidden_s_linear'), [-1, 1, 1,  self.num_units], name='hidden_s')

        # Computing alpha
        v = tf.get_variable("attn_v", [self.num_units])
        if relu:
            logits = tf.reduce_sum(v * tf.nn.relu(hidden_attn + hidden_s), [2, 3])
        else:
            logits = tf.reduce_sum(v * tf.nn.tanh(hidden_attn + hidden_s), [2, 3])

        # Masked softmax
        max_logits = tf.reduce_max(logits, axis=-1)
        masked_logits = tf.exp(logits-tf.expand_dims(max_logits, axis=1))*attn_idx
        alpha = masked_logits/tf.reduce_sum(masked_logits, axis=-1, keep_dims=True)

        a = tf.reduce_sum(tf.reshape(alpha, [-1, self.attn_length, 1, 1]) * attn_vecs, [1, 2])
        b = tf.contrib.layers.fully_connected(a, num_outputs=self.num_units)
                                               

        return b

# Training
When A joins the game, each iteration is completed via three steps:
<ul>
<li>We feed the query as it is to B and run it alone (no optimization nor training is done in this step)</li>
<li>We run A and B together (no optimization nor training is done in this step). At this step we get A's decition and B's loss on the edited query. We use B's loss to calculate the Bellman's value for each time step. </li>
<li>We again run A and B together, since now we know the real values for each time step and the real action for each time step (these will be the same as in the previous step since we did not train optimize the parameters yet), we finaly train the model</li>
</ul>
<br>
But first we check the model's performance with out A. Class Model has a set of conditioning variables that set a different ragularization methods in the model. We start by checking them with out A's interference. We can also make the model RNN become bidirectional (by setting useBidirectionalRnn to True) and use words level attention (by setting use_wordAttn to True).

In [49]:
class Model():
    def __init__(self,
                 batch_size, 
                 num_hidden, 
                 
                 #Image's vector size.
                 img_dims, 
                 
                 #Spaital features length.tra
                 bbox_dims, 
                 vocab, 
                 
                 # For learning rate exponential decay
                 decay_steps, 
                 decay_rate, 
                 
                 # whether to use bach normaliztion for the last attention layer
                 bnorm,
                 
                 embed_size=embed_vecs.shape[1],
                 
                 # Whether B uses words levlel attention or not.
                 use_wordAttn=False,
                 
                 # Whther to use bidirectional rnn
                 useBidirectionalRnn=False,
                 
                 # Urnn_norm: Whether to use batch normalization for the queries.
                 # Uatt_norm: Whether to use batch normalization for the VGG outputs.
                 Urnn_norm=True, 
                 Uatt_norm=True,
                 
                 # Whether to scale and normelize queries 
                 # embedding to have zero mean and QSTD std
                 toQscale = False,
                 Qstd = 0.1
                ):
#         with tf.device('/cpu:0'):

        self.batch_size = batch_size
        self.img_dims = img_dims
        self.bbox_dims = bbox_dims 
        self.num_hidden = num_hidden
        self.embed_size = embed_size
        self.vocab = vocab
        self.toQscale=toQscale
        self.Qstd=Qstd
        
        # length of the logest query (all queries will be padded to this length)
        self.qlen = tf.placeholder(tf.int32, name='qlen_holder')
        self.queries = tf.placeholder(tf.int32, [None, None], name='queries')
        self.img  = tf.placeholder(tf.float32, [None, None, self.img_dims], name='img')# VGG output vectors
        self.bboxes = tf.placeholder(tf.float32, [None, None, self.bbox_dims], name='bboxes')# spatial bbox's features.

        # attn_idx: inicates whether attention box is a dummy (0) or not (1).
        self.attn_idx = tf.placeholder(tf.float32, [None, None], name='attn_idx')

        self.labels = tf.placeholder(tf.float32, [None, None], name='labels')
        self.isEdit = tf.placeholder(tf.bool, name='isEdit') # whehter to edit the query or not.
        
        # Probabilty for choosing an action (edit or not) randomly, 
        self.editRandomlyProb = tf.placeholder(tf.float32, name='editRandomlyProb_holder')
        # If the action is chosen randomly. edit with probability of rnn_editProb
        self.rnn_editProb = tf.placeholder(tf.float32, name='rnn_editProb_holder')

        # After we run B with no edited words 
        # this holds B cross enthropy loss per query, de
        # and  A's rewards to be add to A's feature vectors.
        self.BCE_holder = tf.placeholder(tf.float32, [None,1], name='BCE_holder')
        self.reward_holder = tf.placeholder(tf.float32, [None,1], name='rewards_holder')

        # Dropout ratio for rnn's inputs and outpouts
        self.dropout_in = tf.placeholder(tf.float32, name='dropoutIn_holder')
        self.dropout_out = tf.placeholder(tf.float32, name='dropoutOut_holder')

        # Dropout ratio for attention vector (for the final attention layer before the loss function)
        self.dropout_img = tf.placeholder(tf.float32, name='dropoutImg_holder')
        # Dropout ratio for query vector (for the final attention layer before the loss function)
        self.dropout_q = tf.placeholder(tf.float32, name='dropoutImg_holder')
        
        # A and B learning rates.
        self.Alr = tf.placeholder(tf.float32, name='Alr_holder')
        self.Blr = tf.placeholder(tf.float32, name='Blr_holder')

        # B outputs vectors (with no words edits), These are A's attention vectors
        # which it uses to decide whter to edit a word.
        self.Aattn_vecs = tf.placeholder(tf.float32, [None, None, None], name='Aattn_vecs_holder')    
        self.unk = tf.constant([[vocab['<unk>']]], tf.int32)

        self.isTrain = tf.placeholder(tf.bool, name='isTrain_holder') 
        self.queries_lens = self.length(self.queries) # list of all the lengths  of the batch's queriey 

        # Concatinate images vectors and their spaital features. 
        # These vectors wlll be used for attenionn when 
        # we calculate the loss function.
        attn_vecs = tf.concat([self.img, self.bboxes], 2) 
        voc_size = len(self.vocab)

        # Load pre-trained word imaddings.
        # w2v_embed is not trainable.
        with tf.variable_scope('w2v'):
            w2v_embed = tf.get_variable('w2v_embed', initializer=embed_vecs, trainable=False)
            w2v_queries = tf.nn.embedding_lookup(w2v_embed, self.queries, name='w2v_queries')

        with tf.variable_scope('embed'):
            embed = tf.get_variable('embed', shape=[voc_size, self.embed_size], 
                                    initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1))
            embed_queries_tmp = tf.nn.embedding_lookup(embed, self.queries, name='embed_queries')

        embed_queries = embed_queries_tmp+w2v_queries

        with tf.variable_scope('rnn'):
            Aattn_idx = tf.cast(tf.abs(tf.sign(self.queries)), tf.float32)

            reward_loss = tf.concat([self.reward_holder, self.BCE_holder], 1)
            cell = ALSTM(num_units=self.num_hidden, 
                            words_attn_dim=self.num_hidden, 
                            words_attn_states=self.Aattn_vecs, 
                            words_attn_idx=Aattn_idx,
                            img_attn_dim=self.img_dims+self.bbox_dims,
                            img_attn_states=attn_vecs,
                            img_attn_idx=self.attn_idx,
                            batch_size=self.batch_size, 
                            unk=tf.nn.embedding_lookup(embed, self.unk),
                            isEdit=self.isEdit,
                            reward_loss=reward_loss, use_wordAttn=use_wordAttn,
                            editRandomlyProb=self.editRandomlyProb, rnn_editProb=self.rnn_editProb,
                            dropout_in=self.dropout_in, dropout_out=self.dropout_out)

            if useBidirectionalRnn:
                self.outputs, self.last_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=cell,
                    cell_bw=cell,
                    dtype=tf.float32,
                    sequence_length=self.queries_lens,
                    inputs=embed_queries)

                # self.last_states contain both forward state
                # and backward state.
                # We don't use A with bidirectional rnn
                # so no need for self.values (action values as calculated by A)
                Bstate = tf.concat(
                    [tf.slice(self.last_states[0][1], [0,0], [-1, self.num_hidden]), 
                     tf.slice(self.last_states[1][1], [0,0], [-1, self.num_hidden])], -1)
            else:
                self.outputs, self.last_states = tf.nn.dynamic_rnn(
                    cell=cell,
                    dtype=tf.float32,
                    sequence_length=self.queries_lens,
                    inputs=embed_queries)

                # self.values[0]=value for not editing, self.values[1]=value for editing
                self.values = tf.slice(self.outputs, [0,0,0], [-1,-1,2])
                Bstate = tf.slice(self.last_states[1], [0,0], [-1, self.num_hidden])  


        Avars = cell.Avars
        self.Avars = {var.name:var for var in Avars}
       

        if bnorm:
            self.scores = self.bnorm_attention(Bstate, Urnn_norm=Urnn_norm, Uatt_norm=Uatt_norm) 
        else:
            self.scores = self.attention(Bstate) 


        # Cross entophy loss per query in the batch.
        self.B_ce = -tf.reduce_sum(
                        self.labels*tf.log(self.scores+0.00000001)+
                            (1-self.labels)*tf.log((1-self.scores)+0.00000001), 
                            axis=-1)


        # We don't use A with bidirectional rnn
        if not useBidirectionalRnn:
            # A's decision for each word.
            # self.idx[i,j]=1 if word j in query i was edited, else zero.
            self.idx = tf.squeeze(tf.slice(self.outputs, [0,0,2], [-1,-1,1]))
            
            # edit_ratio: Over all ratio of edited words per query
            # 
            self.edit_ratio = tf.reduce_mean(tf.reduce_sum(
                tf.cast(self.idx, tf.float32)*tf.expand_dims(
                        1/tf.cast(self.queries_lens, tf.float32), axis=1), axis=1))

            # After running A for the first time, we get A's decisions and their values.
            # we then calulate the following tensors:
            # actions_idx[j,i] = 1 if the word i in query j was edited or 0 otherwise.     
            # bell_vall holds the values for each decision by the bellman function. 
            
           
            '''
            Now that we know what actions A took,
            we calculate the actual reward per word A gets:
            act_rewards[i,j] = idx[i,j]*edit_reward*BCE[i]/queries_lens[i]

            If word j in query i was edited then idx[i,j]=1 and:
               act_rewards[i,j] = 1*edit_reward*BCE[i]/queries_lens[i]=edit_reward*BCE[i]/queries_lens[i]
            else edited idx[i,j]=0 and:
               act_rewards[i,j] = 0*edit_reward*BCE[i]/queries_lens[i]=0
            
            if the word is '<pad>' than act_rewards[i,j]=0 always
            '''
            q_mask = tf.cast(tf.abs(tf.sign(self.queries)), tf.float32)
            act_rewards = q_mask*self.idx*self.reward_holder*self.BCE_holder/tf.expand_dims(tf.cast(self.queries_lens, tf.float32), 1)
            
            # Scale B's loss per query, when words were edited. 
            # We scale all the losses to have mean 1 and std 0.5,
            # this gives A a fixed range of (state, action) values
            Bce_min = tf.reduce_min(self.B_ce)
            Bce_max = tf.reduce_max(self.B_ce)
            Bce_scale = (self.B_ce-Bce_min)/(Bce_max-Bce_min)
            s = tf.contrib.keras.backend.std(Bce_scale, keepdims=True)
            # new_BCE is the scaled B's losses with words edits.
            new_BCE = 0.5*Bce_scale/s # 0.5 std
            new_BCE = 1 + new_BCE - tf.reduce_mean(new_BCE) # 1 mean
            
            # In order to get the values of A's real actions (see self.pyrite_val in __init)
            # Tensorflow need us to change idx to a list where each item
            # is [number of query x maximum number of words in a query, action (1 or 0 for edit or not respectively)]
            self.actions_idx = tf.concat(
                                [tf.expand_dims(tf.range(self.batch_size*self.qlen), 1), 
                                         tf.expand_dims(tf.reshape(tf.cast(self.idx,tf.int32), [-1]), 1)], -1)
            
            # Now that we have the real actions A took and
            # B's loss per query, we can calculate the REAL (word, action) value 
            # using bellman equation.
            self.bell_val = self.discount_rewards(act_rewards, new_BCE)
            
            # pyrite_val: holds A's predicted values for each of its (word, actions) pair.
            # self.pyrite_val[i,j] = 
            #            self.values[i,j][self.actions_idx[i,j]] = 
            #                               the value A predicted (i.e. self.values[i,j])
            #                               for the action it took (i.e. self.actions_idx[i,j])
            #                               at query i word j.
            # (see https://en.wikipedia.org/wiki/Pyrite.)
            pyrite_val = tf.reshape(tf.gather_nd(tf.reshape(self.values, (-1,2)), 
                                                      self.actions_idx), (self.batch_size, -1))
            
            self.pyrite_val=pyrite_val
            # RMSE loss between the real bellman value and 
            # A's predicted values. We mask the loss by zeroing the padded words.
            # A smooth factor is added to prevent the gradient from being nan. 
            self.A_loss = tf.sqrt(tf.reduce_mean(
                tf.reduce_sum(tf.square(q_mask*(self.bell_val-pyrite_val)), axis=1)/tf.expand_dims(tf.cast(self.queries_lens, tf.float32), 
                                                                                              axis=1))+0.00000001)

        self.B_loss = tf.reduce_mean(self.B_ce)
        self.Agrads = {Avar.name:Gvar for Avar, Gvar in zip(Avars, tf.gradients(self.A_loss, Avars))}
        ##############
        # Optimizers #
        ##############

        if not useBidirectionalRnn:
            # Train only A variables
            A_starter_learning_rate = self.Alr
            A_global_step = tf.Variable(0, name='A_global_step', trainable=False)
            self.A_learning_rate = tf.train.exponential_decay(A_starter_learning_rate, A_global_step,
                                                       decay_steps=decay_steps, decay_rate=decay_rate, staircase=True)
        
            self.A_optimizer =  tf.train.GradientDescentOptimizer(
                    learning_rate=self.A_learning_rate).minimize(self.A_loss, global_step=A_global_step, var_list=Avars)  

        # Train only B variables 
        B_starter_learning_rate = self.Blr
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.learning_rate = tf.train.exponential_decay(B_starter_learning_rate, self.global_step,
                                                   decay_steps=decay_steps, decay_rate=decay_rate, staircase=True)

        Bvars = [var for var in tf.trainable_variables() if var not in Avars]
        self.B_optimizer =  tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate).minimize(self.B_loss, global_step=self.global_step, 
                                                               var_list=Bvars)  

        if not os.path.exists(params_dir):
                os.makedirs(params_dir)
        self.saver = tf.train.Saver()

        
    def length(self, seq):
        '''
        Retruns real lengths (before addings) of all queries in seq  .
        '''
        return tf.cast(tf.reduce_sum(tf.sign(tf.abs(seq)), reduction_indices=1), tf.int32)
       

    def linear(self, inputs, output_dim, scope='linear', bias=True, reuse=False):

        with tf.variable_scope(scope, reuse=reuse):
            W = tf.get_variable('W', initializer=tf.contrib.layers.xavier_initializer(),
                                shape=(inputs.get_shape()[-1], output_dim))
            if bias:
                b = tf.get_variable('b', initializer=tf.constant_initializer(0.1),
                               shape=[1, output_dim])
                return tf.matmul(inputs, W) + b
            
            return tf.matmul(inputs, W)
        
    
    def Qscale(self, qVecs):
        '''
        Scale queries embedding vectors to have zero mean and std STD.
        
        Params:
            qVecs: Tensor (shape: batch_size x number_of_hidden_units) 
                   holding the queries vectors, 
        '''
        qVecs_min = tf.reduce_min(qVecs, axis=-1, keep_dims=True)
        qVecs_max = tf.reduce_max(qVecs, axis=-1, keep_dims=True)
        qVecs_scale = (qVecs-qVecs_min)/(qVecs_max-qVecs_min) # Scale to 0-1
        s = tf.contrib.keras.backend.std(qVecs_scale, axis=-1, keepdims=True)
        tmp = self.Qstd * qVecs_scale/s #Scale to have lf.Qstd std
        new_qVecs = tmp - tf.reduce_mean(tmp, axis=-1, keep_dims=True) # zero mean
        
        return new_qVecs
        

            
    def attention(self, q_embed):
        '''
        Given B's output vector, calculate the attention over 
        all the query's bounding boxes vectors, That is, calculate:
        
        probs = softmax(relu(context(Sq+Satt+b)))
        
        Where:
        Sq = <Wq, queries_states>
        Sattn = <Wattn, attention_bboxes_vectors>
        
        The  bounding box with the highest attention score will be chosen as the correct bounding box.
        
        Params:
            q_embed: Tensor of shape (batch size x num_hidden)B's outputs. 
            
        Returns:
            probs: Tensor of shape (batch_size x max bbox number for query).
                   Attention score for each bbox.
        '''
        # concatenate img vectors with spaical features
        attn_vecs = tf.concat([self.img, self.bboxes], 2)
        
        # B's outputs, shape: (batch size x num_hidden)
        if self.toQscale:
            Urnn = self.Qscale(q_embed)
        else:
            Urnn = q_embed
        
        # Attention vectors, 
        # shape: (batch size x max bbox number for query x attention vector size)
        Uatt = attn_vecs
           
        with tf.variable_scope('l1'):
            b = tf.get_variable(
                    'b', 
                    initializer=tf.constant_initializer(0.1), 
                    shape=[1, self.num_hidden])

            context = tf.get_variable(
                    'context', 
                    initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1), 
                    shape=[self.num_hidden, 1])


            Sq = tf.nn.dropout(
                self.linear(Urnn, self.num_hidden, bias=False, scope='Sq'), 
                self.dropout_q)
            
            Sattn = tf.nn.dropout(
                tf.reshape(
                    self.linear(
                        tf.reshape(Uatt, (-1, self.img_dims+self.bbox_dims)), 
                        self.num_hidden, 
                        bias=False, scope='Sattn'), 
                    [self.batch_size, -1, self.num_hidden]),
                self.dropout_img)

        out = tf.nn.relu(tf.expand_dims(Sq, 1) + Sattn + b)
        logits = tf.reshape(tf.matmul(tf.reshape(out, (-1, tf.shape(out)[-1])),  context), (tf.shape(out)[0], -1))

        # Calculate logits's masked softmax (we use self.attn_idx to mas
        max_logits = tf.reduce_max(logits, axis=-1)
        masked_logits = tf.exp(logits-tf.expand_dims(max_logits, axis=1))*self.attn_idx
        probs = self.attn_idx*masked_logits/tf.reduce_sum(masked_logits, axis=-1, keep_dims=True)

        return probs

    
    def bnorm_attention(self, q_embed, Urnn_norm=True, Uatt_norm=True):
        '''
        Given B's output vector, calculate the attention over 
        all the query's bounding boxes vectors, That is, calculate:
        
        probs = softmax(relu(context(Sq+Satt+b)))
        
        Where:
        Sq = <Wq, queries_states>
        Sattn = <Wattn, attention_bboxes_vectors>
        
        The  bounding box with the highest attention score will be chosen as the correct bounding box.
        This function uses batch normalization. 
        
        Params:
            q_embed: Tensor of shape (batch size x num_hidden)B's outputs. 
            Urnn_norm: Whether to use batch normalization for the queries.
            Uatt_norm: Whether to use batch normalization for the VGG outputs.
            
        Returns:
            probs: Tensor of shape (batch_size x max bbox number for query).
                   Attention score for each bbox.
        '''
        # concatenate img vectors with with spaical features
        attn_vecs = tf.concat([self.img, self.bboxes], 2)
        if self.toQscale:
            q_embed = self.Qscale(q_embed)
            
        Urnn = q_embed
        Uatt = attn_vecs
        
        if Urnn_norm:
            # B's outputs with bath normalization. 
            # shape: (batch size x num_hidden)
            Urnn = tf.contrib.layers.batch_norm(
                q_embed, center=True, scale=True, is_training=self.isTrain) 
        
        if Uatt_norm:
            # Attention vectors with bath normalization. 
            # shape: (batch size x max bbox number for query x attention vector size)
            Uatt = tf.contrib.layers.batch_norm(
                attn_vecs, center=True, scale=True, is_training=self.isTrain)
           
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            with tf.variable_scope('bnorm_l1'):
                b = tf.get_variable(
                        'b', 
                        initializer=tf.constant_initializer(0.1), 
                        shape=[1, self.num_hidden])

                context = tf.get_variable(
                        'context', 
                        initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1), 
                        shape=[self.num_hidden, 1])


                Sq = tf.nn.dropout(
                    self.linear(Urnn, self.num_hidden, bias=False, scope='Sq'), 
                    self.dropout_q)
                
                Sattn = tf.nn.dropout(
                            tf.reshape(
                                self.linear(
                                    tf.reshape(Uatt, (-1, self.img_dims+self.bbox_dims)), 
                                    self.num_hidden, 
                                    bias=False, scope='Sattn'), 
                                 [self.batch_size, -1, self.num_hidden]),
                            self.dropout_img)
                    
                   
            out = tf.nn.relu(tf.expand_dims(Sq, 1) + Sattn + b)
            logits = tf.reshape(tf.matmul(tf.reshape(out, (-1, tf.shape(out)[-1])),  context), (tf.shape(out)[0], -1))

            # Calculate logits's masked softmax (we use self.attn_idx to mas
            max_logits = tf.reduce_max(logits, axis=-1)
            masked_logits = tf.exp(logits-tf.expand_dims(max_logits, axis=1))*self.attn_idx
            probs = self.attn_idx*masked_logits/tf.reduce_sum(masked_logits, axis=-1, keep_dims=True)

            return probs
  
        
    def q_padding(self, seq, max_length):
        '''
        Pad  seq with vocab['<pad>'] (0) to max_length length.
        '''                  
        return seq + [self.vocab['<pad>']]*(max_length-len(seq))

    
    def build_data(self, data, start, end, imScale, addNoise=False):
        '''
        Build batch.
        ------------
        
        Params:
            data: each entry in this list has the following structure:
                  [query indexes, [bounding box vector (VGG), bounding box spaital features], ..., 
                  [bounding box vector (VGG), bounding box spaital features], index of the true label]
                  
            start/end: batch data is built from data[start:end]
            
        Returns:
            attn_idx: attn_idx[i, j]=1 if the j'th bbox in the i'th query is not padding, else equals to 0. 
            
            padded_queries: list of queries, padded to the length of the longest query in the batch.
                            Note: vocab['pad']=0
                            
            padded_im: list of bounding boxes vectors, padded to the maximum number of bbox per query.
                       Note: padded vector is vector of zeros. 
                            
            padded_bbox: list of bounding boxes spatial features, padded to the maximum number of bbox per query.
                         Note: padded vector is vector of zeros.  
        
            onehot_labels: onehot_labels[i][j]=1 if j is the true bbox for query i, else  onehot_labels[i][j]=0
            
            addNoise: Boolean. Whether to add normal noise to the images.
            
            imScale: If not None, scale the image vectors (VGG16 outputs) to have 0 mean and imScale std
                        
        '''
                          
        qlen = max([len(data[i][0]) for i in range(start, end)]) # Length fo the longest query
        imlen = max([len(data[i]) for i in range(start, end)])-2 # Maximum number of bbox per query.
        padded_queries, padded_im, padded_bbox, attn_idx = [], [], [], []
        
        # Build one hot labels from the labels index, given in the data.                  
        labels = [item[-1] for item in data[start:end]] #data[i][-1]=index of the true bbox of query i
        dist_labels = np.zeros((end-start, imlen)) #label distribution
        
        # Real data points
        dist_labels[[i for i in np.arange(end-start) if labels[i]>0], [l for l in labels if l>0]]=1
        
        # augmented data points
        for i in np.arange(end-start):
            if labels[i]<0:
                dist_labels[i] = [-1/labels[i] for _ in range(-labels[i])]+[0. for _ in range(imlen+labels[i])]
                          
        im_dim, bbox_dim = data[0][1][0].shape[1], data[0][1][1].shape[1]
        for i in range(start, end):
            padded_queries.append(self.q_padding(data[i][0], qlen))
            
            attn_idx.append([1 for _ in range(len(data[i])-2)]+[0 for _ in range(imlen-(len(data[i])-2))])
            
            padded_im.append(np.concatenate([data[i][j][0] for j in range(1, len(data[i])-1)] + 
                                       [np.full((imlen-(len(data[i])-2), im_dim), vocab['<pad>'], dtype=np.float32)], axis=0))
            
            padded_bbox.append(np.concatenate([data[i][j][1] for j in range(1, len(data[i])-1)] + 
                                       [np.full((imlen-(len(data[i])-2),bbox_dim), vocab['<pad>'], dtype=np.float32)], axis=0))
           
        
        
        if addNoise:
            padded_im+=(padded_im+np.random.normal(0, .1, np.array(padded_im).shape))*np.expand_dims(attn_idx, 2)
        else:
            padded_im=np.array(padded_im)
            
        if imScale is not None:
            # Smoothing factor
            halper = (1-np.expand_dims(np.array(attn_idx).astype(np.float32), 2))
            img_min = np.min(padded_im, axis=-1, keepdims=True)
            img_max = np.max(padded_im, axis=-1, keepdims=True)
            img_scale = (padded_im-img_min)/((img_max-img_min)+halper) # Scale to 0-1
            s = np.std(img_scale, axis=-1, keepdims=True)
            padded_im = imScale*img_scale/(s+halper)  #Scale to have imScale std
            padded_im = padded_im - np.mean(padded_im, axis=-1, keepdims=True) # scale to zero mean
            
        return qlen, np.array(attn_idx), np.array(padded_queries, dtype=np.int32), padded_im, np.array(padded_bbox), np.array(dist_labels)
            
   
    def ground(self, data=None, start=None, end=None, sess=None, feed_dict = None, isEdit=True, imScale=False):
        '''
        Given a query and a list of bboxes, the function returns the index of the referred bbox.
        '''
        isSess = (sess==None)
        if isSess:
            sess = tf.Session()
        with sess.as_default():
            if isSess:
                tf.global_variables_initializer().run()
                ckpt = tf.train.get_checkpoint_state(params_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    self.saver.restore(sess, ckpt.model_checkpoint_path) # restore all variables
                else:
                    print('Initializing variables')
            if feed_dict is None:
                qlen, attn_idx, padded_queries, padded_im, padded_bbox, labels = self.build_data(data, start, end, imScale=imScale)
                feed_dict = {
                        self.qlen:qlen,
                        self.queries:padded_queries,
                        self.img:padded_im,
                        self.bboxes:padded_bbox,
                        self.labels:labels,
                        self.attn_idx:attn_idx
                    }
            feed_dict[self.isTrain]=False
            feed_dict[self.isEdit] = isEdit
            feed_dict[self.dropout_in]=1.
            feed_dict[self.dropout_out]=1.
            feed_dict[self.dropout_img]=1.
            feed_dict[self.dropout_q]=1.
            scores = sess.run(self.scores, feed_dict=feed_dict) # get score for each bbox

        return np.argmax(scores, axis=1), np.argmax(feed_dict[self.labels], axis=1)
        
        
    def iou_accuracy(self, data, start, end, imScale, sess=None, feed_dict = None, 
                     threshold=0.5, test=False, isEdit=True, scores=[], labels=[]):
        '''
        Calculate the IOU score between the Model bbox and the true bbox.
        ''' 
        # Get score for each bbox (labels) and the true bbox index (gt_idx)  
        if len(scores)!=0  and len(labels)!=0:
            labels, gt_idx = np.argmax(scores, axis=1), np.argmax(labels, axis=1)
        else:             
            if feed_dict is None:
                labels, gt_idx = self.ground(data, start, end, sess=sess, feed_dict=feed_dict, 
                                             isEdit=isEdit, imScale=imScale)
            else: labels, gt_idx = self.ground(sess=sess, feed_dict=feed_dict, 
                                               isEdit=isEdit, imScale=imScale)
            
        acc = 0
        for i in range(start, end):
            gt = data[i][gt_idx[i-start]+1][1][0] # ground truth bbox
            crops = np.expand_dims(data[i][labels[i-start]+1][1][0], axis=0) #Model chosen bbox
            acc += (retriever.compute_iou(crops, gt)[0]>threshold) #IOU for the i sample.
            
        return acc/(end-start)
        
    def accuracy(self, imScale, data=None, start=None, end=None, sess=None, feed_dict = None, isEdit=True):
        isSess = (sess==None)
        if isSess:
            print('Building sess')
            sess = tf.Session()
        with sess.as_default():
            if isSess:
                print('Building sess used')
                tf.global_variables_initializer().run()
                ckpt = tf.train.get_checkpoint_state(params_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print('3')
                    self.saver.restore(sess, ckpt.model_checkpoint_path) # restore all variables
                else:
                    print('Initializing variables')
            if feed_dict is None:
                print('Building feed_dict')
                qlen, attn_idx, padded_queries, padded_im, padded_bbox, labels = self.build_data(data, start, end, imScale=imScale)
                feed_dict = {
                        self.qlen:qlen,
                        self.queries:padded_queries,
                        self.img:padded_im,
                        self.bboxes:padded_bbox,
                        self.attn_idx:attn_idx,
                        self.labels:labels,
                    }
                
            feed_dict[self.isTrain]=False
            feed_dict[self.isEdit] = isEdit
            feed_dict[self.dropout_in]=1.
            feed_dict[self.dropout_out]=1.
            feed_dict[self.dropout_img]=1.
            feed_dict[self.dropout_q]=1.
            scores = sess.run(self.scores, feed_dict=feed_dict)
            acc = sum(np.equal(np.argmax(scores, axis=1), np.argmax(feed_dict[self.labels], axis=1))/len(feed_dict[self.labels]))

                    
        return acc
    
    def discount_rewards(self, rewards, last_rewards, gamma=1.0):
        """ 
        take 1D float array of rewards and compute discounted reward 
        using bellman function, i.e. for some query:
                
                value_(time step t) = rewards[t] + gamma*values_(time step t+1)
    
        We need to iterate over the rewards tensor, which is tricky 
        with Tensorflow, specially when the query maximum length (rewards.shape[1])
        can change at each iteration.
        
        We start with initializer tensor with shape (batch_size+1) x self.qlen.
        The first vector in initializer is BCE (B's loss per query) all the other 
        vectors are zero vectors, that is:
        
        initializer[0][i] is B's loss for query i.
        initializer[1:batch_size] are all zero vectors.
        
        This is doen since tf.foldl does not allow the shape of the function accumulator (bell_vals) to change.
        We then use tf.foldl function to go over all the words in reverse (see tf.range(self.qlen)[::-1] in 
        tf.foldl below). tf.foldl initialize fn's argument, bell_vals, with initializer and change it at each iteration.
        
        params:
            rewards: rewards[i,j] is the reward for the action A took on word j at query i.
            last_rewards: last_rewards[i] is B's loss for query i with no edits. This is the final reward.
            gamma: discount factor.
        """
                 
        initializer = tf.concat(
            [tf.expand_dims(last_rewards,1), tf.zeros(shape=(self.batch_size, self.qlen), dtype=tf.float32)], 1)
        
        def fn(bell_vals, time_steps):
            '''
            calculate bellman function.
            
            returns: 
                bell_vals: Tensor where bell_vals[i][t] is the value for word t in query i. 
            
            algorithm description:
                initialization: bell_vals=intitializer.
                for time_step in range(maximum_time_step-1, 0, -1):
                    1. val_ts = rewards[:,time_step]+bell_vals[0] (time step t values at each query)
                    2. bell_vals = bell_vals[:,:-1] (pop last column (zero vector) from bell_vals)
                    3. push (concatenate) rewards_ts to the begining of bell_vals so that
                       bell_vals[0] = val_ts (time step t values)
                    
                
            '''
            
            val_ts = tf.slice(bell_vals, [0, 0], [-1, 1]) + tf.slice(rewards, [0, time_steps], [-1 ,1])
            bell_vals = tf.concat([val_ts, bell_vals[:,:-1]], 1)
            return bell_vals
        
        bell_vals = tf.foldl(fn, tf.range(self.qlen)[::-1], initializer=initializer)[:,:-1]
        
        return bell_vals
        
    def train(self, trn_data, tst_data, val_data, ephocs_num, edit_reward, startA=10, 
              activation_ephoc=19, muteBnum=3, start_ephoc=0, dropout_in=1., rnn_editProb=0.2,
              dropout_out=1., dropout_img=1., dropout_q=1., queries_editProb=0.95,  
              addNoise=False, imScale=None, Alr=0.1, Blr=0.05):
                          
        '''
        Params:
            trn_data: list, train set. 
 
            tst_data: list, test set.
            
            val_data: list, validation data.
                      At each ephoc>activation_ephoc, if we don't train
                      B at this ephoc than we run the model twice on val_data:
                      
                      * The first time no action will be chosen randomly. 
                        We calculate Bloss (ABloss) and the average ratio of 
                        edited words per query (p). 
                      * Then, at the second run, all the actions are chosen
                        randomly with probability of p to edit a word. Again, we
                        calculate B's loss (RandBloss)
                        
                      We then define A's gain at ephoc ep as:
                          AGain_ep = ABloss_ep - RandBloss_ep
                          
                      If AGain_ep<AGain_(ep-1) we reduce A's learning rate
                      and aventialy stop A training but keep training B with A's
                      edits.
             
            ephocs_num: number of ephocs
             
            start_ephoc: number of first ephoc.
             
            edit_reward: int, coefficient to multiply the reward by when editing a word.
             
            startA: int, Start competition only at ephoc # startA.
             
            muteBnum: After A starts, for each ephoc which A & B trains, 
                      only A will be trained for this amount of ephocs.
                   
            activation_ephoc: at ephoc numer "activation_ephoc", A will be activate.
                               That is, for (activation_ephoc-startA) number of ephocs, 
                               A will choose an action randomly and train only when B 
                               does not (see muteBnum argument).
                   
            queries_editProb: probabilty for editing a query.
            
            rnn_edit_prob: If the action is chosen randomly. edit with probability of rnn_editProb
            
            dropout_in: dropout ratio of B's rnn inputs.
            
            dropout_output: dropout ratio of B's rnn output.
            
            dropout_img: dropout ratio of images vectors before the last attention layer .
            
            addNoise: Boolean. Whether to add normal noise to the images (see build_data).
            
            imScale: If not None, scale the image vectors (VGG16 outputs) to have 0 mean and 1/imScale std
            
            Alr: A's learning rate
            
            Blr: B's learning rate
                               
        '''                  
        
        trn_nbatch = len(trn_data)
        tst_nbatch = len(tst_data)
        val_nbatch = len(val_data)
        print('# Train set size:', sum([len(batch) for batch in trn_data]))
        print('# Training batches:', trn_nbatch)
        print('# Validation set size:', sum([len(batch) for batch in val_data]))
        print('# Validation batches:', val_nbatch)
        print('# Test set size:', sum([len(batch) for batch in tst_data]))
        print('# Testing batches:', tst_nbatch)
        self.test_res, self.train_res, self.val_res = [], [], [] #list to hold reults for train, test and validation sets
        
        # Holds different between validation B loss with and without random actions selections (see val_data in params)
        self.AGain = []
        
        Nrounds = 1 # number of time editRandomlyProb was decreased (see below for details)
        AlrDecreaseNum = 0 # number of time we've decrease A's learing rate
        
        # When AlrDecreaseNum=maxDecreaseNum stop training A (toTrainA=False)
        # but keeping using it for B training.
        maxDecreaseNum = 6 
        toTrainA = True
        
        sess = tf.Session()
        with sess.as_default():
            tf.global_variables_initializer().run()
            ckpt = tf.train.get_checkpoint_state(params_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print('Loading parameters from', ckpt.model_checkpoint_path)
                self.saver.restore(sess, ckpt.model_checkpoint_path) # restore all variables
            else:
                print('Initializing variables')
                
            for ephoc in range(start_ephoc, ephocs_num):
                startTime = datetime.now().replace(microsecond=0)
                          
                # Start training A only at ephooc number startA. 
                # when ephoc>=startA, each time we train B we train 
                # only A for muteBnum ephocs, so at ephoc startA, only 
                # A will train for muteBnum ephocs and B will only then be trained again.
                toTrainB = toTrainA==False or ephoc<startA or (ephoc-(startA-1))%(muteBnum+1)==0 # Whether to train only B 
                  
                '''
                If activation_ephoc> ephoc >=startA, A will always choose an action
                randomly (i.e. editRandomlyProb=1) and will not edit words while B
                is training. At ephoc 'activation_ephoc', editRandomlyProb will decrease 
                and an action will be chosen randomly only if only A is training, in 
                other words, when B is training editRandomlyProb will always be 0.
                
                If ephoc >=activation_ephoc, after each round (in each round we train only A 
                for muteBnum ephocs and then B for one ephoc, so each round takes muteBnum+1 epohocs
                and the first round start at ephoc startA), the probability for choosing an action 
                randomly will decrease:
                let Nrounds be the number of time editRandomlyProb was decreased so far 
                (Nrounds initialize to be 1), than for every rounds:
                    Nrounds+=1
                    editRandomlyProb = 1/Nrounds
                    
                While training B we never choose an action randomly.
                    
                Note that we set activation_ephoc so that (activation_ephoc-startA)%(muteBnum+1)=0
                thereby insuring that editRandomlyProb will start decreasing at the begining of
                a round and won't decrease during the following round.
                '''
                assert (activation_ephoc-startA)%(muteBnum+1)==0, 'activation_ephoc-(startA-1) mod %d != 0'%(muteBnum+1)
                
                # editRandomlyProb: Probabilty for choosing an action (edit or not) randomly, 
                if ephoc>=activation_ephoc and (toTrainB or not toTrainA):
                    # Never choose an action randomly if training B or 
                    # A stoped its training (toTrainA=False).
                    # If ephoc<activation_ephoc we set isEdit to False
                    # when training B hence no words will be edited 
                    # during B's training any way, only A training 
                    # will be effected so we can ignore this case.
                    editRandomlyProb=0
                else:
                    editRandomlyProb=1/Nrounds
                    if ephoc>startA and (ephoc-startA)%(muteBnum+1)==0: # If this is true toTrainB=False
                        Nrounds+=1 
                    
                        
                # When we train B, we show B's results on the trainset 
                # with edits (itrInEphoc=1) and without (itrInEphoc=2)
                itrInEphoc=1 
                if ephoc>=startA and toTrainB:
                    # we show B's results on the trainset without edits
                    itrInEphoc=2
                    
                print('='*50,'\nTRAIN, ephoc:%d; Training B:%s; A is a know-it-all:%s; ActivateA:%s'%(ephoc, toTrainB, 
                                                                                                      toTrainA==False, 
                                                                                                      ephoc>=activation_ephoc))
                np.random.shuffle(trn_data)
                for ep in range(itrInEphoc):
                    A_trn_loss, B_trn_loss, trn_acc, trn_iou = 0, 0, 0, 0
                    ephoc_edits_ratio = 0 # edited words per query average
                    tst_loss, tst_acc, tst_iou = 0, 0, 0
                    if ep==1:
                        # If ep=1, we're in round 2 in which we only show B's 
                        # results with out editing (on the train set).
                        print('No Edit')
                        print('ooooooo')
                        
                    
                    # There's a probability of 1-queries_editProb that B will be trained 
                    # with no edits in a batch. 
                    # editRount_count counts the number of batches with competition.
                    editRount_count = 0 
                    for b in range(trn_nbatch):
                        if ep==1:
                            # If ep=1, we're in round 2 in which we only show B's 
                            # results with out editing (on the train set).
                            isEdit=False
                        else:
                            # if A started (ephoc>=startA), edit queries in this 
                            # batch with probability of queries_editProb else don't edit.
                            isEdit = ephoc>=startA and (toTrainB==False or np.random.rand(1)[0]<queries_editProb)
                        
                        np.random.shuffle(trn_data[b])
                        qlen, attn_idx, padded_queries, padded_im, padded_bbox, labels = self.build_data(trn_data[b], 
                                                                                            0, 
                                                                                            self.batch_size, 
                                                                                            addNoise=addNoise,
                                                                                            imScale=imScale)
                
                        feed_dict = {
                            self.qlen:qlen,
                            self.queries:padded_queries,
                            self.img:padded_im,
                            self.bboxes:padded_bbox,
                            self.attn_idx:attn_idx,
                            self.labels: labels,
                            self.unk:np.array([[vocab['<unk>']]]),
                            self.isEdit:isEdit,
                            self.reward_holder:np.array([[0.] for _ in range(self.batch_size)]), # dummy holder 
                            self.BCE_holder:np.array([[0.] for _ in range(self.batch_size)]), # dummy holder 
                            self.Aattn_vecs:[[[]]],# dummy holder 
                            self.dropout_in:dropout_in,
                            self.dropout_out:dropout_out,
                            self.dropout_img:dropout_img,
                            self.dropout_q:dropout_q,
                            self.editRandomlyProb:editRandomlyProb,
                            self.rnn_editProb:rnn_editProb,
                            self.Alr:Alr,
                            self.Blr:Blr,
                            self.isTrain:True
                        }
                        
                        
                        if isEdit:
                            editRount_count+=1

                            # We first run B with no edit in order to get it's loss, which will be given to A,
                            # and outputs, which A will use attention mechanism over them.   
                            feed_dict[self.isEdit] = False
                            queries_lens, B_ce, outputs = sess.run([self.queries_lens, self.B_ce, self.outputs], feed_dict=feed_dict)
                            feed_dict[self.isEdit] = True
                            
                            # B_ce contain the cross entrophy loss for each query.
                            # We scale all the losses to have mean 1 and std 0.5,
                            # this gives A a fixed range of (state, action) values
                            Bce_min = min(B_ce)
                            Bce_max = max(B_ce)
                            Bce_scale = (B_ce-Bce_min)/(Bce_max-Bce_min)
                            s = np.std(Bce_scale)
                            # BCE is the scaled B's losses
                            BCE = 0.5*Bce_scale/s
                            BCE = 1 + BCE - np.mean(BCE)
                          
                            # Reward for edit a word = edit_reward*BCE/queries_lens.
                            # A's get the reward for edit a word and loss per query as 2 of its features.
                            feed_dict[self.reward_holder]=np.expand_dims(edit_reward*BCE/queries_lens, 1)
                            feed_dict[self.BCE_holder]=np.expand_dims(BCE, 1)
                           
                            
                            # A's attention vectors are B's outputs 
                            # when not words were edited (for all time steps).
                            Aattn_vecs = outputs[:,:,:self.num_hidden]
                            feed_dict[self.Aattn_vecs]=Aattn_vecs
                           
                            # Next we run B with edits. Now we have everything we need to train A.
                            # Note that we din't optemize A or B yet, hence 
                            # B will losses and un-edited outputs is still relevent
                            if toTrainA:
                                values, pyrite_val, bell_val, idx, Agrads, Avars, gs, scores, batch_Alr, batch_Blr, B_loss, batch_edit_ratio, A_loss, _ = sess.run(
                                    [self.values, self.pyrite_val, self.bell_val, self.idx, self.Agrads, self.Avars, self.global_step, self.scores, self.A_learning_rate, self.learning_rate,
                                                          self.B_loss, self.edit_ratio, self.A_loss, self.A_optimizer], 
                                                            feed_dict=feed_dict)
                            else:
                                gs, scores, batch_Blr, B_loss, batch_edit_ratio, A_loss, _ = sess.run([self.global_step, self.scores, self.learning_rate, self.B_loss, 
                                                                                                    self.edit_ratio, self.A_loss, self.B_optimizer], 
                                                                                               feed_dict=feed_dict)
                                
                            # After calculating A results we  want to see
                            # B results in this context, therefore we calulate B
                            # metrics before optimizing it.
                            # In ep=1 we'll see B loss for the unedited inputs.
                            acc = sum(np.equal(np.argmax(scores, axis=1), np.argmax(labels, axis=1))/len(labels))
                            iou_acc = self.iou_accuracy(
                                trn_data[b], 0, self.batch_size, imScale=imScale,
                                sess=sess, feed_dict=feed_dict, isEdit=isEdit, scores=scores, labels=labels)

                            trn_acc += acc/trn_nbatch
                            A_trn_loss += A_loss
                            B_trn_loss += B_loss/trn_nbatch
                            trn_iou += iou_acc/trn_nbatch
                            ephoc_edits_ratio += batch_edit_ratio
                            
                            # Optimize B separately from A to insure the correct flow
                            if toTrainB and toTrainA:
                                if ephoc<activation_ephoc:
                                    # if ephoc<activation_ephoc all actions
                                    # are chosen randomly and we don't edit words 
                                    # while B is training.
                                    isEdit = False
                                    feed_dict[self.isEdit]=isEdit
                                    
                                batch_Blr, gs, _ = sess.run(
                                    [self.learning_rate, self.global_step, self.B_optimizer], 
                                    feed_dict=feed_dict)
                            if b%50==0:
                                if toTrainA==0:
                                    batch_Alr=-1
                                print( 'Edit:', isEdit, 
                                      ';batch:%s'%(b), ';gs:%d'%(gs), ';Blr: %.3f'%(batch_Blr), 
                                      ';Alr:%.3f'%(batch_Alr), ';ERP:%.2f'%(editRandomlyProb),
                                      ';Bloss:%.3f'%(B_loss), ';Aloss:%.3f'%(A_loss), 
                                      ';edits ratio:%.3f'%(batch_edit_ratio), ';acc:%.3f'%(acc), 
                                      ';iou:%.3f'%(iou_acc), ';time:', datetime.now().replace(microsecond=0)-startTime)
                                print('idx:\n', np.sum(idx), idx.shape)
                                print('bell_val:\n', bell_val)
                                print('pyrite_val:\n', pyrite_val)
                                print("values:\n", values,'\n')
                                        
                        else: # isEdit=False
                            if ep==0:
                                B_loss, lr, gs, _ = sess.run([self.B_loss, self.learning_rate, 
                                                                self.global_step, self.B_optimizer], feed_dict=feed_dict)
                            else: 
                                # just show results of B with no edits
                                B_loss, lr, gs = sess.run([self.B_loss, self.learning_rate, self.global_step], feed_dict=feed_dict)

                            acc = self.accuracy(sess=sess, feed_dict=feed_dict, isEdit=isEdit, imScale=imScale)  
                            iou_acc = self.iou_accuracy(trn_data[b], 0, self.batch_size, imScale=imScale,
                                                        sess=sess, feed_dict=feed_dict, isEdit=isEdit)

                            trn_acc += acc/trn_nbatch
                            B_trn_loss += B_loss/trn_nbatch
                            trn_iou += iou_acc/trn_nbatch

                            if b%50==0:
                                print('Edit:', isEdit, 
                                      ';batch:', b, ';gs:', gs, ';Blr: %.3f'%(lr), 
                                      ';B loss: %.3f'%(B_loss),  ';acc: %.3f'%(acc), 
                                      ';iou: %.3f'%(iou_acc),';time:', datetime.now().replace(microsecond=0)-startTime)

                    if editRount_count>0: 
                        # If we edited any words during this ephoc,
                        # if ephoc <startA we don't.
                        print('\n*B Train loss: %.3f'%(B_trn_loss), 
                              ';A Train loss: %.3f'%(A_trn_loss/editRount_count),       
                              ';Edit ratio: %.3f'%(ephoc_edits_ratio/editRount_count), 
                              ';Train accuracy: %.3f'%(trn_acc),  ';IOU accuracy: %.3f'%(trn_iou), 
                              ';Time:', datetime.now().replace(microsecond=0)-startTime, '\n')
                        self.train_res.append([trn_acc, trn_iou, B_trn_loss, A_trn_loss/editRount_count])
                    else:
                        print('\n*B Train loss: %.3f'%(B_trn_loss),
                              ';Train accuracy: %.3f'%(trn_acc), 
                              ';IOU accuracy: %.3f'%(trn_iou),  
                              ';Time:', datetime.now().replace(microsecond=0)-startTime, '\n')
                        self.train_res.append([trn_acc, trn_iou, B_trn_loss, 0])
                self.saver.save(sess, params_dir + "/model.ckpt", global_step=ephoc)    
                
                print('GRADS:')
                print('#'*100)
                for item in Agrads.items():
                    print(item[0])
                    print(item[1], '\n')
                print('#'*100, '\n')
                print('VARS:')
                print('#'*100)
                for item in Avars.items():
                    print(item[0])
                    print(item[1], '\n')
                print('#'*100, '\n')
                
            
#                 if toTrainB==False:
#                     print('VALIDATION, ephoc:',ephoc)
#                     print('-'*len('VALIDATION, ephoc: %d'%(ephoc)))
#                     '''
#                     VALIDATION

#                     If ephoc>activation_ephoc, and B's not training (toTrainB=False)
#                     we run the model twice on val_data:

#                     * The first time no action will be chosen randomly. 
#                         We calculate Bloss (ABloss) and the average ratio of 
#                         edited words per query (p). 
#                     * Then, at the second run, all the actions are chosen
#                         randomly with probability of p to edit a word. Again, we
#                         calculate B's loss (RandBloss)

#                     We then define A's gain at ephoc # ep as: AGain_ep = ABloss_ep - RandBloss_ep

#                     If AGain_ep<AGain_(ep-1) we reduce A's learning rate, after maxDecreaseNum 
#                     reductions we stop A training but keep training B with A's edits.
#                     '''
#                     ABloss = 0 # B loss when none of the actions are chosen randomly
#                     RandBloss = 0 # B loss when all actions are chosen randomly
#                     for ep in range(2):
#                         startTime = datetime.now().replace(microsecond=0)
                        
#                         if ep==0:
#                             # If ep==0, none of the actions will be chosen randomly
#                             editRandomlyProb=0
#                             val_rnn_editProb = rnn_editProb
#                         else:
#                             # If ep==1, all actions will be chosen randomly
#                             editRandomlyProb=1
#                             val_rnn_editProb = ephoc_edits_ratio
                            
#                         Aval_loss, Bval_loss, val_acc, val_iou, ephoc_edits_ratio = 0, 0, 0, 0, 0
#                         print('\nRandom choice:', ep==1)
#                         for b in range(val_nbatch):
#                             qlen, attn_idx, padded_queries, padded_im, padded_bbox, labels = self.build_data(val_data[b],
#                                                                                         0, self.batch_size, imScale=imScale)
#                             feed_dict = {
#                                         self.qlen:qlen,
#                                         self.queries:padded_queries,
#                                         self.img:padded_im,
#                                         self.bboxes:padded_bbox,
#                                         self.attn_idx:attn_idx,
#                                         self.labels: labels,
#                                         self.unk:np.array([[vocab['<unk>']]]),
#                                         self.isEdit:True,
#                                         self.reward_holder:np.array([[0.] for _ in range(self.batch_size)]), # dummy holder 
#                                         self.BCE_holder:np.array([[0.] for _ in range(self.batch_size)]), # dummy holder 
#                                         self.Aattn_vecs:[[[]]],# dummy holder 
#                                         self.dropout_in:1.,
#                                         self.dropout_out:1.,
#                                         self.dropout_img:1.,
#                                         self.dropout_q:1.,
#                                         self.editRandomlyProb:editRandomlyProb,
#                                         self.rnn_editProb:val_rnn_editProb,
#                                         self.Alr:Alr,
#                                         self.Blr:Blr,
#                                         self.isTrain:False
#                                     }

#                             feed_dict[self.isEdit] = False
#                             queries_lens, B_ce, outputs = sess.run([self.queries_lens, self.B_ce, self.outputs], feed_dict=feed_dict)
#                             feed_dict[self.isEdit] = True

#                             Bce_min = min(B_ce)
#                             Bce_max = max(B_ce)
#                             Bce_scale = (B_ce-Bce_min)/(Bce_max-Bce_min)
#                             s = np.std(Bce_scale)
#                             BCE = 0.5*Bce_scale/s
#                             BCE = 1 + BCE - np.mean(BCE)

#                             feed_dict[self.reward_holder]=np.expand_dims(edit_reward*BCE/queries_lens, 1)
#                             feed_dict[self.BCE_holder]=np.expand_dims(BCE, 1)

#                             Aattn_vecs = outputs[:,:,:self.num_hidden]
#                             feed_dict[self.Aattn_vecs]=Aattn_vecs

#                             scores, B_loss, A_loss, batch_edit_ratio = sess.run(
#                                 [self.scores, self.B_loss, self.A_loss, self.edit_ratio], feed_dict=feed_dict)
#                             acc = sum(np.equal(np.argmax(scores, axis=1), np.argmax(labels, axis=1))/len(labels))
#                             iou_acc = self.iou_accuracy(
#                                 val_data[b], 0, self.batch_size, sess=sess, 
#                                 feed_dict=feed_dict, isEdit=True, imScale=imScale, labels=labels, scores=scores)

#                             val_acc += acc/val_nbatch
#                             Bval_loss += B_loss/val_nbatch
#                             Aval_loss += A_loss/val_nbatch
#                             val_iou += iou_acc/val_nbatch
#                             ephoc_edits_ratio += batch_edit_ratio/val_nbatch

#                             if b%50==0:
#                                 print('batch:', b, ';edits ratio: %.3f'%(batch_edit_ratio), 
#                                       ';B loss: %.3f'%(B_loss), ';A loss: %.3f'%(A_loss), ';acc: %.3f'%(acc), 
#                                       ';iou_acc: %.3f'%(iou_acc), ';time:', datetime.now().replace(microsecond=0)-startTime)

#                         print('\n*B loss: %.3f'%(Bval_loss), ';A loss: %.3f'%(Aval_loss), ';Edit ratio: %.3f'%(ephoc_edits_ratio),  
#                               ';Accuracy %.3f'%(val_acc), ';IOU accuracy: %.3f'%(val_iou), ';Time:', datetime.now().replace(microsecond=0)-startTime)
                        
#                         if ep==0:
#                             self.val_res.append([[val_acc, val_iou, Bval_loss, Aval_loss]])
#                             self.AGain.append(Bval_loss)
#                         else:
#                             self.val_res[-1].append([val_acc, val_iou, Bval_loss, Aval_loss])
#                             self.AGain[-1] = self.AGain[-1]-Bval_loss

#                     if toTrainA and  len(self.AGain)>3 and self.AGain[-1]<min(self.AGain[-4:-1]):
#                         Alr=Alr*0.8
#                         AlrDecreaseNum+=1
#                         toTrainA = AlrDecreaseNum<=maxDecreaseNum
                        
#                     print('\n*AlrDecreaseNum: %d'%(AlrDecreaseNum), '; maxDecreaseNum: %d'%(maxDecreaseNum), 
#                           ';AGain: %.3f'%(self.AGain[-1]), ';toTrainA: %s'%(toTrainA))
#                     print('-'*len('*AlrDecreaseNum: %d ; maxDecreaseNum: %d ;AGain: %.3f ;toTrainA: %s'%(AlrDecreaseNum, maxDecreaseNum, self.AGain[-1], toTrainA)), '\n')
                    
                
                
                
#                 '''
#                 TESTING
#                 '''
#                 if toTrainB:
#                     print('TESTING, ephoc:',ephoc)
#                     tstTime = datetime.now().replace(microsecond=0)
#                     tst_loss, tst_acc, tst_iou = 0, 0, 0
#                     t_data = tst_data+val_data
#                     t_nbatch = len(t_data)
#                     print('# t_data size:', sum([len(batch) for batch in t_data]))
#                     print('# t_data batches:', t_nbatch)
                    
#                     for b in range(t_nbatch):
#                         qlen, attn_idx, padded_queries, padded_im, padded_bbox, labels = self.build_data(t_data[b],
#                                                                                     0, self.batch_size, imScale=imScale)
                        
#                         feed_dict={
#                                     self.qlen:qlen,
#                                     self.queries:padded_queries,
#                                     self.img:padded_im,
#                                     self.bboxes:padded_bbox,
#                                     self.attn_idx:attn_idx,
#                                     self.labels: labels,
#                                     self.unk:np.array([[vocab['<unk>']]]),
#                                     self.isEdit:False,
#                                     self.reward_holder:np.array([[0.] for _ in range(self.batch_size)]),# dummy holder 
#                                     self.BCE_holder:np.array([[0.] for _ in range(self.batch_size)]),# dummy holder 
#                                     self.Aattn_vecs:[[[]]],# dummy holder 
#                                     self.dropout_in:1.,
#                                     self.dropout_out:1.,
#                                     self.dropout_img:1.,
#                                     self.dropout_q:1.,
#                                     self.editRandomlyProb:0.,
#                                     self.rnn_editProb:0.,
#                                     self.Alr:Alr,
#                                     self.Blr:Blr,
#                                     self.isTrain:False
#                                 }
                       
#                         scores, B_loss = sess.run([self.scores, self.B_loss], feed_dict=feed_dict)
#                         acc = sum(np.equal(np.argmax(scores, axis=1), np.argmax(labels, axis=1))/len(labels))

#                         iou_acc = self.iou_accuracy(
#                             t_data[b], 0, self.batch_size, sess=sess, 
#                             feed_dict=feed_dict, isEdit=False, imScale=imScale)
                        
#                         tst_acc += acc/t_nbatch
#                         tst_loss += B_loss/t_nbatch
#                         tst_iou += iou_acc/t_nbatch
#                         if b%50==0:
#                             print('batch:', b, ';B loss: %.3f'%(B_loss), ';acc: %.3f'%(acc), 
#                                    ';iou_acc: %.3f'%(iou_acc), ';time:', datetime.now().replace(microsecond=0)-startTime)
                            
#                     print('\n*Test loss: %.3f'%(tst_loss), ';Test accuracy %.3f'%(tst_acc), 
#                           ';IOU accuracy: %.3f'%(tst_iou), ';Time:', datetime.now().replace(microsecond=0)-startTime)
#                     self.test_res.append([tst_acc, tst_iou, tst_loss])
#                     if len(self.test_res)>10 and self.test_res[-1][-1]>max([item[-1] for item in self.test_res[-6:-1]]):
#                         Blr=Blr*0.9
                    
#                 print('='*50,'\n')
#             return self.test_res, self.train_res, self.val_res, self.AGain


# We start by runnging the Grounder in its simplest form
<p> We test the model with different state sizes: 50, 100, 150 and 200. We run each test for 100 ephocs</p>
<p> We get about 60% IOU in all the test, about the same as the baseline.<br> On the other hand, the train set IOU gets bigger as the hidden state gets bigger, starting from about 89%, for 50 hidden units, to about 97% with 200.

In [50]:
num_hidden=200
edit_rewards = -1.
edit_rewards_tst, edit_rewards_trn, edit_rewards_val, edit_rewards_AGain = [], [], [], []


params_dir = params_dir_tmp+'RL/edit_rewards_'+str(edit_reward)+'hidden_'+str(num_hidden)
tf.reset_default_graph()
m = Model(
    batch_size=200, 
    num_hidden=num_hidden,
    embed_size=embed_vecs.shape[1],
    img_dims=trainset[0][0][1][0].shape[1], 
    bbox_dims=trainset[0][0][1][1].shape[1], 
    vocab=vocab, 
    decay_steps=10000, 
    decay_rate=0.9, 
    bnorm=False,
    toQscale=True
)

print('params_dir:', params_dir)
print('num_hidden:', m.num_hidden)
print('edit_reward:', edit_reward)
tst, trn, val, AGain = m.train(trainset, testset, valset,
                                ephocs_num=200,
                                start_ephoc=35,
                                startA=10,
                                activation_ephoc=13,
                                muteBnum=2, 
                                queries_editProb=0.95,
                                edit_reward=edit_reward,
                                rnn_editProb=0.5,
                                Blr=.05,
                                Alr=.2,
                                imScale=1/50)
edit_rewards_tst.append(tst) 
edit_rewards_trn.append(trn)
edit_rewards_val.append(val) 
edit_rewards_AGain.append(AGain)

print('\n'+'*'*100)
print('*'*100)
print('*'*100,'\n')

params_dir: ../data/training/models/All/RL/edit_rewards_-2.5hidden_200
num_hidden: 200
edit_reward: -2.5
# Train set size: 59400
# Training batches: 297
# Validation set size: 29800
# Validation batches: 149
# Test set size: 29600
# Testing batches: 148
Initializing variables
TRAIN, ephoc:35; Training B:False; A is a know-it-all:False; ActivateA:True
Edit: True ;batch:0 ;gs:0 ;Blr: 0.050 ;Alr:0.200 ;ERP:1.00 ;Bloss:2.114 ;Aloss:1.306 ;edits ratio:0.512 ;acc:0.200 ;iou:0.275 ;time: 0:00:01
idx:
 315.0 (200, 13)
bell_val:
 [[-0.84408069  0.23563385  1.31534839 ...,  1.31534839  1.31534839
   1.31534839]
 [ 0.83069396  0.83069396  0.83069396 ...,  1.30316758  1.30316758
   1.30316758]
 [ 0.83451521  0.95908993  1.08366466 ...,  1.33281398  1.33281398
   1.33281398]
 ..., 
 [ 0.1744276   0.19915235  0.19915235 ...,  0.19915235  0.19915235
   0.19915235]
 [-2.81715202  1.28899336  1.28899336 ...,  1.28899336  1.28899336
   1.28899336]
 [ 0.17434102  0.19905198  0.19905198 ...,  0.19905198  

KeyboardInterrupt: 

In [None]:
num_hidden=200
edit_rewards = -1.
edit_rewards_tst, edit_rewards_trn, edit_rewards_val, edit_rewards_AGain = [], [], [], []


params_dir = params_dir_tmp+'RL/edit_rewards_'+str(edit_reward)+'hidden_'+str(num_hidden)
tf.reset_default_graph()
m = Model(
    batch_size=200, 
    num_hidden=num_hidden,
    embed_size=embed_vecs.shape[1],
    img_dims=trainset[0][0][1][0].shape[1], 
    bbox_dims=trainset[0][0][1][1].shape[1], 
    vocab=vocab, 
    decay_steps=10000, 
    decay_rate=0.9, 
    bnorm=False,
    toQscale=True
)

print('params_dir:', params_dir)
print('num_hidden:', m.num_hidden)
print('edit_reward:', edit_reward)
tst, trn, val, AGain = m.train(trainset, testset, valset,
                                ephocs_num=200,
                                start_ephoc=10,
                                startA=10,
                                activation_ephoc=13,
                                muteBnum=2, 
                                queries_editProb=0.95,
                                edit_reward=edit_reward,
                                rnn_editProb=0.2,
                                Blr=.05,
                                Alr=.2,
                                imScale=1/50)
edit_rewards_tst.append(tst) 
edit_rewards_trn.append(trn)
edit_rewards_val.append(val) 
edit_rewards_AGain.append(AGain)

print('\n'+'*'*100)
print('*'*100)
print('*'*100,'\n')

In [12]:
num_hidden=200
edit_rewards = -1.
edit_rewards_tst, edit_rewards_trn, edit_rewards_val, edit_rewards_AGain = [], [], [], []


params_dir = params_dir_tmp+'RL/edit_rewards_'+str(edit_reward)+'hidden_'+str(num_hidden)
tf.reset_default_graph()
m = Model(
    batch_size=200, 
    num_hidden=num_hidden,
    embed_size=embed_vecs.shape[1],
    img_dims=trainset[0][0][1][0].shape[1], 
    bbox_dims=trainset[0][0][1][1].shape[1], 
    vocab=vocab, 
    decay_steps=10000, 
    decay_rate=0.9, 
    bnorm=False,
    toQscale=True
)

print('params_dir:', params_dir)
print('num_hidden:', m.num_hidden)
print('edit_reward:', edit_reward)
tst, trn, val, AGain = m.train(trainset, testset, valset,
                                ephocs_num=200,
                                start_ephoc=0,
                                startA=10,
                                activation_ephoc=13,
                                muteBnum=2, 
                                queries_editProb=0.95,
                                edit_reward=edit_reward,
                                rnn_editProb=0.2,
                                Blr=.05,
                                Alr=.2,
                                imScale=1/50)
edit_rewards_tst.append(tst) 
edit_rewards_trn.append(trn)
edit_rewards_val.append(val) 
edit_rewards_AGain.append(AGain)

print('\n'+'*'*100)
print('*'*100)
print('*'*100,'\n')

params_dir: ../data/training/models/All/RL/edit_rewards_-2.5hidden_200
num_hidden: 200
edit_reward: -2.5
# Train set size: 59400
# Training batches: 297
# Validation set size: 29800
# Validation batches: 149
# Test set size: 29600
# Testing batches: 148
Initializing variables
TRAIN, ephoc:0; Training B:True; A is a know-it-all:False; ActivateA:False
Edit: False ;batch: 0 ;gs: 1 ;Blr: 0.050 ;B loss: 3.048 ;acc: 0.100 ;iou: 0.160 ;time: 0:00:00
Edit: False ;batch: 50 ;gs: 51 ;Blr: 0.050 ;B loss: 2.144 ;acc: 0.230 ;iou: 0.330 ;time: 0:00:08
Edit: False ;batch: 100 ;gs: 101 ;Blr: 0.050 ;B loss: 2.821 ;acc: 0.180 ;iou: 0.295 ;time: 0:00:16
Edit: False ;batch: 150 ;gs: 151 ;Blr: 0.050 ;B loss: 1.520 ;acc: 0.600 ;iou: 0.685 ;time: 0:00:24
Edit: False ;batch: 200 ;gs: 201 ;Blr: 0.050 ;B loss: 2.900 ;acc: 0.310 ;iou: 0.460 ;time: 0:00:32
Edit: False ;batch: 250 ;gs: 251 ;Blr: 0.050 ;B loss: 1.777 ;acc: 0.580 ;iou: 0.705 ;time: 0:00:41

*B Train loss: 2.390 ;Train accuracy: 0.349 ;IOU accuracy: 

KeyboardInterrupt: 