In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.utils.data as data

import numpy as np
import random
import matplotlib.pyplot as plt

import json
from translating_trees import *
from for_prog_dataset import ForDataset
from functools import partial

In [2]:
cd ..

/home/mehdi2277/Documents/HarveyMuddWork/Neural_Nets_Research/neural_nets_research


In [3]:
from neural_nets_library import training

In [4]:
class TreeCell(nn.Module):
    """
    LSTM cell which takes in arbitrary numbers of hidden and cell states (one per child).

    The cell owns ``3 + num_children`` gates, stored in this order: input gate,
    output gate, memory (candidate) gate, then one forget gate per child.  Every
    gate has one biased linear layer for the node's own input plus one
    bias-free linear layer per child hidden state.
    """
    def __init__(self, input_size, hidden_size, num_children):
        """
        Initialize the LSTM cell.

        :param input_size: length of input vector
        :param hidden_size: length of hidden vector (and cell state)
        :param num_children: number of children = number of hidden/cell states passed in
        """
        super(TreeCell, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        # Gates = input, output, memory + one forget gate per child
        num_gates = 3 + num_children

        self.gates_value = torch.nn.ModuleList()
        self.gates_children = torch.nn.ModuleList()
        for _ in range(num_gates):
            # One linear layer to handle the value of the node
            value_linear = nn.Linear(input_size, hidden_size, bias=True)
            # One bias-free linear layer per child of the node
            children_linear = torch.nn.ModuleList()
            for _ in range(num_children):
                children_linear.append(nn.Linear(hidden_size, hidden_size, bias=False))
            self.gates_value.append(value_linear)
            self.gates_children.append(children_linear)

        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        # Not used by forward(); kept so the module's attributes stay unchanged.
        self.relu = nn.ReLU()
        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-initialize every weight and bias uniformly in [-0.1, 0.1].
        """
        stdev = 0.1

        for gate_value in self.gates_value:
            nn.init.uniform(gate_value.weight, -stdev, stdev)
            nn.init.uniform(gate_value.bias, -stdev, stdev)

        for gate_child in self.gates_children:
            for gate_in_child in gate_child:
                nn.init.uniform(gate_in_child.weight, -stdev, stdev)

    def forward(self, input, hidden_states, cell_states):
        """
        Calculate a new hidden state and a new cell state from the LSTM gates

        :param input: the node's own input vector (fed to every gate's value layer)
        :param hidden_states: A list of num_children hidden states.
        :param cell_states: A list of num_children cell states.
        :return A tuple containing (new hidden state, new cell state)
        """
        # Pre-activation of every gate: W_g * input + sum_j U_gj * h_j
        gate_sums = []
        for gate_index in range(len(self.gates_value)):
            gate_sum = self.gates_value[gate_index](input)
            for child_index, child_hidden in enumerate(hidden_states):
                gate_sum = gate_sum + self.gates_children[gate_index][child_index](child_hidden)
            gate_sums.append(gate_sum)

        # First gate is the input gate
        input_gate = self.sigmoid(gate_sums[0])
        # Next output gate
        output_gate = self.sigmoid(gate_sums[1])
        # Next memory gate
        memory = self.tanh(gate_sums[2])

        # All the rest are forget gates, one per child cell state.  The loop
        # uses its own index name so it cannot overwrite the input gate, and
        # each forget gate is squashed with the sigmoid like the other gates.
        new_state = input_gate * memory
        for child_index, child_cell in enumerate(cell_states):
            forget_gate = self.sigmoid(gate_sums[3 + child_index])
            new_state = new_state + forget_gate * child_cell

        new_hidden = output_gate * self.tanh(new_state)

        return new_hidden, new_state

In [5]:
class TreeLSTM(nn.Module):
    '''
    TreeLSTM

    Takes in a tree where each node has a value and a list of children.
    Produces a tree of the same size where the value of each node is now encoded.

    One TreeCell is kept per supported child count; a node is encoded by the
    cell whose child count equals the node's number of children.
    '''

    def __init__(self, input_size, hidden_size, valid_num_children):
        """
        Initialize tree cells we'll need later.

        :param input_size: length of each node's value vector
        :param hidden_size: length of the hidden/cell state produced for each node
        :param valid_num_children: list of child counts that non-leaf nodes may have;
                                   0 (leaves) is always added in front
        """
        super(TreeLSTM, self).__init__()

        self.valid_num_children = [0] + valid_num_children
        self.lstm_list = torch.nn.ModuleList()

        for size in self.valid_num_children:
            self.lstm_list.append(TreeCell(input_size, hidden_size, size))

    def forward(self, node):
        """
        Creates a tree where each node's value is the encoded version of the original value.

        :param node: root of a (sub)tree where each node has a value vector and a list of children
        :return a tuple - (root of encoded tree, cell state)
        :raises ValueError: if the node's child count has no matching TreeCell
        """
        # Recursively encode children; each entry is a tuple (encoded node, cell state)
        children = [self.forward(child) for child in node.children]

        # Extract the TreeCell inputs
        child_hiddens = [encoded_child.value for encoded_child, _ in children]
        child_cells = [cell_state for _, cell_state in children]

        num_children = len(children)
        if num_children not in self.valid_num_children:
            raise ValueError(
                "TreeLSTM has no cell for a node with %d children; supported child counts are %s"
                % (num_children, self.valid_num_children))

        # index() returns the first match, i.e. the same cell a linear scan would pick.
        cell_index = self.valid_num_children.index(num_children)
        new_hidden, new_cell = self.lstm_list[cell_index](node.value, child_hiddens, child_cells)

        # Set our encoded vector as the root of the new tree.
        # Node is not defined in this notebook; presumably it comes from the
        # translating_trees star import -- TODO confirm.
        root_node = Node(new_hidden)
        root_node.children = [encoded_child for encoded_child, _ in children]
        return (root_node, new_cell)

In [6]:
class SeqEncoder(nn.Module):
    """
    Encodes an un-batched input sequence with a multi-layer nn.LSTM.

    If you are using an end of sequence token, it should be accounted for in input_size.
    """
    def __init__(self, input_size, hidden_size, num_layers, attention=True,
                 use_embedding=True, embedding_size=256):
        """
        :param input_size: vocabulary size when use_embedding is set, otherwise the
                           length of each input vector
        :param hidden_size: hidden size of the LSTM
        :param num_layers: number of stacked LSTM layers
        :param attention: when True, forward() also returns the per-step outputs
        :param use_embedding: when True, inputs are passed through an nn.Embedding first
        :param embedding_size: embedding dimension (only used with use_embedding)
        """
        super(SeqEncoder, self).__init__()

        self.use_embedding = use_embedding

        # The LSTM consumes embeddings when they are enabled, raw inputs otherwise.
        lstm_input_size = input_size
        if use_embedding:
            self.embedding = nn.Embedding(input_size, embedding_size)
            lstm_input_size = embedding_size
        self.lstm = nn.LSTM(lstm_input_size, hidden_size, num_layers)

        self.attention = attention
        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-initialize the LSTM (and embedding, if any) uniformly in [-0.1, 0.1].
        """
        bound = 0.1

        for lstm_parameter in self.lstm.parameters():
            nn.init.uniform(lstm_parameter, -bound, bound)

        if not self.use_embedding:
            return
        nn.init.uniform(self.embedding.weight, -bound, bound)

    def forward(self, input):
        """
        Run the sequence through the LSTM with a batch dimension of one.

        :param input: the un-batched input sequence
        :return (outputs, hiddens, cell_states) when attention is enabled,
                otherwise (hiddens, cell_states); the batch dimension is squeezed out
        """
        sequence = self.embedding(input) if self.use_embedding else input

        # nn.LSTM wants a batch dimension in position 1; add it, then drop it again.
        outputs, (hiddens, cell_states) = self.lstm(sequence.unsqueeze(1))
        outputs = outputs.squeeze(1)
        hiddens = hiddens.squeeze(1)
        cell_states = cell_states.squeeze(1)

        if not self.attention:
            return hiddens, cell_states
        return outputs, hiddens, cell_states

In [7]:
class TreeEncoder(nn.Module):
    """
    Takes in a tree where each node has a value vector and a list of children
    Produces a sequence encoding of the tree

    The tree is pushed through a stack of TreeLSTM layers; each layer consumes
    the encoded tree produced by the previous one.
    """
    def __init__(self, input_size, hidden_size, num_layers, valid_num_children,
                 attention=True, use_embedding=True, embedding_size=256):
        """
        :param input_size: vocabulary size when use_embedding is set, otherwise the
                           length of each node's value vector
        :param hidden_size: hidden size of every TreeLSTM layer
        :param num_layers: number of stacked TreeLSTM layers
        :param valid_num_children: child counts that non-leaf nodes may have
        :param attention: when True, forward() also returns the per-node encodings
        :param use_embedding: when True, node values are embedded before encoding
        :param embedding_size: embedding dimension (only used with use_embedding)
        """
        super(TreeEncoder, self).__init__()

        self.lstm_list = torch.nn.ModuleList()
        self.use_embedding = use_embedding

        if use_embedding:
            self.embedding = nn.Embedding(input_size, embedding_size)
            self.lstm_list.append(TreeLSTM(embedding_size, hidden_size, valid_num_children))
        else:
            self.lstm_list.append(TreeLSTM(input_size, hidden_size, valid_num_children))

        # All TreeLSTMs have input of hidden_size except the first.
        for _ in range(num_layers - 1):
            self.lstm_list.append(TreeLSTM(hidden_size, hidden_size, valid_num_children))

        self.attention = attention
        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-initialize the embedding (if any) uniformly in [-0.1, 0.1].
        """
        stdev = 0.1

        if self.use_embedding:
            nn.init.uniform(self.embedding.weight, -stdev, stdev)

    def forward(self, tree):
        """
        Encodes nodes of a tree in the rows of a matrix.

        :param tree: a tree where each node has a value vector and a list of children
        :return a matrix where each row represents the encoded output of a single node and also
                the hidden/cell states of the root node.  The matrix is only included
                when attention is enabled.

        """
        if self.use_embedding:
            # Use this module's own embedding layer (torch.nn has no `embedding`
            # attribute).  map_tree is not defined in this notebook; presumably it
            # applies the function to every node value -- TODO confirm.
            tree = map_tree(lambda value: self.embedding(value).squeeze(0), tree)

        hiddens = []
        cell_states = []

        # Each layer re-encodes the tree produced by the layer below it and
        # contributes the root's hidden and cell state.
        for lstm in self.lstm_list:
            tree, cell_state = lstm(tree)
            hiddens.append(tree.value)
            cell_states.append(cell_state)

        hiddens = torch.stack(hiddens)
        cell_states = torch.stack(cell_states)

        if self.attention:
            # tree_to_list is not defined in this notebook; presumably it
            # flattens the tree into a list of node values -- TODO confirm.
            return torch.stack(tree_to_list(tree)), hiddens, cell_states
        else:
            return hiddens, cell_states

In [8]:
'''
Decoder
'''
class Tree_to_Sequence_Model(nn.Module):
    """
      For the decoder this expects something like an lstm cell or a gru cell and not an lstm/gru.
      Batch size is not supported at all. More precisely the encoder expects an input that does not
      appear in batches and must also output non-batched tensors.

      Token layout (n = nclass): 0..n-1 are the alphabet in order, n is the trash
      category, n + 1 is end of sequence and n + 2 is start of sequence.
    """
    def __init__(self, encoder, decoder, hidden_size, nclass, embedding_size):
        """
        :param encoder: module returning (root hidden states, root cell states)
        :param decoder: multi-layer cell called as decoder(input, (hiddens, cell_states))
        :param hidden_size: size of the decoder's hidden state
        :param nclass: number of real output tokens
        :param embedding_size: size of the output-token embeddings fed to the decoder
        """
        super(Tree_to_Sequence_Model, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

        # nclass + 2 to include end of sequence and trash
        self.output_log_odds = nn.Linear(hidden_size, nclass+2)
        self.softmax = nn.Softmax(dim=0)
        self.log_softmax = nn.LogSoftmax(dim=0)

        self.register_buffer('SOS_token', torch.LongTensor([[nclass+2]]))
        self.EOS_value = nclass + 1

        # nclass + 3 to include start of sequence, end of sequence, and trash.
        # n + 2 - start of sequence, end of sequence - n + 1, trash - n.
        # The first n correspond to the alphabet in order.
        self.embedding = nn.Embedding(nclass+3, embedding_size)

        # nclass is the trash category to avoid penalties after target's EOS token
        self.loss_func = nn.CrossEntropyLoss(ignore_index=nclass)
        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-initialize the output layer and the embedding uniformly in [-0.1, 0.1].
        """
        stdev = 0.1

        nn.init.uniform(self.output_log_odds.weight, -stdev, stdev)
        nn.init.uniform(self.output_log_odds.bias, -stdev, stdev)
        nn.init.uniform(self.embedding.weight, -stdev, stdev)

    def _initial_decoder_state(self, input):
        """
        Encode the input and build the decoder's starting input and states.

        :return (embedded start-of-sequence token of size 1 x embedding_size,
                 hiddens, cell_states) with a batch dimension of one inserted
                 into both state tensors
        """
        # root hidden state/cell state
        decoder_hiddens, decoder_cell_states = self.encoder(input) # num_layers x hidden_size
        decoder_hiddens = decoder_hiddens.unsqueeze(1) # num_layers x 1 x hidden_size
        decoder_cell_states = decoder_cell_states.unsqueeze(1)
        decoder_input = self.embedding(Variable(self.SOS_token)).squeeze(0) # 1 x embedding_size
        return decoder_input, decoder_hiddens, decoder_cell_states

    def forward_train(self, input, target, teacher_forcing=False):
        """
        Decode target_length steps and return the summed cross-entropy loss.

        :param input: The input handed to the encoder. The encoder's output should be a
                      pair. The first part should correspond to the hidden state of the
                      root. The second part should correspond to the cell state of the
                      root. They both should be [num_layers, hidden_size].
        :param target: The target should have dimension, seq_len, and should be a LongTensor.
        :param teacher_forcing: feed the target token (rather than the model's own
                                prediction) as the next decoder input
        :return the loss summed over all target positions
        """
        decoder_input, decoder_hiddens, decoder_cell_states = self._initial_decoder_state(input)

        target_length, = target.size()
        loss = 0

        for i in range(target_length):
            decoder_hiddens, decoder_cell_states = self.decoder(decoder_input, (decoder_hiddens, decoder_cell_states)) # num_layers x 1 x hidden_size
            decoder_hidden = decoder_hiddens[-1] # 1 x hidden_size
            log_odds = self.output_log_odds(decoder_hidden)

            # view(1) yields a one-element vector whether indexing the target
            # produced a zero-dimensional or a one-element tensor.
            target_token = target[i].view(1)
            loss += self.loss_func(log_odds, target_token)

            if teacher_forcing:
                next_input = target_token.unsqueeze(1)
            else:
                _, next_input = log_odds.topk(1)

            decoder_input = self.embedding(next_input).squeeze(1) # 1 x embedding_size

        return loss

    def forward_prediction(self, input, maximum_length=20):
        """
        This is just an alias for point_wise_prediction, so that training code that assumes the presence
        of a forward_train and forward_prediction works.
        """
        return self.point_wise_prediction(input, maximum_length)

    def point_wise_prediction(self, input, maximum_length=20):
        """
        Greedy decoding: emit the most likely token at every step.

        :return the list of predicted token ids; decoding stops after the end of
                sequence token (which is included) or after maximum_length tokens
        """
        decoder_input, decoder_hiddens, decoder_cell_states = self._initial_decoder_state(input)
        output_so_far = []

        for _ in range(maximum_length):
            decoder_hiddens, decoder_cell_states = self.decoder(decoder_input, (decoder_hiddens, decoder_cell_states))
            decoder_hidden = decoder_hiddens[-1]
            log_odds = self.output_log_odds(decoder_hidden)

            _, next_input = log_odds.topk(1)
            next_token = int(next_input)
            output_so_far.append(next_token)

            if next_token == self.EOS_value:
                break

            decoder_input = self.embedding(next_input).squeeze(1) # 1 x embedding size

        return output_so_far

    def beam_search_prediction(self, input, maximum_length=20, beam_width=5):
        """
        Beam-search decoding.

        :param maximum_length: maximum number of tokens to emit
        :param beam_width: number of hypotheses kept after every step
        :return the token list of the hypothesis with the highest summed log probability
        :raises ValueError: if beam_width is smaller than 1
        """
        if beam_width < 1:
            raise ValueError("beam_width must be at least 1, got %d" % beam_width)

        decoder_input, decoder_hiddens, decoder_cell_states = self._initial_decoder_state(input)

        # Each hypothesis is (summed log prob, tokens so far, still open?, decoder state).
        # Start from ONE hypothesis: seeding the beam with several identical
        # copies makes every survivor a duplicate and degrades the search to greedy.
        beams = [(0.0, [], True, (decoder_input, decoder_hiddens, decoder_cell_states))]

        for _ in range(maximum_length):
            if not any(still_open for _, _, still_open, _ in beams):
                break

            candidates = []

            for score, tokens, still_open, state in beams:
                if not still_open:
                    # Finished hypotheses compete unchanged.
                    candidates.append((score, tokens, still_open, state))
                    continue

                decoder_input, decoder_hiddens, decoder_cell_states = state
                decoder_hiddens, decoder_cell_states = self.decoder(decoder_input, (decoder_hiddens, decoder_cell_states))
                decoder_hidden = decoder_hiddens[-1]
                log_odds = self.output_log_odds(decoder_hidden).squeeze(0) # nclasses
                log_probs = self.log_softmax(log_odds)

                # topk cannot return more entries than there are classes.
                num_expansions = min(beam_width, log_probs.size(0))
                log_values, next_tokens = log_probs.topk(num_expansions)
                next_inputs = self.embedding(next_tokens.unsqueeze(1)) # num_expansions x 1 x embedding size

                for k in range(num_expansions):
                    token = int(next_tokens[k])
                    candidates.append((score + float(log_values[k]), tokens + [token],
                                       token != self.EOS_value,
                                       (next_inputs[k], decoder_hiddens, decoder_cell_states)))

            # Ascending sort: the best hypotheses are at the end.
            beams = sorted(candidates, key=lambda candidate: candidate[0])[-beam_width:]
        return beams[-1][1]

In [9]:
class Tree_to_Sequence_Attention_Model(Tree_to_Sequence_Model):
    """
    Tree_to_Sequence_Model with attention over the encoder's annotations.

    At every step the decoder input is the previous token's embedding
    concatenated with the previous attention vector ``et``; the output
    distribution is computed from the new ``et``.
    """
    def __init__(self, encoder, decoder, hidden_size, nclass, embedding_size,
                 alignment_size=50, align_type=1):
        """
        :param encoder: module returning (annotations, root hidden states, root cell states)
        :param decoder: multi-layer cell taking inputs of size embedding_size + hidden_size
        :param hidden_size: size of the decoder hidden state and of each annotation
        :param nclass: number of real output tokens
        :param embedding_size: size of the output-token embeddings
        :param alignment_size: size of the alignment space (only used by align_type 0)
        :param align_type: 0 scores with a learned alignment vector over tanh of projected
                           states, 1 scores with a dot product against linearly projected
                           annotations, any other value with a dot product against the
                           raw annotations
        """
        super(Tree_to_Sequence_Attention_Model, self).__init__(encoder, decoder, hidden_size, nclass, embedding_size)

        self.attention_presoftmax = nn.Linear(2 * hidden_size, hidden_size)
        self.tanh = nn.Tanh()

        if align_type == 0:
            self.attention_hidden = nn.Linear(hidden_size, alignment_size)
            self.attention_context = nn.Linear(hidden_size, alignment_size, bias=False)
            self.attention_alignment_vector = nn.Linear(alignment_size, 1)
        elif align_type == 1:
            self.attention_hidden = nn.Linear(hidden_size, hidden_size)

        self.align_type = align_type
        # Attention vector fed to the decoder on the very first step.
        self.register_buffer('et', torch.zeros(1, hidden_size))
        self.reset_attention_parameters()

    def reset_attention_parameters(self):
        """
        Re-initialize the alignment layers uniformly in [-0.1, 0.1].
        """
        stdev = 0.1

        if self.align_type <= 1:
            nn.init.uniform(self.attention_hidden.weight, -stdev, stdev)
            nn.init.uniform(self.attention_hidden.bias, -stdev, stdev)

        if self.align_type == 0:
            nn.init.uniform(self.attention_context.weight, -stdev, stdev)
            nn.init.uniform(self.attention_alignment_vector.weight, -stdev, stdev)
            nn.init.uniform(self.attention_alignment_vector.bias, -stdev, stdev)

    def _start_attention_decoding(self, input):
        """
        Encode the input and build everything the first decoding step needs.

        :return (annotations, attention_hidden_values, word_input, et,
                 decoder_hiddens, decoder_cell_states)
        """
        annotations, decoder_hiddens, decoder_cell_states = self.encoder(input)

        # align_type 0: number_of_nodes x alignment_size, otherwise number_of_nodes x hidden_size
        if self.align_type <= 1:
            attention_hidden_values = self.attention_hidden(annotations)
        else:
            attention_hidden_values = annotations

        decoder_hiddens = decoder_hiddens.unsqueeze(1) # num_layers x 1 x hidden_size
        decoder_cell_states = decoder_cell_states.unsqueeze(1) # num_layers x 1 x hidden_size

        word_input = self.embedding(Variable(self.SOS_token)).squeeze(0) # 1 x embedding_size
        et = Variable(self.et)
        return annotations, attention_hidden_values, word_input, et, decoder_hiddens, decoder_cell_states

    def _attention_step(self, decoder_input, decoder_hiddens, decoder_cell_states,
                        annotations, attention_hidden_values):
        """
        Advance the decoder by one step and attend over the annotations.

        :return (log_odds, et, decoder_hiddens, decoder_cell_states)
        """
        decoder_hiddens, decoder_cell_states = self.decoder(decoder_input, (decoder_hiddens, decoder_cell_states))
        decoder_hidden = decoder_hiddens[-1]

        attention_logits = self.attention_logits(attention_hidden_values, decoder_hidden)
        attention_probs = self.softmax(attention_logits) # number_of_nodes x 1
        context_vec = (attention_probs * annotations).sum(0).unsqueeze(0) # 1 x hidden_size
        et = self.tanh(self.attention_presoftmax(torch.cat((decoder_hidden, context_vec), dim=1))) # 1 x hidden_size
        log_odds = self.output_log_odds(et)
        return log_odds, et, decoder_hiddens, decoder_cell_states

    def forward_train(self, input, target, teacher_forcing=True):
        """
        Decode target_length steps and return the summed cross-entropy loss.

        :param input: The input handed to the encoder. The encoder's output should be a
                      triple. The first part should be the annotations and have
                      dimensions number_of_nodes x hidden_size. The second part should be
                      the hidden representations of the root and have dimensions
                      num_layers x hidden_size. The third part should correspond to the
                      cell states of the root and have dimensions num_layers x hidden_size.
        :param target: The target should have dimensions, seq_len, and should be a LongTensor.
        :param teacher_forcing: feed the target token (rather than the model's own
                                prediction) as the next word input
        :return the loss summed over all target positions
        """
        (annotations, attention_hidden_values, word_input, et,
         decoder_hiddens, decoder_cell_states) = self._start_attention_decoding(input)

        target_length, = target.size()
        loss = 0

        for i in range(target_length):
            decoder_input = torch.cat((word_input, et), dim=1) # 1 x embedding_size + hidden_size
            log_odds, et, decoder_hiddens, decoder_cell_states = self._attention_step(
                decoder_input, decoder_hiddens, decoder_cell_states, annotations, attention_hidden_values)

            # view(1) yields a one-element vector whether indexing the target
            # produced a zero-dimensional or a one-element tensor.
            target_token = target[i].view(1)
            loss += self.loss_func(log_odds, target_token)

            if teacher_forcing:
                next_input = target_token.unsqueeze(1)
            else:
                _, next_input = log_odds.topk(1)

            word_input = self.embedding(next_input).squeeze(1) # 1 x embedding size
        return loss

    def forward_prediction(self, input, maximum_length=20):
        """
        This is just an alias for point_wise_prediction, so that training code that assumes the presence
        of a forward_train and forward_prediction works.
        """
        return self.point_wise_prediction(input, maximum_length)

    def point_wise_prediction(self, input, maximum_length=20):
        """
        Greedy decoding: emit the most likely token at every step.

        :return the list of predicted token ids; decoding stops after the end of
                sequence token (which is included) or after maximum_length tokens
        """
        (annotations, attention_hidden_values, word_input, et,
         decoder_hiddens, decoder_cell_states) = self._start_attention_decoding(input)
        output_so_far = []

        for _ in range(maximum_length):
            decoder_input = torch.cat((word_input, et), dim=1) # 1 x embedding_size + hidden_size
            log_odds, et, decoder_hiddens, decoder_cell_states = self._attention_step(
                decoder_input, decoder_hiddens, decoder_cell_states, annotations, attention_hidden_values)
            _, next_input = log_odds.topk(1)

            next_token = int(next_input)
            output_so_far.append(next_token)

            if next_token == self.EOS_value:
                break

            word_input = self.embedding(next_input).squeeze(1) # 1 x embedding size

        return output_so_far

    def beam_search_prediction(self, input, maximum_length=20, beam_width=5):
        """
        Beam-search decoding.

        :param maximum_length: maximum number of tokens to emit
        :param beam_width: number of hypotheses kept after every step
        :return the token list of the hypothesis with the highest summed log probability
        :raises ValueError: if beam_width is smaller than 1
        """
        if beam_width < 1:
            raise ValueError("beam_width must be at least 1, got %d" % beam_width)

        (annotations, attention_hidden_values, word_input, et,
         decoder_hiddens, decoder_cell_states) = self._start_attention_decoding(input)
        decoder_input = torch.cat((word_input, et), dim=1)

        # Each hypothesis is (summed log prob, tokens so far, still open?, decoder state).
        # Start from ONE hypothesis: seeding the beam with several identical
        # copies makes every survivor a duplicate and degrades the search to greedy.
        beams = [(0.0, [], True, (decoder_input, decoder_hiddens, decoder_cell_states))]

        for _ in range(maximum_length):
            if not any(still_open for _, _, still_open, _ in beams):
                break

            candidates = []

            for score, tokens, still_open, state in beams:
                if not still_open:
                    # Finished hypotheses compete unchanged.
                    candidates.append((score, tokens, still_open, state))
                    continue

                decoder_input, decoder_hiddens, decoder_cell_states = state
                log_odds, et, decoder_hiddens, decoder_cell_states = self._attention_step(
                    decoder_input, decoder_hiddens, decoder_cell_states, annotations, attention_hidden_values)
                log_probs = self.log_softmax(log_odds.squeeze(0)) # nclasses

                # topk cannot return more entries than there are classes.
                num_expansions = min(beam_width, log_probs.size(0))
                log_values, next_tokens = log_probs.topk(num_expansions)
                word_inputs = self.embedding(next_tokens.unsqueeze(1)) # num_expansions x 1 x embedding size
                # The decoder consumes the embedding concatenated with et, so the
                # stored next input must be the concatenation, not the bare embedding.
                next_inputs = torch.cat((word_inputs, et.unsqueeze(0).repeat(num_expansions, 1, 1)), dim=2)

                for k in range(num_expansions):
                    token = int(next_tokens[k])
                    candidates.append((score + float(log_values[k]), tokens + [token],
                                       token != self.EOS_value,
                                       (next_inputs[k], decoder_hiddens, decoder_cell_states)))

            # Ascending sort: the best hypotheses are at the end.
            beams = sorted(candidates, key=lambda candidate: candidate[0])[-beam_width:]
        return beams[-1][1]

    def attention_logits(self, attention_hidden_values, decoder_hidden):
        """
        Score every annotation against the current decoder hidden state.

        :return unnormalized attention scores, number_of_nodes x 1
        """
        if self.align_type == 0:
            return self.attention_alignment_vector(self.tanh(self.attention_context(decoder_hidden) + attention_hidden_values))
        else:
            return (decoder_hidden * attention_hidden_values).sum(1).unsqueeze(1)

In [10]:
class MultilayerLSTMCell(nn.Module):
    """
    A stack of nn.LSTMCell layers applied to a single time step.

    Layer k receives the hidden state produced by layer k - 1 as its input.
    """
    def __init__(self, input_size, hidden_sizes, num_layers, bias=True):
        """
        :param input_size: size of the input fed to the first layer
        :param hidden_sizes: either one int (used for every layer) or a sequence with
                             one hidden size per layer.  forward() stacks the per-layer
                             states with torch.stack, so the sizes must all be equal
                             for forward() to succeed.
        :param num_layers: number of stacked LSTM cells
        :param bias: whether the cells use bias terms
        :raises ValueError: if fewer than num_layers hidden sizes are given
        """
        super(MultilayerLSTMCell, self).__init__()
        self.lstm_layers = nn.ModuleList()

        if isinstance(hidden_sizes, int):
            hidden_sizes = [hidden_sizes] * num_layers

        # list() also accepts tuples and other sequences of sizes.
        hidden_sizes = list(hidden_sizes)
        if len(hidden_sizes) < num_layers:
            raise ValueError("expected at least %d hidden sizes, got %d"
                             % (num_layers, len(hidden_sizes)))

        layer_sizes = [input_size] + hidden_sizes

        for i in range(num_layers):
            curr_lstm = nn.LSTMCell(layer_sizes[i], layer_sizes[i+1], bias=bias)
            self.lstm_layers.append(curr_lstm)

        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-initialize every parameter of every cell uniformly in [-0.1, 0.1].
        """
        stdev = 0.1

        for lstm_cell in self.lstm_layers:
            # parameters() skips the bias entries that are None when bias=False;
            # initializing bias_ih/bias_hh by name fails in that case.
            for parameter in lstm_cell.parameters():
                nn.init.uniform(parameter, -stdev, stdev)

    def forward(self, input, past_states):
        """
        Advance every layer by one step.

        :param input: input to the first layer
        :param past_states: pair (hiddens, cell_states), each iterable layer by layer
        :return pair (stacked new hiddens, stacked new cell states)
        """
        hiddens, cell_states = past_states
        result_hiddens, result_cell_states = [], []
        curr_input = input

        for lstm_cell, curr_hidden, curr_cell_state in zip(self.lstm_layers, hiddens, cell_states):
            # A layer's new hidden state is the next layer's input.
            curr_input, new_cell_state = lstm_cell(curr_input, (curr_hidden, curr_cell_state))
            result_hiddens.append(curr_input)
            result_cell_states.append(new_cell_state)

        return torch.stack(result_hiddens), torch.stack(result_cell_states)

In [11]:
# Sizes of the variable and integer-constant vocabularies.
num_vars, num_ints = 10, 11

# Operator vocabulary of the source ("for") language.  Every operator is
# stored under its upper-cased name wrapped in angle brackets, e.g. "<VAR>",
# and numbered by its position in the list.
for_ops = {
    "<" + op_name.upper() + ">": op_index
    for op_index, op_name in enumerate([
        "Var",
        "Const",
        "Plus",
        "Minus",
        "EqualFor",
        "LeFor",
        "GeFor",
        "Assign",
        "If",
        "Seq",
        "For",
    ])
}

# Operator vocabulary of the target ("lambda") language, keyed the same way.
lambda_ops = {
    "<" + op_name.upper() + ">": op_index
    for op_index, op_name in enumerate([
        "Var",
        "Const",
        "Plus",
        "Minus",
        "EqualFor",
        "LeFor",
        "GeFor",
        "If",
        "Let",
        "Unit",
        "Letrec",
        "App",
    ])
}

In [12]:
# Dataset options.  They are defined once here and passed to ForDataset by
# name, so the dataset and the model-construction code that reads these same
# variables cannot drift apart.
input_eos_token = True
input_as_seq = True
use_embedding = False
# One extra input symbol when an end-of-sequence token is appended to sequence inputs.
eos_bonus = 1 if input_eos_token and input_as_seq else 0

for_lambda_dset = ForDataset('ANC/Easy-arbitraryForList.json', input_eos_token=input_eos_token,
                             input_as_seq=input_as_seq, use_embedding=use_embedding)

In [20]:
# Model hyper-parameters.
embedding_size = hidden_size = 256
num_layers = 3
attention = True
alignment_size = 50
align_type = 1

# Vocabulary sizes: variables + integer constants + operators of the respective
# language (+ the end-of-sequence symbol on the input side, when used).
nclass = num_vars + num_ints + len(lambda_ops)
encoder_input_size = num_vars + num_ints + len(for_ops) + eos_bonus

# The encoder is built first, then the decoder, then the full model.
encoder = (
    SeqEncoder(encoder_input_size, hidden_size, num_layers,
               attention=attention, use_embedding=use_embedding)
    if input_as_seq else
    TreeEncoder(encoder_input_size, hidden_size, num_layers, [1, 2],
                attention=attention, use_embedding=use_embedding)
)

# With attention the decoder input is the token embedding concatenated with a
# hidden_size attention vector; without it, just the embedding.
decoder = MultilayerLSTMCell(embedding_size + (hidden_size if attention else 0),
                             hidden_size, num_layers)

if not attention:
    program_model = Tree_to_Sequence_Model(encoder, decoder, hidden_size, nclass, embedding_size)
else:
    program_model = Tree_to_Sequence_Attention_Model(encoder, decoder, hidden_size, nclass, embedding_size,
                                                     alignment_size=alignment_size, align_type=align_type)

In [21]:
# Move the model to the GPU only when one is present; an unconditional
# .cuda() raises on machines without CUDA.
if torch.cuda.is_available():
    program_model = program_model.cuda()

In [22]:
def program_accuracy(prediction, target):
    """
    Exact-match metric for a whole predicted program.

    :param prediction: the predicted token sequence
    :param target: tensor-like object whose ``.data`` iterates over the expected tokens
    :return 1 when the list of target tokens equals the prediction, otherwise 0
    """
    if list(target.data) == prediction:
        return 1
    return 0

def token_accuracy(prediction, target):
    """
    Fraction of token positions at which the prediction agrees with the target.

    Positions are compared pairwise from the start. The count of matches is
    divided by the length of the longer sequence, so missing or extra tokens
    count as errors. For list predictions the result is 1.0 exactly when
    program_accuracy would return 1.

    :param prediction: predicted token sequence (must support len() and iteration)
    :param target: object whose `.data` attribute is iterable (same convention
                   as program_accuracy)
    :return: float in [0.0, 1.0]; 1.0 when both sequences are empty
    """
    expected_tokens = list(target.data)
    longest = max(len(expected_tokens), len(prediction))
    if longest == 0:
        # Two empty sequences are identical.
        return 1.0
    num_correct = sum(1 for guess, truth in zip(prediction, expected_tokens) if guess == truth)
    return float(num_correct) / longest

# SGD over all of the model's parameters; no momentum or weight-decay arguments
# are passed. The learning rate equals the init_lr=0.001 given to the LR
# scheduler in the training call later in this notebook.
optimizer = torch.optim.SGD(program_model.parameters(), lr=0.001)

In [24]:
# Train the model and rebind it along with the two other values the trainer
# returns (presumably per-report training losses and validation metrics).
# NOTE(review): the scheduler's decay factor lives in the training module; only
# its starting rate and decay interval (in epochs) are fixed here.
(program_model, train_losses, validation_losses) = training.train_model_anc(
    program_model,
    for_lambda_dset,
    optimizer,
    lr_scheduler=partial(training.exp_lr_scheduler, init_lr=0.001, lr_decay_epoch=1),
    num_epochs=5,
    validation_criterion=program_accuracy,
    batch_size=100,
    use_cuda=True,
)

Epoch 0/4
----------
LR is set to 0.001
Epoch Number: 0, Batch Number: 200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 200, Training Loss: 12.7670
Time so far is 0m 7s
Epoch Number: 0, Batch Number: 400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 400, Training Loss: 12.3716
Time so far is 0m 15s
Epoch Number: 0, Batch Number: 600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 600, Training Loss: 13.0873
Time so far is 0m 23s
Epoch Number: 0, Batch Number: 800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 800, Training Loss: 19.5550
Time so far is 0m 30s
Epoch Number: 0, Batch Number: 1000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 1000, Training Loss: 16.9287
Time so far is 0m 38s
Epoch Number: 0, Batch Number: 1200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 1200, Training Loss: 18.5352
Time so far is 0m 45s
Epoch Number: 0, Batch Number: 1400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 1400,

Epoch Number: 0, Batch Number: 11600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 11600, Training Loss: 15.8832
Time so far is 7m 1s
Epoch Number: 0, Batch Number: 11800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 11800, Training Loss: 13.4306
Time so far is 7m 8s
Epoch Number: 0, Batch Number: 12000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 12000, Training Loss: 12.7862
Time so far is 7m 15s
Epoch Number: 0, Batch Number: 12200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 12200, Training Loss: 13.4970
Time so far is 7m 22s
Epoch Number: 0, Batch Number: 12400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 12400, Training Loss: 13.7913
Time so far is 7m 29s
Epoch Number: 0, Batch Number: 12600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 12600, Training Loss: 13.4117
Time so far is 7m 36s
Epoch Number: 0, Batch Number: 12800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 12800, Training Loss: 13.

Epoch Number: 0, Batch Number: 22800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 22800, Training Loss: 14.0255
Time so far is 13m 41s
Epoch Number: 0, Batch Number: 23000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 23000, Training Loss: 14.2934
Time so far is 13m 48s
Epoch Number: 0, Batch Number: 23200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 23200, Training Loss: 14.4611
Time so far is 13m 56s
Epoch Number: 0, Batch Number: 23400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 23400, Training Loss: 13.4653
Time so far is 14m 3s
Epoch Number: 0, Batch Number: 23600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 23600, Training Loss: 13.6134
Time so far is 14m 10s
Epoch Number: 0, Batch Number: 23800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 23800, Training Loss: 13.2933
Time so far is 14m 17s
Epoch Number: 0, Batch Number: 24000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 24000, Training Lo

Epoch Number: 0, Batch Number: 34000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 34000, Training Loss: 14.0700
Time so far is 20m 19s
Epoch Number: 0, Batch Number: 34200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 34200, Training Loss: 14.1309
Time so far is 20m 27s
Epoch Number: 0, Batch Number: 34400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 34400, Training Loss: 14.6842
Time so far is 20m 34s
Epoch Number: 0, Batch Number: 34600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 34600, Training Loss: 14.3725
Time so far is 20m 41s
Epoch Number: 0, Batch Number: 34800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 34800, Training Loss: 13.4877
Time so far is 20m 48s
Epoch Number: 0, Batch Number: 35000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 35000, Training Loss: 13.2197
Time so far is 20m 55s
Epoch Number: 0, Batch Number: 35200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 35200, Training L

Epoch Number: 0, Batch Number: 45200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 45200, Training Loss: 12.7615
Time so far is 26m 58s
Epoch Number: 0, Batch Number: 45400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 45400, Training Loss: 12.4194
Time so far is 27m 5s
Epoch Number: 0, Batch Number: 45600, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 45600, Training Loss: 13.3780
Time so far is 27m 12s
Epoch Number: 0, Batch Number: 45800, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 45800, Training Loss: 13.6129
Time so far is 27m 19s
Epoch Number: 0, Batch Number: 46000, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 46000, Training Loss: 14.1175
Time so far is 27m 26s
Epoch Number: 0, Batch Number: 46200, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 46200, Training Loss: 14.2683
Time so far is 27m 33s
Epoch Number: 0, Batch Number: 46400, Validation Metric: 0.0000
Epoch Number: 0, Batch Number: 46400, Training Lo

Epoch Number: 1, Batch Number: 6400, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 6400, Training Loss: 12.8053
Time so far is 33m 34s
Epoch Number: 1, Batch Number: 6600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 6600, Training Loss: 12.9181
Time so far is 33m 41s
Epoch Number: 1, Batch Number: 6800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 6800, Training Loss: 13.2671
Time so far is 33m 48s
Epoch Number: 1, Batch Number: 7000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 7000, Training Loss: 13.5087
Time so far is 33m 55s
Epoch Number: 1, Batch Number: 7200, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 7200, Training Loss: 12.8894
Time so far is 34m 2s
Epoch Number: 1, Batch Number: 7400, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 7400, Training Loss: 12.1591
Time so far is 34m 9s
Epoch Number: 1, Batch Number: 7600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 7600, Training Loss: 12.6983
Tim

Epoch Number: 1, Batch Number: 17600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 17600, Training Loss: 12.8025
Time so far is 40m 13s
Epoch Number: 1, Batch Number: 17800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 17800, Training Loss: 12.8440
Time so far is 40m 21s
Epoch Number: 1, Batch Number: 18000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 18000, Training Loss: 13.7305
Time so far is 40m 28s
Epoch Number: 1, Batch Number: 18200, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 18200, Training Loss: 12.8883
Time so far is 40m 35s
Epoch Number: 1, Batch Number: 18400, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 18400, Training Loss: 13.0734
Time so far is 40m 42s
Epoch Number: 1, Batch Number: 18600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 18600, Training Loss: 12.8593
Time so far is 40m 50s
Epoch Number: 1, Batch Number: 18800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 18800, Training L

Epoch Number: 1, Batch Number: 28800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 28800, Training Loss: 13.0268
Time so far is 46m 52s
Epoch Number: 1, Batch Number: 29000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 29000, Training Loss: 13.6858
Time so far is 46m 60s
Epoch Number: 1, Batch Number: 29200, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 29200, Training Loss: 12.2785
Time so far is 47m 6s
Epoch Number: 1, Batch Number: 29400, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 29400, Training Loss: 13.0240
Time so far is 47m 14s
Epoch Number: 1, Batch Number: 29600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 29600, Training Loss: 12.3698
Time so far is 47m 21s
Epoch Number: 1, Batch Number: 29800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 29800, Training Loss: 13.6023
Time so far is 47m 28s
Epoch Number: 1, Batch Number: 30000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 30000, Training Lo

Epoch Number: 1, Batch Number: 40000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 40000, Training Loss: 13.3625
Time so far is 53m 31s
Epoch Number: 1, Batch Number: 40200, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 40200, Training Loss: 13.2782
Time so far is 53m 39s
Epoch Number: 1, Batch Number: 40400, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 40400, Training Loss: 13.1172
Time so far is 53m 46s
Epoch Number: 1, Batch Number: 40600, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 40600, Training Loss: 12.2744
Time so far is 53m 53s
Epoch Number: 1, Batch Number: 40800, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 40800, Training Loss: 13.5296
Time so far is 54m 0s
Epoch Number: 1, Batch Number: 41000, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 41000, Training Loss: 12.1124
Time so far is 54m 7s
Epoch Number: 1, Batch Number: 41200, Validation Metric: 0.0000
Epoch Number: 1, Batch Number: 41200, Training Los

Epoch Number: 2, Batch Number: 1200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 1200, Training Loss: 12.2889
Time so far is 60m 5s
Epoch Number: 2, Batch Number: 1400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 1400, Training Loss: 13.5863
Time so far is 60m 13s
Epoch Number: 2, Batch Number: 1600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 1600, Training Loss: 12.9157
Time so far is 60m 20s
Epoch Number: 2, Batch Number: 1800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 1800, Training Loss: 11.6208
Time so far is 60m 27s
Epoch Number: 2, Batch Number: 2000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 2000, Training Loss: 13.2640
Time so far is 60m 34s
Epoch Number: 2, Batch Number: 2200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 2200, Training Loss: 12.1626
Time so far is 60m 41s
Epoch Number: 2, Batch Number: 2400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 2400, Training Loss: 12.3085
Ti

Epoch Number: 2, Batch Number: 12600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 12600, Training Loss: 12.3376
Time so far is 66m 49s
Epoch Number: 2, Batch Number: 12800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 12800, Training Loss: 12.5221
Time so far is 66m 56s
Epoch Number: 2, Batch Number: 13000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 13000, Training Loss: 12.8817
Time so far is 67m 3s
Epoch Number: 2, Batch Number: 13200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 13200, Training Loss: 13.2124
Time so far is 67m 11s
Epoch Number: 2, Batch Number: 13400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 13400, Training Loss: 12.1494
Time so far is 67m 17s
Epoch Number: 2, Batch Number: 13600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 13600, Training Loss: 12.9662
Time so far is 67m 25s
Epoch Number: 2, Batch Number: 13800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 13800, Training Lo

Epoch Number: 2, Batch Number: 23800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 23800, Training Loss: 12.2813
Time so far is 73m 24s
Epoch Number: 2, Batch Number: 24000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 24000, Training Loss: 12.7958
Time so far is 73m 31s
Epoch Number: 2, Batch Number: 24200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 24200, Training Loss: 12.0765
Time so far is 73m 38s
Epoch Number: 2, Batch Number: 24400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 24400, Training Loss: 12.4901
Time so far is 73m 45s
Epoch Number: 2, Batch Number: 24600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 24600, Training Loss: 12.6658
Time so far is 73m 52s
Epoch Number: 2, Batch Number: 24800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 24800, Training Loss: 13.2537
Time so far is 73m 59s
Epoch Number: 2, Batch Number: 25000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 25000, Training L

Epoch Number: 2, Batch Number: 35000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 35000, Training Loss: 12.4010
Time so far is 79m 60s
Epoch Number: 2, Batch Number: 35200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 35200, Training Loss: 13.0661
Time so far is 80m 7s
Epoch Number: 2, Batch Number: 35400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 35400, Training Loss: 12.3877
Time so far is 80m 14s
Epoch Number: 2, Batch Number: 35600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 35600, Training Loss: 13.0675
Time so far is 80m 21s
Epoch Number: 2, Batch Number: 35800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 35800, Training Loss: 11.7741
Time so far is 80m 28s
Epoch Number: 2, Batch Number: 36000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 36000, Training Loss: 13.4469
Time so far is 80m 35s
Epoch Number: 2, Batch Number: 36200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 36200, Training Lo

Epoch Number: 2, Batch Number: 46200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 46200, Training Loss: 13.0628
Time so far is 86m 35s
Epoch Number: 2, Batch Number: 46400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 46400, Training Loss: 12.9590
Time so far is 86m 42s
Epoch Number: 2, Batch Number: 46600, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 46600, Training Loss: 12.4600
Time so far is 86m 49s
Epoch Number: 2, Batch Number: 46800, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 46800, Training Loss: 13.0064
Time so far is 86m 56s
Epoch Number: 2, Batch Number: 47000, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 47000, Training Loss: 12.4445
Time so far is 87m 3s
Epoch Number: 2, Batch Number: 47200, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 47200, Training Loss: 11.8992
Time so far is 87m 10s
Epoch Number: 2, Batch Number: 47400, Validation Metric: 0.0000
Epoch Number: 2, Batch Number: 47400, Training Lo

Epoch Number: 3, Batch Number: 7400, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 7400, Training Loss: 12.0257
Time so far is 93m 10s
Epoch Number: 3, Batch Number: 7600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 7600, Training Loss: 12.5623
Time so far is 93m 17s
Epoch Number: 3, Batch Number: 7800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 7800, Training Loss: 13.3576
Time so far is 93m 24s
Epoch Number: 3, Batch Number: 8000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 8000, Training Loss: 12.9750
Time so far is 93m 31s
Epoch Number: 3, Batch Number: 8200, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 8200, Training Loss: 12.7404
Time so far is 93m 38s
Epoch Number: 3, Batch Number: 8400, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 8400, Training Loss: 12.5478
Time so far is 93m 45s
Epoch Number: 3, Batch Number: 8600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 8600, Training Loss: 12.5287
T

Epoch Number: 3, Batch Number: 18600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 18600, Training Loss: 12.7700
Time so far is 99m 46s
Epoch Number: 3, Batch Number: 18800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 18800, Training Loss: 12.6009
Time so far is 99m 53s
Epoch Number: 3, Batch Number: 19000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 19000, Training Loss: 12.6943
Time so far is 99m 60s
Epoch Number: 3, Batch Number: 19200, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 19200, Training Loss: 12.5354
Time so far is 100m 7s
Epoch Number: 3, Batch Number: 19400, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 19400, Training Loss: 13.0769
Time so far is 100m 14s
Epoch Number: 3, Batch Number: 19600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 19600, Training Loss: 12.2932
Time so far is 100m 21s
Epoch Number: 3, Batch Number: 19800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 19800, Training

Epoch Number: 3, Batch Number: 29800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 29800, Training Loss: 13.5462
Time so far is 106m 21s
Epoch Number: 3, Batch Number: 30000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 30000, Training Loss: 13.4020
Time so far is 106m 28s
Epoch Number: 3, Batch Number: 30200, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 30200, Training Loss: 12.6071
Time so far is 106m 35s
Epoch Number: 3, Batch Number: 30400, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 30400, Training Loss: 12.5648
Time so far is 106m 42s
Epoch Number: 3, Batch Number: 30600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 30600, Training Loss: 12.5031
Time so far is 106m 49s
Epoch Number: 3, Batch Number: 30800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 30800, Training Loss: 13.2618
Time so far is 106m 56s
Epoch Number: 3, Batch Number: 31000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 31000, Trai

Epoch Number: 3, Batch Number: 41000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 41000, Training Loss: 12.0845
Time so far is 112m 57s
Epoch Number: 3, Batch Number: 41200, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 41200, Training Loss: 13.0850
Time so far is 113m 4s
Epoch Number: 3, Batch Number: 41400, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 41400, Training Loss: 12.6174
Time so far is 113m 11s
Epoch Number: 3, Batch Number: 41600, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 41600, Training Loss: 12.5387
Time so far is 113m 18s
Epoch Number: 3, Batch Number: 41800, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 41800, Training Loss: 12.5462
Time so far is 113m 25s
Epoch Number: 3, Batch Number: 42000, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 42000, Training Loss: 12.3671
Time so far is 113m 32s
Epoch Number: 3, Batch Number: 42200, Validation Metric: 0.0000
Epoch Number: 3, Batch Number: 42200, Train

Epoch Number: 4, Batch Number: 2000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 2000, Training Loss: 13.2538
Time so far is 119m 23s
Epoch Number: 4, Batch Number: 2200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 2200, Training Loss: 12.1527
Time so far is 119m 30s
Epoch Number: 4, Batch Number: 2400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 2400, Training Loss: 12.2991
Time so far is 119m 38s
Epoch Number: 4, Batch Number: 2600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 2600, Training Loss: 13.4814
Time so far is 119m 45s
Epoch Number: 4, Batch Number: 2800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 2800, Training Loss: 12.7889
Time so far is 119m 52s
Epoch Number: 4, Batch Number: 3000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 3000, Training Loss: 11.8746
Time so far is 119m 59s
Epoch Number: 4, Batch Number: 3200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 3200, Training Loss: 13.

Epoch Number: 4, Batch Number: 13200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 13200, Training Loss: 13.2048
Time so far is 126m 1s
Epoch Number: 4, Batch Number: 13400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 13400, Training Loss: 12.1418
Time so far is 126m 8s
Epoch Number: 4, Batch Number: 13600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 13600, Training Loss: 12.9586
Time so far is 126m 15s
Epoch Number: 4, Batch Number: 13800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 13800, Training Loss: 13.0489
Time so far is 126m 22s
Epoch Number: 4, Batch Number: 14000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 14000, Training Loss: 12.6580
Time so far is 126m 29s
Epoch Number: 4, Batch Number: 14200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 14200, Training Loss: 13.0901
Time so far is 126m 36s
Epoch Number: 4, Batch Number: 14400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 14400, Traini

Epoch Number: 4, Batch Number: 24400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 24400, Training Loss: 12.4848
Time so far is 132m 35s
Epoch Number: 4, Batch Number: 24600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 24600, Training Loss: 12.6601
Time so far is 132m 42s
Epoch Number: 4, Batch Number: 24800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 24800, Training Loss: 13.2483
Time so far is 132m 49s
Epoch Number: 4, Batch Number: 25000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 25000, Training Loss: 12.3058
Time so far is 132m 56s
Epoch Number: 4, Batch Number: 25200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 25200, Training Loss: 12.9433
Time so far is 133m 3s
Epoch Number: 4, Batch Number: 25400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 25400, Training Loss: 12.5116
Time so far is 133m 10s
Epoch Number: 4, Batch Number: 25600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 25600, Train

Epoch Number: 4, Batch Number: 35600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 35600, Training Loss: 13.0639
Time so far is 139m 11s
Epoch Number: 4, Batch Number: 35800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 35800, Training Loss: 11.7708
Time so far is 139m 18s
Epoch Number: 4, Batch Number: 36000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 36000, Training Loss: 13.4433
Time so far is 139m 25s
Epoch Number: 4, Batch Number: 36200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 36200, Training Loss: 12.5145
Time so far is 139m 32s
Epoch Number: 4, Batch Number: 36400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 36400, Training Loss: 12.0496
Time so far is 139m 39s
Epoch Number: 4, Batch Number: 36600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 36600, Training Loss: 12.7122
Time so far is 139m 46s
Epoch Number: 4, Batch Number: 36800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 36800, Trai

Epoch Number: 4, Batch Number: 46800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 46800, Training Loss: 13.0049
Time so far is 145m 47s
Epoch Number: 4, Batch Number: 47000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 47000, Training Loss: 12.4430
Time so far is 145m 54s
Epoch Number: 4, Batch Number: 47200, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 47200, Training Loss: 11.8978
Time so far is 146m 0s
Epoch Number: 4, Batch Number: 47400, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 47400, Training Loss: 11.9333
Time so far is 146m 7s
Epoch Number: 4, Batch Number: 47600, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 47600, Training Loss: 11.9656
Time so far is 146m 14s
Epoch Number: 4, Batch Number: 47800, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 47800, Training Loss: 12.7607
Time so far is 146m 21s
Epoch Number: 4, Batch Number: 48000, Validation Metric: 0.0000
Epoch Number: 4, Batch Number: 48000, Traini