In [1]:
# IPython automagic: move the working directory up one level (the captured
# output below shows the resulting directory). Presumably needed so that the
# project-relative imports in the next cell resolve -- TODO confirm.
cd ..

/Users/ericweiner/Documents/neural_nets_research


In [2]:
import torch.optim as optim
import torch.nn as nn

from tree_to_sequence.program_datasets import TreeNTMDataset
from tree_to_sequence.tree_to_sequence_attention_ntm import TreeToSequenceAttentionNTM
from tree_to_sequence.tree_encoder import TreeEncoder
from tree_to_sequence.sequence_encoder import SequenceEncoder
from tree_to_sequence.multilayer_lstm_cell import MultilayerLSTMCell
from tree_to_sequence.translating_trees import tree_to_list, decode_tokens, print_tree
from neural_nets_library import training
from neural_turing_machine.ntm import NTM
from ANC.util import printProgram

In [3]:
# Counts of variable and integer tokens; both are summed into
# `encoder_input_size` further down the notebook.
num_vars = 10
num_ints = 11

# Token ids for the For-language ops. Each name is upper-cased and wrapped in
# angle brackets ("Var" -> "<VAR>"); its id is its position in the tuple.
for_ops = {
    "<" + op_name.upper() + ">": op_id
    for op_id, op_name in enumerate((
        "Var",
        "Const",
        "Plus",
        "Minus",
        "EqualFor",
        "LeFor",
        "GeFor",
        "Assign",
        "If",
        "Seq",
        "For",
    ))
}

# Token ids for the lambda-calculus vocabulary, numbered in listing order
# (0 through 31).
lambda_calculus_ops = {
    token: token_id
    for token_id, token in enumerate((
        "<VARIABLE>",
        "<ABSTRACTION>",
        "<NUMBER>",
        "<BOOLEAN>",
        "<NIL>",
        "<IF>",
        "<CONS>",
        "<MATCH>",
        "<UNARYOPER>",
        "<BINARYOPER>",
        "<LET>",
        "<LETREC>",
        "<TRUE>",
        "<FALSE>",
        "<TINT>",
        "<TBOOL>",
        "<TINTLIST>",
        "<TFUN>",
        "<ARGUMENT>",
        "<NEG>",
        "<NOT>",
        "<PLUS>",
        "<MINUS>",
        "<TIMES>",
        "<DIVIDE>",
        "<AND>",
        "<OR>",
        "<EQUAL>",
        "<LESS>",
        "<APPLICATION>",
        "<HEAD>",
        "<TAIL>",
    ))
}

In [4]:
# Input-format switches; all of these are forwarded to the dataset constructor
# in a later cell, and `input_as_seq` / `use_embedding` also configure the encoder.
input_as_seq = False
input_eos_token = False
use_embedding = True
binarize = True
long_base_case = True

# Adds one extra input slot only when both the EOS token and sequence input
# are enabled; used when sizing the encoder input.
eos_bonus = int(input_eos_token and input_as_seq)

In [5]:
# Dataset settings. `M` is passed to the dataset as `num_ints` and reused by
# the evaluation cell; `R` and `N` are not referenced by any other visible cell.
is_lambda_calculus = False
thinking_time = 10
M = 5
R = 3
N = 11

# Build the dataset on CPU from the JSON file of For-language programs,
# forwarding the input-format flags set in the previous cell.
for_ntm_dset = TreeNTMDataset(
    "ANC/Easy-arbitraryForListWithOutput.json",
    is_lambda_calculus,
    thinking_time,
    num_ints=M,
    binarize=binarize,
    input_eos_token=input_eos_token,
    use_embedding=use_embedding,
    long_base_case=long_base_case,
    input_as_seq=input_as_seq,
    cuda=False,
)


In [6]:
# Sanity check: dump the first ten dataset items as raw tuples.
for sample_idx in range(10):
    print(for_ntm_dset[sample_idx])

(<tree_to_sequence.translating_trees.Node object at 0x107178a20>, 
    0     0     0     0     0     0     0     0     0     0     0
    1     0     0     0     0     0     0     0     0     0     0
    2     0     0     0     0     0     0     0     0     0     0
    3     0     0     0     0     0     0     0     0     0     0
    4     0     0     0     0     0     0     0     0     0     0
    5     0     0     0     0     0     0     0     0     0     0
    6     0     0     0     0     0     0     0     0     0     0
    7     0     0     0     0     0     0     0     0     0     0
    8     0     0     0     0     0     0     0     0     0     0
    9     0     0     0     0     0     0     0     0     0     0
   10     0     0     0     0     0     0     0     0     0     0
[torch.FloatTensor of size 11x11]
, 
 7
 7
 7
 7
 7
 7
 7
 7
 7
 7
 7
[torch.FloatTensor of size 11]
)
(<tree_to_sequence.translating_trees.Node object at 0x10fadceb8>, 
    0     0     0     0     0     0  

In [7]:
def reset_all_parameters_uniform(model, stdev):
    """Re-initialize every parameter of ``model`` uniformly in [-stdev, stdev].

    The parameters are overwritten in place; nothing is returned.

    Args:
        model: Object whose ``parameters()`` yields the tensors to reset
            (e.g. an ``nn.Module``).
        stdev: Half-width of the sampling interval. Despite the name, it is
            used as the uniform bound, not as a standard deviation.
    """
    # `nn.init.uniform` is the deprecated alias of `nn.init.uniform_` and warns
    # on current PyTorch. Prefer the in-place name when this build provides it,
    # and fall back to the old one so older installs keep working.
    init_uniform = getattr(nn.init, "uniform_", None) or nn.init.uniform
    for param in model.parameters():
        init_uniform(param, -stdev, stdev)

In [8]:
# Model hyperparameters.
num_layers = 1
hidden_size = 30
embedding_size = 30
alignment_size = 50
align_type = 1

# NTM memory dimensions.
address_count = 128
address_size = 30

# Encoder input width: variable tokens + integer tokens + op tokens, plus the
# optional EOS slot.
encoder_input_size = num_vars + num_ints + len(for_ops) + eos_bonus

# Choose the encoder that matches the input representation.
encoder = (
    SequenceEncoder(encoder_input_size, hidden_size, num_layers,
                    attention=True, use_embedding=use_embedding)
    if input_as_seq
    else TreeEncoder(encoder_input_size, hidden_size, num_layers, [1, 2],
                     attention=True, use_embedding=use_embedding)
)

ntm = NTM(1, hidden_size, 1, address_count, address_size, [0, 1])

# Each decoder step takes input + previous guess + decoder state with
# attention; the input is 0 on every step after the first.
decoder = MultilayerLSTMCell(2 + hidden_size, hidden_size, num_layers)

program_model = TreeToSequenceAttentionNTM(
    encoder,
    decoder,
    ntm,
    hidden_size,
    embedding_size,
    alignment_size=alignment_size,
    align_type=align_type,
)

# Uniform reset first, then the forget-bias initialization; presumably ordered
# this way so the reset does not overwrite the forget-bias values -- TODO confirm.
reset_all_parameters_uniform(program_model, 0.1)
for lstm_module in (encoder, decoder):
    lstm_module.initialize_forget_bias(3)

In [9]:
#program_model = program_model.cuda()

In [10]:
# Adam over all model parameters.
optimizer = optim.Adam(program_model.parameters(), lr=0.005)

# Scale the learning rate by 0.8 when the monitored value plateaus, with a
# patience of 100 scheduler steps; verbose mode reports each change.
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.8,
    patience=100,
    verbose=True,
)

In [None]:
import torch
#program_model.load_state_dict(torch.load('program_model.pth'))

In [None]:
# Train on CPU for three epochs with batches of 100; the captured output
# below shows a progress report every 200 batches.
training.train_model_tree_to_ntm(
    program_model,
    for_ntm_dset,
    optimizer,
    lr_scheduler=lr_scheduler,
    num_epochs=3,
    batch_size=100,
    plateau_lr=True,
    print_every=200,
    use_cuda=False,
)

Epoch 0/2
----------
Epoch Number: 0, Batch Number: 200, Training Loss: 567.1163
Time so far is 1m 6s
Example diff:
Example Outs:  [0.2626033127307892, 0.26559606194496155, 0.26838207244873047, 0.2709384262561798, 0.2732480764389038, 0.2753008306026459, 0.277093768119812, 0.27863138914108276, 0.2799246311187744, 0.28098976612091064, 0.2818467319011688]
Expected Outs:  Variable containing:
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 400, Training Loss: 475.5195
Time so far is 2m 11s
Example diff:
Example Outs:  [0.73655104637146, 0.7374688386917114, 0.7382774949073792, 0.7389780282974243, 0.739575207233429, 0.7400757670402527, 0.7404880523681641, 0.7408206462860107, 0.741081953048706, 0.7412800788879395, 0.7414225339889526]
Expected Outs:  Variable containing:
-4
-3
-2
-1
 0
 1
 2
 3
 4
 5
 6
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 600, Training Loss: 494.8550
Time so far is 3m 19s
Example diff:
Exampl

In [None]:
# i = 0

# for prog, target in for_anc_dset:
# #     controller = program_model.forward_prediction(prog)
# #     util.printProgram(controller, 0.5)
    
# #     loss.backward()
    
# #     for name, param in program_model.named_parameters():
# #         print(name)
# #         print(param.grad)
        
# #     optimizer.step()
# #     optimizer.zero_grad()

#     if i == 3:
#         break
        
#     i += 1

In [None]:
# Compare predicted and expected memory for the first ten samples.
# NOTE(review): `for_anc_dset` is not assigned in any visible cell (the dataset
# built earlier in this notebook is `for_ntm_dset`), so this cell would raise
# NameError on a fresh kernel -- confirm which dataset is intended.
# NOTE(review): the samples printed from `for_ntm_dset` look like 3-tuples,
# while this loop unpacks two values and indexes `target[0][0]` / `target[1][0]`;
# the cell looks like it was carried over from a different (ANC) model -- verify.
for prog, target in for_anc_dset[0:10]:
    # Print the program's token list in decoded form.
    print(decode_tokens(tree_to_list(prog), 10, M, for_ops))
    controller = program_model.forward_prediction(prog)
    
    input_memory = target[0][0]
    correct_memory = target[1][0]
    # Run the predicted controller on the input memory; the second return value is discarded.
    prediction_memory, _ = controller.forward_prediction([input_memory])
    
    # printProgram(controller, 0.5)
    print(correct_memory[0])
    print(prediction_memory[0])
    
    # Sum of squared differences between expected and predicted memory.
    mem_diff = correct_memory[0] - prediction_memory[0].data
    print(torch.sum(mem_diff * mem_diff))