In [1]:
cd ..

/Users/ericweiner/Documents/neural_nets_research


In [2]:
import torch.optim as optim
import torch.nn as nn

from tree_to_sequence.program_datasets import TreeNTMDataset
from tree_to_sequence.tree_to_sequence_attention_ntm import TreeToSequenceAttentionNTM
from tree_to_sequence.tree_encoder import TreeEncoder
from tree_to_sequence.sequence_encoder import SequenceEncoder
from tree_to_sequence.multilayer_lstm_cell import MultilayerLSTMCell
from tree_to_sequence.translating_trees import tree_to_list, decode_tokens, print_tree
from neural_nets_library import training
from neural_turing_machine.ntm import NTM
from ANC.util import printProgram

In [3]:
# Vocabulary sizes used when sizing the encoder input.
num_vars = 10
num_ints = 11

# For-language operators. The token id of each operator is its position in the
# list; the key is the upper-cased name wrapped in angle brackets ("<VAR>", ...).
for_ops = {
    "<" + op_name.upper() + ">": token_id
    for token_id, op_name in enumerate([
        "Var",
        "Const",
        "Plus",
        "Minus",
        "EqualFor",
        "LeFor",
        "GeFor",
        "Assign",
        "If",
        "Seq",
        "For",
    ])
}

# Lambda-calculus tokens, listed in token-id order (id == position in the list).
lambda_calculus_ops = {
    token: token_id
    for token_id, token in enumerate([
        "<VARIABLE>",
        "<ABSTRACTION>",
        "<NUMBER>",
        "<BOOLEAN>",
        "<NIL>",
        "<IF>",
        "<CONS>",
        "<MATCH>",
        "<UNARYOPER>",
        "<BINARYOPER>",
        "<LET>",
        "<LETREC>",
        "<TRUE>",
        "<FALSE>",
        "<TINT>",
        "<TBOOL>",
        "<TINTLIST>",
        "<TFUN>",
        "<ARGUMENT>",
        "<NEG>",
        "<NOT>",
        "<PLUS>",
        "<MINUS>",
        "<TIMES>",
        "<DIVIDE>",
        "<AND>",
        "<OR>",
        "<EQUAL>",
        "<LESS>",
        "<APPLICATION>",
        "<HEAD>",
        "<TAIL>",
    ])
}

In [4]:
# Input-representation switches.
binarize = True
long_base_case = True
use_embedding = True
input_as_seq = False
input_eos_token = False

# 1 only when the input is a sequence AND carries an EOS token, otherwise 0.
eos_bonus = int(input_eos_token and input_as_seq)

In [13]:
# Dataset settings.
is_lambda_calculus = False
M = 5
R = 3
N = 11
thinking_time = 10

# Build the dataset from the For-language JSON file, on the CPU (cuda=False).
# M is forwarded as the dataset's `num_ints`; the input-format flags come from
# the switches defined earlier in the notebook.
for_ntm_dset = TreeNTMDataset(
    "ANC/Easy-arbitraryForListWithOutput.json",
    is_lambda_calculus,
    thinking_time,
    repeats=1,
    num_ints=M,
    binarize=binarize,
    input_eos_token=input_eos_token,
    use_embedding=use_embedding,
    long_base_case=long_base_case,
    input_as_seq=input_as_seq,
    cuda=False,
)


In [6]:
# Preview the first ten dataset entries, one print per entry.
for i in range(10):
    entry = for_ntm_dset[i]
    print(entry)

(<tree_to_sequence.translating_trees.Node object at 0x1072a1c18>, 
    0     0     0     0     0     0     0     0     0     0     0
    1     0     0     0     0     0     0     0     0     0     0
    2     0     0     0     0     0     0     0     0     0     0
    3     0     0     0     0     0     0     0     0     0     0
    4     0     0     0     0     0     0     0     0     0     0
    5     0     0     0     0     0     0     0     0     0     0
    6     0     0     0     0     0     0     0     0     0     0
    7     0     0     0     0     0     0     0     0     0     0
    8     0     0     0     0     0     0     0     0     0     0
    9     0     0     0     0     0     0     0     0     0     0
   10     0     0     0     0     0     0     0     0     0     0
    0     0     0     0     0     0     0     0     0     0     0
    1     0     0     0     0     0     0     0     0     0     0
    2     0     0     0     0     0     0     0     0     0     0
    3    

In [7]:
def reset_all_parameters_uniform(model, stdev):
    """Re-initialise every parameter of ``model`` in place from U(-stdev, stdev).

    Args:
        model: object exposing ``parameters()`` (e.g. an ``nn.Module``).
        stdev: half-width of the uniform interval. Despite its name it is used
            as the bound of the distribution, not as a standard deviation.

    Returns:
        None. The parameters are modified in place.
    """
    # `nn.init.uniform` is the deprecated spelling of `nn.init.uniform_` and
    # emits a warning on current PyTorch. Prefer the in-place name and only
    # fall back to the old one on installs that predate the rename.
    init_uniform = getattr(nn.init, "uniform_", None) or nn.init.uniform
    for param in model.parameters():
        init_uniform(param, -stdev, stdev)

In [8]:
# Model hyper-parameters.
embedding_size = 30
hidden_size = 40
num_layers = 1
alignment_size = 50
align_type = 1

# Encoder input width: variables + integers + For-language operators, plus the
# optional EOS slot.
encoder_input_size = num_vars + num_ints + len(for_ops) + eos_bonus

# Choose the encoder matching the input representation (tree unless the
# programs are fed as sequences). Both variants are built with attention on.
if not input_as_seq:
    encoder = TreeEncoder(
        encoder_input_size, hidden_size, num_layers, [1, 2],
        attention=True, use_embedding=use_embedding,
    )
else:
    encoder = SequenceEncoder(
        encoder_input_size, hidden_size, num_layers,
        attention=True, use_embedding=use_embedding,
    )

# Neural Turing Machine component; address_count and address_size are passed
# positionally after the (1, hidden_size, 1) sizes.
address_count = 128
address_size = 30
ntm = NTM(1, hidden_size, 1, address_count, address_size, [0, 1])

# Each step takes input + prevguess + decoder w/ attention, input 0 on non first step
decoder = MultilayerLSTMCell(2 + hidden_size, hidden_size, num_layers)
program_model = TreeToSequenceAttentionNTM(
    encoder, decoder, ntm, hidden_size, embedding_size,
    alignment_size=alignment_size, align_type=align_type,
)

# Uniform re-initialisation first, then the forget-bias overrides, so the
# latter are not overwritten.
reset_all_parameters_uniform(program_model, 0.1)
encoder.initialize_forget_bias(3)
decoder.initialize_forget_bias(3)

In [9]:
#program_model = program_model.cuda()

In [10]:
# Adam over every model parameter, paired with a plateau scheduler that
# multiplies the learning rate by 0.8 once the monitored value has failed to
# improve for more than `patience` scheduler steps (verbose: logs each change).
optimizer = optim.Adam(program_model.parameters(), lr=0.005)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.8,
    patience=100,
    verbose=True,
)

In [11]:
import torch
#program_model.load_state_dict(torch.load('program_model.pth'))

In [None]:
# Train on the CPU for three epochs, reporting every 100 batches and letting
# the plateau scheduler adjust the learning rate.
training.train_model_tree_to_ntm(
    program_model,
    for_ntm_dset,
    optimizer,
    lr_scheduler=lr_scheduler,
    num_epochs=3,
    batch_size=100,
    plateau_lr=True,
    print_every=100,
    use_cuda=False,
)

Epoch 0/2
----------
Epoch Number: 0, Batch Number: 100, Training Loss: 494.9503
Time so far is 0m 34s
Example diff:
Example Outs:  [1.0569264888763428, 1.0581172704696655, 1.0590182542800903, 1.0596485137939453, 1.0600374937057495, 1.0602188110351562, 1.0602256059646606, 1.060088038444519, 1.0598313808441162, 1.059476613998413, 1.0590399503707886]
Expected Outs:  Variable containing:
 12
 12
 12
 12
 12
 12
 12
 12
 12
 12
 12
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 200, Training Loss: 457.5253
Time so far is 1m 9s
Example diff:
Example Outs:  [1.5656388998031616, 1.5658396482467651, 1.5660182237625122, 1.5661760568618774, 1.566315770149231, 1.5664395093917847, 1.5665497779846191, 1.5666483640670776, 1.5667363405227661, 1.566814661026001, 1.5668834447860718]
Expected Outs:  Variable containing:
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 300, Training Loss: 421.5866
Time so far is 1m 43s
Example diff

Epoch Number: 0, Batch Number: 2000, Training Loss: 251.6568
Time so far is 16m 22s
Example diff:
Example Outs:  [5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682, 5.404211521148682]
Expected Outs:  Variable containing:
  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 2100, Training Loss: 247.8828
Time so far is 16m 56s
Example diff:
Example Outs:  [5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766, 5.429813385009766]
Expected Outs:  Variable containing:
  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 2200, Training Loss: 242.8664
Time so far is 17m 30s
Example diff:
Example Outs:  [5.429640293121

Epoch Number: 0, Batch Number: 3900, Training Loss: 315.0857
Time so far is 866m 49s
Example diff:
Example Outs:  [4.754257678985596, 4.754258632659912, 4.754258632659912, 4.75425910949707, 4.7542595863342285, 4.7542595863342285, 4.754260540008545, 4.754260540008545, 4.754260540008545, 4.754260540008545, 4.754261016845703]
Expected Outs:  Variable containing:
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 4000, Training Loss: 217.6580
Time so far is 1039m 48s
Example diff:
Example Outs:  [4.762508869171143, 4.762509822845459, 4.762509822845459, 4.762510776519775, 4.762510776519775, 4.762511253356934, 4.762511730194092, 4.762511730194092, 4.762511730194092, 4.762511730194092, 4.76251220703125]
Expected Outs:  Variable containing:
  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
[torch.FloatTensor of size 11]

Epoch Number: 0, Batch Number: 4100, Training Loss: 262.8293
Time so far is 1042m 35s
Example diff:
Example Outs:  [4.7474675

In [None]:
# i = 0

# for prog, target in for_anc_dset:
# #     controller = program_model.forward_prediction(prog)
# #     util.printProgram(controller, 0.5)
    
# #     loss.backward()
    
# #     for name, param in program_model.named_parameters():
# #         print(name)
# #         print(param.grad)
        
# #     optimizer.step()
# #     optimizer.zero_grad()

#     if i == 3:
#         break
        
#     i += 1

In [None]:
# Qualitative check on the first ten programs: print the decoded program, the
# expected and predicted memories, and their summed squared difference.
#
# The loop previously iterated over `for_anc_dset`, a name that is never
# defined in this notebook (NameError on a fresh kernel); the dataset built
# earlier is `for_ntm_dset`. Samples are fetched by integer index, the access
# pattern already used when previewing the dataset, rather than by slicing.
# NOTE(review): the controller / memory layout used below is assumed to match
# what `program_model.forward_prediction` returns for this dataset - confirm.
for sample_idx in range(10):
    prog, target = for_ntm_dset[sample_idx]
    print(decode_tokens(tree_to_list(prog), 10, M, for_ops))
    controller = program_model.forward_prediction(prog)

    input_memory = target[0][0]
    correct_memory = target[1][0]
    prediction_memory, _ = controller.forward_prediction([input_memory])

    # printProgram(controller, 0.5)
    print(correct_memory[0])
    print(prediction_memory[0])

    # Summed squared error between expected and predicted memory.
    mem_diff = correct_memory[0] - prediction_memory[0].data
    print(torch.sum(mem_diff * mem_diff))