## TransNeo/AlphaNeo
- AlphaNeo using pre-trained TransE embeddings (optional)
- Stage: Cambrian
- Version: Spriggina
- Update Logs
    - 0713: added DeepPath-style rollback during training
    - **0716: new learning paradigm, see memo for details**

#### Related Commands
- tensorboard --logdir runs
- nohup jupyter lab > jupyter.log &

In [1]:
import logging 
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

# Detect once whether a CUDA device is available; later cells consult this
# flag for GPU placement.
use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
torch.__version__

'1.0.0'

In [5]:
class TransNeo(nn.Module):
    """Two-layer policy network that scores candidate shells.

    The network only looks at the element-wise difference between the
    output feature map and the current feature map, and returns
    log-probabilities over ``config["fn"]["vocab_size"]`` shells.

    Args:
        p_config: configuration dict; ``p_config["embd_dim"]`` is the input
            feature size and ``p_config["fn"]["vocab_size"]`` the number of
            output classes.
    """

    def __init__(self, p_config=None):
        super().__init__()
        self.config = p_config

        hidden_dim = 128
        # Layer creation order is kept (policy0, then policy1) so parameter
        # initialisation draws from the RNG in the same sequence.
        self.policy0 = nn.Linear(self.config["embd_dim"], hidden_dim)
        self.policy1 = nn.Linear(hidden_dim, self.config["fn"]["vocab_size"])

    def forward(self, p_mapin, p_mapout):
        """Return log-probabilities over shells.

        Args:
            p_mapin: tensor of shape (B, embd_dim) describing the current state.
            p_mapout: tensor of shape (B, embd_dim) describing the target output.

        Returns:
            Tensor of shape (B, vocab_size), log-softmax normalised over dim 1.
        """
        delta = p_mapout - p_mapin
        hidden = F.relu(self.policy0(delta))
        logits = self.policy1(hidden)
        return torch.log_softmax(logits, dim=1)

In [6]:
# replace certain node id with certain value
def modify_shell(p_shell, p_id_from, p_id_to):
    """Return a copy of a shell with one node id substituted.

    Args:
        p_shell: pair ``(production, rhs)`` where ``rhs`` is a sequence of
            node ids.
        p_id_from: node id to look for in ``rhs``.
        p_id_to: value written wherever ``p_id_from`` occurs.

    Returns:
        ``(production, rhs')`` with ``rhs'`` always a tuple.
    """
    production, rhs = p_shell[0], p_shell[1]
    swapped = tuple(
        p_id_to if node_id == p_id_from else node_id
        for node_id in rhs
    )
    return (production, swapped)


def MetaTrain(p_config, p_spec, p_interpreter, p_generator, p_model, p_optim, p_writer):
    """Meta-train the agent in a supervised way.

    Every index is one episode (one attempt with a hint): a fresh chain
    program is drawn from ``p_generator``, the model scores all candidate
    shells for the first step, and the loss is the negative log-probability
    of the shell the sampled solution uses first. Losses are averaged over
    ``batch_size`` episodes before each optimizer step; a trailing partial
    batch at the end of an epoch is also applied instead of being dropped.

    NOTICE: only valid for size 1 training, since only
    ``ps_solution.shells[0]`` is supervised.

    Args:
        p_config: dict; reads ``p_config["meta_train"]`` keys ``n_epoch``,
            ``n_total`` and ``batch_size``.
        p_spec: spec object forwarded to ``ProgramSpace``.
        p_interpreter: interpreter forwarded to ``ProgramSpace`` and used for
            ``camb_get_ventogyrus`` feature extraction.
        p_generator: object providing ``get_new_chain_program``.
        p_model: model called as ``p_model(current, output)`` returning
            log-probabilities of shape (1, n_shells).
        p_optim: optimizer over ``p_model``'s parameters.
        p_writer: unused here; kept for interface compatibility.

    Raises:
        ValueError: if the solution's first shell is not among the candidate
            shells (raised by ``list.index``).
    """
    print("# Start Meta-Train...")
    train_cfg = p_config["meta_train"]
    n_total = train_cfg["n_total"]
    batch_size = train_cfg["batch_size"]
    # Inputs must live on the same device as the model.
    device = next(p_model.parameters()).device

    def _step_on_batch(p_losses):
        # One optimizer update on the mean loss of the collected episodes.
        batch_loss = sum(p_losses) / len(p_losses)
        p_optim.zero_grad()
        batch_loss.backward()
        p_optim.step()

    for d_epoch in range(train_cfg["n_epoch"]):
        p_model.train()

        # Running sum avoids re-summing the whole epoch on every progress print.
        epoch_loss_sum = 0.0
        batch_loss_list = []

        for d_ind in range(n_total):
            print("\r# epoch:{}, index:{}/{}, avg.loss:{:.2f}".format(
                d_epoch, d_ind, n_total,
                epoch_loss_sum / d_ind if d_ind > 0 else 0,
            ), end="")

            # initialize a solution
            ps_solution = p_generator.get_new_chain_program(
                2,
            )

            # initialize a new ProgramSpace
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0]  # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(shell, tmp_node_to_replace, -1)
                for shell in tmp_shell_list
            ]

            id_current = ps_current.get_strict_frontiers()[0]
            var_current = ps_current.node_list[id_current].ps_data  # need the real var name in r env
            var_output = ps_current.output

            map_current = p_interpreter.camb_get_ventogyrus(var_current)
            map_output = p_interpreter.camb_get_ventogyrus(var_output)

            # instantiate the templates on the current frontier node
            current_shell_list = [
                modify_shell(template, -1, id_current)
                for template in template_list
            ]

            # wrap in B=1
            td_current = torch.tensor([map_current], dtype=torch.float, device=device)
            td_output = torch.tensor([map_output], dtype=torch.float, device=device)

            # (B=1, fn_vocab_size)
            td_pred = p_model(td_current, td_output)
            # directly give the hint / supervised, ps_solution.shells[0] works for size 1
            tmp_id = current_shell_list.index(ps_solution.shells[0])
            # supervised / always correct with +1 reward
            d_loss = -td_pred[0, tmp_id]
            batch_loss_list.append(d_loss)
            epoch_loss_sum += d_loss.item()

            if len(batch_loss_list) >= batch_size:
                _step_on_batch(batch_loss_list)
                batch_loss_list = []

        # Apply whatever is left when n_total is not a multiple of batch_size.
        if batch_loss_list:
            _step_on_batch(batch_loss_list)

        print()
    

In [7]:
def compare_sketch(ps0, ps1):
    """Tell whether two program spaces share the same sketch.

    Two spaces match when their ``node_list`` lengths are equal and the last
    ``len(ps0.shells)`` nodes carry pairwise identical ``name`` values.

    Returns:
        True if the sketches match, False otherwise.
    """
    if len(ps0.node_list) != len(ps1.node_list):
        return False
    n_shells = len(ps0.shells)
    # Walk backwards from the end of both node lists, one node per shell.
    return all(
        ps0.node_list[-offset].name == ps1.node_list[-offset].name
        for offset in range(1, n_shells + 1)
    )

def MetaTest(p_config, p_spec, p_interpreter, p_generator, p_model, p_bmrks, p_optim, p_writer):
    """Meta-test an agent: run benchmarks directly with online adaptation.

    For each benchmark episode a deep copy of ``p_model`` is adapted online
    with its own Adam optimizer. In every attempt the copy greedily (or, with
    probability ``exploration_rate``, randomly) picks up to ``maxn_step``
    shells. An attempt that neither solves the benchmark nor hits the
    solution's sketch is penalised by lowering the log-probability of every
    shell it selected. An episode ends at the first sketch match, at the
    first solved program, or when the attempt/update budget is exhausted.
    Progress statistics are printed on a single carriage-returned line.

    Args:
        p_config: dict; reads ``p_config["meta_test"]`` keys ``n_episode``,
            ``maxn_attempt``, ``maxn_update``, ``maxn_step`` and
            ``exploration_rate``.
        p_spec: spec object forwarded to ``ProgramSpace``.
        p_interpreter: interpreter used to load benchmark data and to extract
            features via ``camb_get_ventogyrus``.
        p_generator: unused here; kept for interface compatibility.
        p_model: meta-trained model; never modified (a deep copy is trained).
        p_bmrks: indexable collection of ``(program, example)`` pairs; at most
            the first ``n_episode`` entries are used.
        p_optim: unused here; each episode builds its own optimizer.
        p_writer: unused here; kept for interface compatibility.
    """
    print("# Start Meta-Test...")
    test_cfg = p_config["meta_test"]
    maxn_attempt = test_cfg["maxn_attempt"]
    maxn_update = test_cfg["maxn_update"]
    maxn_step = test_cfg["maxn_step"]
    exploration_rate = test_cfg["exploration_rate"]

    n_solved = 0  # track the number of solved problems
    n_sketch_solved = 0
    n_attempt_list = []  # attempt index of every solved episode
    n_sketch_atlist = []  # attempt index when hitting the sketch

    # Never index past the available benchmarks.
    n_episode = min(test_cfg["n_episode"], len(p_bmrks))

    for d_episode in range(n_episode):
        proposal_tracker = set()

        # retrieve the given meta-trained model for testing
        test_model = copy.deepcopy(p_model)
        test_model.train()
        test_optim = torch.optim.Adam(list(test_model.parameters()))
        # Inputs must live on the same device as the model.
        device = next(test_model.parameters()).device

        # ==== prepare the benchmark ====
        bmrk_prog, bmrk_str_example = p_bmrks[d_episode]
        bmrk_example = Example(
            input=[p_interpreter.load_data_into_var(p) for p in bmrk_str_example.input],
            output=p_interpreter.load_data_into_var(bmrk_str_example.output),
        )
        ps_solution = ProgramSpace(
            p_spec, p_interpreter, bmrk_example.input, bmrk_example.output,
        )
        ps_solution.init_by_prog(bmrk_prog)

        # solution self-check (best effort: report and carry on)
        if ps_solution.check_eq() is None:
            print("ERROR, SOLUTION NOT CONSISTENT!")

        is_solved = False
        is_sketch_solved = False

        # d_attempt is the 0-based index of the current *distinct* proposal;
        # d_update counts every pass, including repeated proposals.
        d_attempt = -1
        d_update = 0
        # "+ 1" keeps the number of distinct attempts at maxn_attempt
        # (indices 0 .. maxn_attempt-1) rather than maxn_attempt + 1.
        while d_attempt + 1 < maxn_attempt and d_update < maxn_update:
            d_attempt += 1
            d_update += 1

            # log-probabilities of the shells picked in this attempt
            selected_log_probs = []

            # in every new attempt, initialize a new Program Space
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0]  # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(shell, tmp_node_to_replace, -1)
                for shell in tmp_shell_list
            ]

            d_step = 0
            while d_step < maxn_step:

                # print the testing progress
                print("\r# AC/SK/EP:{}/{}/{}, AT:{}, SP:{}, att.ske.:{:.2f}, att.prog.:{:.2f},".format(
                    n_solved, n_sketch_solved, d_episode, d_attempt, d_step,
                    sum(n_sketch_atlist)/len(n_sketch_atlist) if len(n_sketch_atlist)>0 else -1,
                    sum(n_attempt_list)/len(n_attempt_list) if len(n_attempt_list)>0 else -1,
                ),end="")

                # assume chain execution, so only 1 possible frontier;
                # at d_step=0 this should be input[0]
                id_current = ps_current.get_strict_frontiers()[0]
                var_current = ps_current.node_list[id_current].ps_data  # need the real var name in r env
                var_output = ps_current.output

                map_current = p_interpreter.camb_get_ventogyrus(var_current)
                map_output = p_interpreter.camb_get_ventogyrus(var_output)

                # instantiate the templates on the current frontier node
                current_shell_list = [
                    modify_shell(template, -1, id_current)
                    for template in template_list
                ]

                # wrap in B=1
                td_current = torch.tensor([map_current], dtype=torch.float, device=device)
                td_output = torch.tensor([map_output], dtype=torch.float, device=device)

                # (B=1, fn_vocab_size)
                td_pred = test_model(td_current, td_output)

                # no hints; strict "<" so a rate of 0 never explores
                if random.random() < exploration_rate:
                    # exploration
                    tmp_id = random.randrange(len(current_shell_list))
                else:
                    # exploitation
                    tmp_id = torch.argmax(td_pred.flatten()).item()

                selected_log_probs.append(td_pred[0, tmp_id])

                # update ps_current; a rejected shell ends the attempt
                if not ps_current.add_neighboring_shell(current_shell_list[tmp_id]):
                    break
                d_step += 1

                if ps_current.check_eq() is not None:
                    # and solved!
                    is_solved = True
                    n_solved += 1
                    break
            # <END_FOR_STEP>

            # Record the solved attempt first, so it is not lost when the
            # sketch also matches for the first time in this same attempt.
            if is_solved:
                n_attempt_list.append(d_attempt)

            # The first sketch hit ends the episode, solved or not.
            if not is_sketch_solved and compare_sketch(ps_current, ps_solution):
                n_sketch_atlist.append(d_attempt)
                is_sketch_solved = True
                n_sketch_solved += 1
                break

            if is_solved:
                break

            # A repeated proposal does not consume an attempt (it still
            # consumes an update, which bounds the loop).
            d_proposal = str(ps_current.node_list[-1])
            if d_proposal in proposal_tracker:
                d_attempt -= 1
            else:
                proposal_tracker.add(d_proposal)

            # Nothing was selected (e.g. maxn_step <= 0): nothing to learn from.
            if not selected_log_probs:
                continue

            # Failed attempt: reward -1 for every selected shell, i.e.
            # minimise the sum of their log-probabilities.
            batch_loss = torch.stack(selected_log_probs).sum()
            test_optim.zero_grad()
            batch_loss.backward()
            test_optim.step()
        # <END_FOR_ATTEMPT>

    # <END_FOR_EPISODE>
    

In [8]:
# ==== Experiment setup: interpreter, spec, generator, config, model ==== #
# Statement order matters here: the spec/interpreter feed the generator and
# the ProgramSpace, and the config must exist before the model is built.
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb3.tyrell')
m_generator = MorpheusGenerator(
    spec=m_spec,
    interpreter=m_interpreter,
)

# dummy ProgramSpace (no real input/output) used only to count the
# neighboring shells, which fixes the model's output size below
m_ps = ProgramSpace(
    m_spec, m_interpreter, [None], None,
)

m_config = {
    "fn":{
        # number of candidate shells = size of the policy's output layer
        "vocab_size": len(m_ps.get_neighboring_shells())
    },
    # input feature size of the policy network (TransNeo.policy0)
    "embd_dim": 15*7,
    # ==== Meta-Learning Setting ==== #
    "meta_train":{
        "n_epoch": 5,
        "batch_size": 64, # how many indices (episodes) per optimizer step
        "n_total": 6400, # episodes per epoch
    },
    "meta_test":{
        "n_episode": 250, # only pick the first 250
        "batch_size": 1, # how many attempts (not read by MetaTest as written)
        # "fixed_depth": 4,
        "maxn_attempt": 100, # budget of distinct proposals per episode
        "maxn_update": 1000, # hard cap on passes per episode, repeats included
        "maxn_step": 3, # program size
        "exploration_rate": 0, # probability of picking a random shell
        "benchmarks": "./0804MDsize3.pkl",
    },
}

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# benchmark files from a trusted source.
with open(m_config["meta_test"]["benchmarks"],"rb") as f:
    bmrks = pickle.load(f)

trans_neo = TransNeo(p_config=m_config)
if use_cuda:
    trans_neo = trans_neo.cuda()
optimizer = torch.optim.Adam(list(trans_neo.parameters()))

# TensorBoard logging is disabled; the functions below accept the writer
# argument but are passed None.
# writer = SummaryWriter("runs/0713CAMB_RL2_camb3")
writer = None

In [9]:
m_config

{'fn': {'vocab_size': 120},
 'embd_dim': 105,
 'meta_train': {'n_epoch': 5, 'batch_size': 64, 'n_total': 6400},
 'meta_test': {'n_episode': 250,
  'batch_size': 1,
  'maxn_attempt': 100,
  'maxn_update': 1000,
  'maxn_step': 3,
  'exploration_rate': 0,
  'benchmarks': './0804MDsize3.pkl'}}

In [10]:
MetaTrain(m_config, m_spec, m_interpreter, m_generator, trans_neo, optimizer, writer)

# Start Meta-Train...
# epoch:0, index:6399/6400, avg.loss:3.93
# epoch:1, index:6399/6400, avg.loss:2.68
# epoch:2, index:6399/6400, avg.loss:2.26
# epoch:3, index:6399/6400, avg.loss:2.04
# epoch:4, index:6399/6400, avg.loss:1.89


In [11]:
MetaTest(m_config, m_spec, m_interpreter, m_generator, trans_neo, bmrks, optimizer, writer)

# Start Meta-Test...
# AC/SK/EP:5/28/102, AT:26, SP:0, att.ske.:43.39, att.prog.:3.00,Traceback (most recent call last):
  File "/home/ju-ucsb/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-7c3e2755fa43>", line 1, in <module>
    MetaTest(m_config, m_spec, m_interpreter, m_generator, trans_neo, bmrks, optimizer, writer)
  File "<ipython-input-7-a8af7ba683ac>", line 104, in MetaTest
    map_output = p_interpreter.camb_get_ventogyrus(var_output)
  File "/home/ju-ucsb/Trinity/MorpheusInterpreter.py", line 1192, in camb_get_ventogyrus
    np_obj, dr, dc = self.camb_get_np_obj(p_obj)
  File "/home/ju-ucsb/Trinity/MorpheusInterpreter.py", line 1046, in camb_get_np_obj
    dr = self.renv('nrow({})'.format(p_obj))[0]
  File "/home/ju-ucsb/anaconda3/lib/python3.7/site-packages/rpy2/robjects/__init__.py", line 389, in __call__
    res = self.eval(p)
  File "/home/ju-uc

KeyboardInterrupt: 

In [None]:
# 12/75/246

In [None]:
# 99/151, 128