## TransNeo/AlphaNeo
- AlphaNeo using pre-trained TransE embeddings (optional)
- Stage: Cambrian
- Version: Spriggina
- Update Logs
    - 0713: with DeepPath style rollback at training
    - **0716: new learning paradigm, see memo for details**

#### Related Commands
- tensorboard --logdir runs
- nohup jupyter lab > jupyter.log &

In [1]:
import logging 
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
torch.__version__

'1.0.0'

In [5]:
class TransNeo(nn.Module):
    def __init__(self, p_config=None):
        super(TransNeo, self).__init__()
        self.config = p_config
        
        # deeper
        self.policy0 = nn.Linear(
            self.config["embd_dim"]*2,
#             self.config["embd_dim"],
            128,
        )
        self.policy1 = nn.Linear(
            128,
            self.config["fn"]["vocab_size"],
        )
        
    def forward(self, p_mapin, p_mapout):
        # p_mapin/p_mapout: (B, 15*7)
#         v_delta = p_mapout-p_mapin
        v_con = torch.cat([p_mapin,p_mapout],dim=1)
        tmp_out = torch.log_softmax(
            self.policy1(
                F.relu(
                    self.policy0(
                        v_con
#                         v_delta
                    )
                )
            ),dim=1
        )
        
        return tmp_out

In [6]:
# replace certain node id with certain value
def modify_shell(p_shell, p_id_from, p_id_to):
    d_prod = p_shell[0]
    d_rhs = p_shell[1]
    ld_rhs = [p_id_to if d_rhs[i]==p_id_from else d_rhs[i]
             for i in range(len(d_rhs))]
    return (d_prod, tuple(ld_rhs))


# '''
# meta-train the agent in a supervised way
# epoch -> episode, one attempt with hint
# NOTICE: only valid for size 1 training
# '''
# def MetaTrain(p_config, p_spec, p_interpreter, p_generator, p_model, p_optim, p_writer):
#     print("# Start Meta-Train...")
#     for d_epoch in range(p_config["meta_train"]["n_epoch"]):
#         p_model.train()
        
#         epoch_loss_list = []
#         batch_loss_list = []
        
#         for d_ind in range(p_config["meta_train"]["n_total"]):
#             print("\r# epoch:{}, index:{}/{}, avg.loss:{:.2f}".format(
#                 d_epoch, d_ind, p_config["meta_train"]["n_total"],
#                 sum(epoch_loss_list)/len(epoch_loss_list)
#                 if len(epoch_loss_list)>0 else 0,
#             ),end="")
            
#             # initialize a solution
#             ps_solution = p_generator.get_new_chain_program(
#                 2,
#             )
            
#             # initialize a new ProgramSpace
#             ps_current = ProgramSpace(
#                 p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
#             )
#             # then initialize a shell template
#             tmp_shell_list = ps_current.get_neighboring_shells()
#             tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
#             # replace the Param Node id in shells with -1 to make them templates
#             template_list = [
#                 modify_shell(tmp_shell_list[i],tmp_node_to_replace,-1)
#                 for i in range(len(tmp_shell_list))
#             ]
            
#             id_current = ps_current.get_strict_frontiers()[0]
#             var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
#             var_output = ps_current.output
            
#             map_current = p_interpreter.camb_get_ventogyrus(var_current)
#             map_output = p_interpreter.camb_get_ventogyrus(var_output)
            
#             # make current shell list
#             current_shell_list = [
#                 modify_shell(template_list[i],-1,id_current)
#                 for i in range(len(template_list))
#             ]
            
#             # wrap in B=1
#             if use_cuda:
#                 td_current = Variable(torch.tensor([map_current],dtype=torch.float)).cuda()
#                 td_output = Variable(torch.tensor([map_output],dtype=torch.float)).cuda()
#             else:
#                 td_current = Variable(torch.tensor([map_current],dtype=torch.float))
#                 td_output = Variable(torch.tensor([map_output],dtype=torch.float))

#             # (B=1, fn_vocab_size)
#             td_pred = p_model(td_current, td_output)
#             # directly give the hint / supervised, ps.solution.shell[0] works for 1
#             tmp_id = current_shell_list.index(ps_solution.shells[0])
#             d_loss = (+1)*(-td_pred[0,tmp_id])
#             batch_loss_list.append(
#                 d_loss, # supervised / always correct with +1 reward
#             )
#             epoch_loss_list.append(
#                 d_loss.cpu().data.numpy(),
#             )
            
#             if len(batch_loss_list)%p_config["meta_train"]["batch_size"]==0:
#                 # do back-prop.
#                 if len(batch_loss_list)>0:
#                     batch_loss = sum(batch_loss_list)/len(batch_loss_list)
#                     p_optim.zero_grad()
#                     batch_loss.backward()
#                     p_optim.step()
#                 # after back-prop., clean up
#                 batch_loss = None
#                 batch_loss_list = []
                
#         print()
    

In [7]:
def MetaTrain(p_config, p_spec, p_interpreter, p_generator, p_model, p_bmrks, p_optim, p_writer):
    print("# Start Meta-Train...")
    
    for d_epoch in range(p_config["meta_train"]["n_epoch"]):
        epoch_loss_list = []
        for d_episode in range(p_config["meta_train"]["n_episode"]):
            print("\r# EP:{}/{}, loss:{:.2f}".format(
                d_epoch, d_episode,
                sum(epoch_loss_list)/len(epoch_loss_list) if len(epoch_loss_list)>0 else -1,
            ),end="")
            p_model.train()
            
            eid = random.choice(range(p_config["meta_train"]["n_episode"]))

            # ==== prepare the benchmark ====
            bmrk_prog, bmrk_str_example = p_bmrks[eid]
            bmrk_example = Example(
                input=[p_interpreter.load_data_into_var(p) for p in bmrk_str_example.input],
                output=p_interpreter.load_data_into_var(bmrk_str_example.output),
            )
            ps_solution = ProgramSpace(
                p_spec, p_interpreter, bmrk_example.input, bmrk_example.output,
            )
            ps_solution.init_by_prog(bmrk_prog)
            solution_prod_names = [
                ps_solution.prod_list[p[0]].name for p in ps_solution.shells
            ]
            solution_shells = ps_solution.shells

            # solution self-check
            if ps_solution.check_eq() is None:
                continue
                # print("ERROR, SOLUTION NOT CONSISTENT!")

            is_solved = False

            current_prod_names = []

            selected_neurons = []
            stored_groups = [] # with lists of neurons of the same production name

            # in every new attempt, initialize a new Program Space
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(tmp_shell_list[i],tmp_node_to_replace,-1)
                for i in range(len(tmp_shell_list))
            ]

            # for d_step in range(p_config["meta_test"]["maxn_step"]):
            for d_step in range(len(ps_solution.shells)):

                # ### assume chain execution, so only 1 possible returns
                # ### at d_step=0, this should be input[0]
                id_current = ps_current.get_strict_frontiers()[0]
                var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
                var_output = ps_current.output

                map_current = p_interpreter.camb_get_ventogyrus(var_current)
                map_output = p_interpreter.camb_get_ventogyrus(var_output)

                # make current shell list
                current_shell_list = [
                    modify_shell(template_list[i],-1,id_current)
                    for i in range(len(template_list))
                ]

                # wrap in B=1
                if use_cuda:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.float)).cuda()
                    td_output = Variable(torch.tensor([map_output],dtype=torch.float)).cuda()
                else:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.float))
                    td_output = Variable(torch.tensor([map_output],dtype=torch.float))

                # (B=1, fn_vocab_size)
                td_pred = p_model(td_current, td_output)

                # no hints
                # exploitation
                # tmp_id = torch.argmax(td_pred.flatten()).cpu().tolist()
                tmp_id = current_shell_list.index(ps_solution.shells[d_step])

                # update ps_current
                update_status = ps_current.add_neighboring_shell(
                    current_shell_list[tmp_id]
                )

                if update_status:
                    # record selected neuron
                    selected_neurons.append((True, td_pred[0,tmp_id]))

                    # succeed
                    if ps_current.check_eq() is not None:
                        # and solved!
                        is_solved = True
                        break
                else:
                    selected_neurons.append((False, td_pred[0,tmp_id]))
                    break

            # <END_FOR_STEP>
            if not is_solved:
                raise Exception()

            # == Yorgia ==
            # compute the loss according to the loss computation rules
            # first component, then function call
            batch_loss_list = []
            for i in range(len(ps_current.shells)):
                if not selected_neurons[i][0]:
                    raise Exception()
                batch_loss_list.append(
                    (+1.0)*(-selected_neurons[i][1])
                )

            batch_loss = sum(batch_loss_list)
            epoch_loss_list.append(batch_loss)
            p_optim.zero_grad()
            batch_loss.backward()
            p_optim.step()

                
        # <END_FOR_EPISODE>  
        print()
            
    # <END_FOR_EPOCH>
    

In [8]:
# def compare_sketch(ps0,ps1):
#     if len(ps0.node_list)!=len(ps1.node_list):
#         return False
#     for i in range(len(ps0.shells)):
#         if ps0.node_list[-i-1].name != ps1.node_list[-i-1].name:
#             return False
#     return True

'''
meta-test an agent, directly run into testing / online adaptation
'''
def MetaTest(p_config, p_spec, p_interpreter, p_generator, p_model, p_ngram, p_bmrks, p_optim, p_writer):
    print("# Start Meta-Test...")
    
    n_solved = 0 # track the number of solved problem
    n_sketch_solved = 0
    n_attempt_list = [] # track the number of attempts in every episode
    n_sketch_atlist = [] # track the number of attempts when hitting the sketch
    n_sat_list = [] # track the number of sketches when hitting the sketch
    
    n_ngram_sketch = [] # sketch ranking from ngram (falling back)
    
    for d_episode in range(p_config["meta_test"]["n_episode"]):
        proposal_tracker = set()
        sketch_tracker = set()
        sketch_tracker_sizeN = set()
        
        # retrieve the given meta-trained model for testing
        test_model = copy.deepcopy(p_model)
        test_model.train()
        test_optim = torch.optim.Adam(list(test_model.parameters()))
        
        # ==== prepare the benchmark ====
        bmrk_prog, bmrk_str_example = p_bmrks[d_episode]
        bmrk_example = Example(
            input=[p_interpreter.load_data_into_var(p) for p in bmrk_str_example.input],
            output=p_interpreter.load_data_into_var(bmrk_str_example.output),
        )
        ps_solution = ProgramSpace(
            p_spec, p_interpreter, bmrk_example.input, bmrk_example.output,
        )
        ps_solution.init_by_prog(bmrk_prog)
        solution_prod_names = [
            ps_solution.prod_list[p[0]].name for p in ps_solution.shells
        ]
        solution_shells = ps_solution.shells
        
        # solution self-check
        if ps_solution.check_eq() is None:
            print("ERROR, SOLUTION NOT CONSISTENT!")
        
        
#         f = open("./outputs/Sarah3/Problem_{}.txt".format(d_episode), "w")
#         f.write("# Problem: {}\n\n".format(str(ps_solution.node_list[-1])))
#         f.write("# Input:\n{}\n".format(p_interpreter.renv(ps_solution.inputs[0])))
#         f.write("# Output:\n{}\n".format(p_interpreter.renv(ps_solution.output)))
#         f.flush()
        
        is_solved = False
        is_sketch_solved = False
        
        d_attempt = -1
        d_update = 0
        while d_attempt<p_config["meta_test"]["maxn_attempt"]:
            if d_update>=p_config["meta_test"]["maxn_update"]:
                # try too long, stop
                break
            d_attempt += 1
            d_update += 1
        # for d_attempt in range(p_config["meta_test"]["maxn_attempt"]):
            
            current_prod_names = []
            current_shells = []
            current_outputs = []
            
            selected_neurons = []
            stored_groups = [] # with lists of neurons of the same production name
            
            # in every new attempt, initialize a new Program Space
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(tmp_shell_list[i],tmp_node_to_replace,-1)
                for i in range(len(tmp_shell_list))
            ]
                
            d_step = 0
            while d_step<p_config["meta_test"]["maxn_step"]:
                
                # print the training progress
                print("\r# SK/EP:{}/{}, AT:{}, SP:{}, att.ske.:{:.2f}, ske.:{:.2f}, ngram:{:.2f}".format(
                    n_sketch_solved, d_episode, d_attempt, d_step,
                    sum(n_sketch_atlist)/len(n_sketch_atlist) if len(n_sketch_atlist)>0 else -1,
                    sum(n_sat_list)/len(n_sat_list) if len(n_sat_list)>0 else -1,
                    sum(n_ngram_sketch)/len(n_ngram_sketch) if len(n_ngram_sketch)>0 else -1,
                ),end="")
                
                # ### assume chain execution, so only 1 possible returns
                # ### at d_step=0, this should be input[0]
                id_current = ps_current.get_strict_frontiers()[0]
                var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
                var_output = ps_current.output
                
                map_current = p_interpreter.camb_get_ventogyrus(var_current)
                map_output = p_interpreter.camb_get_ventogyrus(var_output)
                
                # make current shell list
                current_shell_list = [
                    modify_shell(template_list[i],-1,id_current)
                    for i in range(len(template_list))
                ]
                
                # wrap in B=1
                if use_cuda:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.float)).cuda()
                    td_output = Variable(torch.tensor([map_output],dtype=torch.float)).cuda()
                else:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.float))
                    td_output = Variable(torch.tensor([map_output],dtype=torch.float))
                    
                # (B=1, fn_vocab_size)
                td_pred = test_model(td_current, td_output)
                
                # no hints
                # exploitation
                tmp_id = torch.argmax(td_pred.flatten()).cpu().tolist()
                # tmp_id = current_shell_list.index(ps_solution.shells[d_step])
                    
                    
                # == Yorgia ==
                # find out all other shells that share the same product name
                tmp_component_name = ps_current.prod_list[current_shell_list[tmp_id][0]].name
                tmp_group = []
                for i in range(len(current_shell_list)):
                    if ps_current.prod_list[current_shell_list[i][0]].name==tmp_component_name:
                        tmp_group.append(td_pred[0,i])
                stored_groups.append(tmp_group)
                    
                # == Yorgia ==
                # add prod names first
                current_prod_names.append(tmp_component_name)
                # then add shells
                current_shells.append(current_shell_list[tmp_id])
                
                # update ps_current
                update_status = ps_current.add_neighboring_shell(
                    current_shell_list[tmp_id]
                )
                
                if update_status:
                    # record selected neuron
                    selected_neurons.append((True, td_pred[0,tmp_id]))
                    current_outputs.append(ps_current.node_list[-1].ps_data)
                    d_step += 1
                    
                    # succeed
                    if ps_current.check_eq() is not None:
                        # and solved!
                        is_solved = True
                        n_solved += 1
                        break
                else:
                    selected_neurons.append((False, td_pred[0,tmp_id]))
                    break
            
            # <END_FOR_STEP>
#             f.write("# ({}) Proposed {}/{}: {}\n".format(
#                 "accept" if is_solved else "reject",
#                 d_attempt, p_config["meta_test"]["maxn_attempt"],
#                 str(ps_current.node_list[-1])
#             ))
#             f.flush()
            sketch_tracker.add(str(current_prod_names))
            if len(current_prod_names)==p_config["meta_test"]["maxn_step"]:
                sketch_tracker_sizeN.add(str(current_prod_names))
            if not is_sketch_solved:
                if current_prod_names==solution_prod_names:
                    n_sketch_atlist.append(d_attempt)
                    n_sat_list.append(len(sketch_tracker_sizeN))
                    is_sketch_solved = True
                    n_sketch_solved += 1
                    break
            # early force to ngram
            if len(sketch_tracker_sizeN)>25:
                break
                    
            if is_solved:
                n_attempt_list.append(d_attempt)
                break
                
            d_proposal = str(ps_current.node_list[-1])
            if d_proposal in proposal_tracker:
                d_attempt -= 1
            else:
                proposal_tracker.add(d_proposal)
            
            # print("# Current Sketch: {}".format(current_prod_names))
            # == Yorgia ==
            # compute the loss according to the loss computation rules
            # first component, then function call
            batch_loss_list = []
            for i in range(len(current_prod_names)):
                batch_loss_list.append(
                    (-1.0)*(-selected_neurons[i][1])
                )
#                 if solution_prod_names[i]==current_prod_names[i]:
#                     # component match, promote the whole group
#                     for j in range(len(stored_groups[i])):
#                         batch_loss_list.append(
#                             (+1.0)*(-stored_groups[i][j])
#                         )
#                 else:
#                     for j in range(len(stored_groups[i])):
#                         batch_loss_list.append(
#                             (-1.0)*(-stored_groups[i][j])
#                         )
#                 # then compare function call
#                 if solution_shells[i]==current_shells[i]:
#                     batch_loss_list.append(
#                         (+10.0)*(-selected_neurons[i][1])
#                     )
#                 else:
#                     batch_loss_list.append(
#                         (-10.0)*(-selected_neurons[i][1])
#                     )
            
            batch_loss = sum(batch_loss_list)
            test_optim.zero_grad()
            batch_loss.backward()
            test_optim.step()
            
                
        # <END_FOR_ATTEMPT>    
        
        # end of trial, if not solved, fall back to ngram version
        if not is_sketch_solved:
            target_ngram = str(tuple(solution_prod_names))
            ngram_ranking = len(sketch_tracker_sizeN)
            for p in p_ngram:
                if p in sketch_tracker_sizeN:
                    continue
                if p==target_ngram:
                    break
                else:
                    ngram_ranking += 1
            n_ngram_sketch.append(ngram_ranking)
#         print("ngram rank:{}".format(ngram_ranking))
        
#         f.close()
            
    # <END_FOR_EPISODE>
    

In [9]:
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb5.tyrell')
m_generator = MorpheusGenerator(
    spec=m_spec,
    interpreter=m_interpreter,
)

# dumb variable to help infer the shells
m_ps = ProgramSpace(
    m_spec, m_interpreter, [None], None,
)

m_config = {
    "fn":{
        "vocab_size": len(m_ps.get_neighboring_shells())
    },
    "embd_dim": 15*7+1,
    # ==== Meta-Learning Setting ==== #
    "meta_train":{
        "n_epoch": 5,
        "n_episode": 100,
    },
    "meta_test":{
        "n_episode": 100, # only pick the first 250
        "batch_size": 1, # how many attempts
        # "fixed_depth": 4,
        "maxn_attempt": 1000,
        "maxn_update": 2000,
        "maxn_step": 4, # program size
        "exploration_rate": 0,
        "benchmarks": "./0807SOSize4.pkl",
    },
}

with open(m_config["meta_test"]["benchmarks"],"rb") as f:
    bmrks = pickle.load(f)


trans_neo = TransNeo(p_config=m_config)
if use_cuda:
    trans_neo = trans_neo.cuda()
optimizer = torch.optim.Adam(list(trans_neo.parameters()))

# writer = SummaryWriter("runs/0713CAMB_RL2_camb3")
writer = None

In [10]:
# process the ngram info
with open("./size4-ngram-camb5.txt","r") as f:
    ngram4_raw = f.readlines()
ngram4_parsed = [str(tuple(p.split())) for p in ngram4_raw]

In [11]:
m_config

{'fn': {'vocab_size': 201},
 'embd_dim': 106,
 'meta_train': {'n_epoch': 5, 'n_episode': 100},
 'meta_test': {'n_episode': 100,
  'batch_size': 1,
  'maxn_attempt': 1000,
  'maxn_update': 2000,
  'maxn_step': 4,
  'exploration_rate': 0,
  'benchmarks': './0807SOSize4.pkl'}}

In [12]:
MetaTrain(m_config, m_spec, m_interpreter, m_generator, trans_neo, bmrks[100:200], optimizer, writer)

# Start Meta-Train...
# EP:0/99, loss:19.18
# EP:1/99, loss:16.21
# EP:2/99, loss:15.09
# EP:3/99, loss:13.40
# EP:4/99, loss:12.03


In [13]:
MetaTest(m_config, m_spec, m_interpreter, m_generator, trans_neo, ngram4_parsed, bmrks[:100], optimizer, writer)

# Start Meta-Test...
# SK/EP:8/10, AT:3, SP:3, att.ske.:33.62, ske.:13.25, ngram:70.00ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:11/19, AT:2, SP:3, att.ske.:29.45, ske.:12.18, ngram:300.75ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:17/27, AT:48, SP:3, att.ske.:27.65, ske.:11.76, ngram:318.40ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:31/60, AT:96, SP:3, att.ske.:26.84, ske.:11.61, ngram:189.31ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:33/69, AT:76, SP:3, att.ske.:26.52, ske.:11.64, ngram:200.78ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:33/73, AT:137, SP:3, att.ske.:26.52, ske.:11.64, ngram:185.10ERROR, SOLUTION NOT CONSISTENT!
# SK/EP:42/99, AT:98, SP:3, att.ske.:23.10, ske.:10.36, ngram:161.88

In [14]:
# 12/75/246

In [15]:
# 99/151, 128