## MetaNeo
- a version using meta information only, while excluding value-based features
- Stage: Cambrian
- Version: Yorgia
- Update Logs
    - 0713: with DeepPath style rollback at training
    - 0716: new learning paradigm, see memo for details
    - 0726: this is an interactive debugging version, with dead pool deactivated

#### Related Commands
- tensorboard --logdir runs
- nohup jupyter lab > jupyter.log &

In [1]:
import logging 
# Configure the root logger with a CRITICAL threshold so that lower-severity
# records from imported libraries do not clutter the notebook output.
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

# Probe once for a CUDA device. Later cells branch on this flag to decide
# whether the model and the input tensors are moved to the GPU with .cuda().
use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
torch.__version__

'1.0.0'

In [None]:
class MetaNeo(nn.Module):
    """Two-layer policy network over meta-level (value-free) table features.

    The network only looks at the element-wise difference between the
    abstraction of the target output and the abstraction of the current
    table. That difference goes through ``Linear -> ReLU -> Linear`` and is
    normalised with ``log_softmax`` into log-probabilities over the shell
    vocabulary.

    Args:
        p_config: configuration dict. Required keys:
            ``"embd_dim"`` (width of each input feature vector) and
            ``"fn"["vocab_size"]`` (number of shells to score).
            Optional key ``"hidden_dim"`` sets the hidden layer width and
            defaults to 128, the value previously hard-coded here.

    Raises:
        TypeError: if ``p_config`` is ``None``.
    """

    def __init__(self, p_config=None):
        super().__init__()
        if p_config is None:
            # Indexing None below would raise TypeError anyway; raise the
            # same exception type with a message that names the problem.
            raise TypeError(
                "MetaNeo requires a p_config dict with 'embd_dim' and "
                "'fn'['vocab_size']"
            )
        self.config = p_config

        # Hidden width is configurable; 128 keeps the original architecture.
        hidden_dim = self.config.get("hidden_dim", 128)

        # A single nn.Linear(embd_dim, vocab_size) policy was the earlier,
        # shallower variant; the current policy is one hidden layer deeper.
        self.policy0 = nn.Linear(
            self.config["embd_dim"],
            hidden_dim,
        )
        self.policy1 = nn.Linear(
            hidden_dim,
            self.config["fn"]["vocab_size"],
        )

    def forward(self, p_mapin, p_mapout):
        """Score every shell given the current and the target abstraction.

        Args:
            p_mapin: float tensor of shape ``(B, embd_dim)`` describing the
                current table.
            p_mapout: float tensor of the same shape describing the target
                output table.

        Returns:
            Tensor of shape ``(B, vocab_size)`` holding log-probabilities
            (each row exponentiates and sums to 1).
        """
        # The policy is conditioned only on how far the output is from the
        # current state, not on the two abstractions individually.
        v_delta = p_mapout - p_mapin
        hidden = F.relu(self.policy0(v_delta))
        return torch.log_softmax(self.policy1(hidden), dim=1)

In [None]:
# replace certain node id with certain value
def modify_shell(p_shell, p_id_from, p_id_to):
    """Return a copy of a shell with one node id substituted in its arguments.

    A shell is a pair ``(production, rhs)`` where ``rhs`` is a sequence of
    node ids. Every entry of ``rhs`` equal to ``p_id_from`` is replaced by
    ``p_id_to``; the production is passed through untouched.

    Returns:
        ``(production, new_rhs)`` with ``new_rhs`` always a tuple.
    """
    production = p_shell[0]
    children = p_shell[1]
    swapped = tuple(
        p_id_to if child == p_id_from else child
        for child in children
    )
    return (production, swapped)


# '''
# meta-train the agent in a supervised way
# epoch -> episode, one attempt with hint
# NOTICE: only valid for size 1 training
# '''
# def MetaTrain(p_config, p_spec, p_interpreter, p_model, p_data, p_optim, p_writer):
#     print("# Start Meta-Train...")
#     for d_epoch in range(p_config["meta_train"]["n_epoch"]):
#         p_model.train()
        
#         epoch_loss_list = []
#         batch_loss_list = []
#         random.shuffle(p_data)
#         train_data = p_data[:p_config["meta_train"]["n_truncated"]]
        
#         for d_ind in range(len(train_data)):
#             print("\r# epoch:{}, index:{}/{}, avg.loss:{:.2f}".format(
#                 d_epoch, d_ind, len(train_data),
#                 sum(epoch_loss_list)/len(epoch_loss_list)
#                 if len(epoch_loss_list)>0 else 0,
#             ),end="")
#             d_prog, dstr_example = train_data[d_ind]
#             d_example = Example(
#                 input=[
#                     p_interpreter.load_data_into_var(p)
#                     for p in dstr_example.input
#                 ],
#                 output=p_interpreter.load_data_into_var(
#                     dstr_example.output
#                 )
#             )
            
#             # initialize a solution
#             ps_solution = ProgramSpace(
#                 p_spec, p_interpreter, d_example.input, d_example.output,
#             )
#             ps_solution.init_by_prog(d_prog) # this constructs a solution for this problem
            
#             # initialize a new ProgramSpace
#             ps_current = ProgramSpace(
#                 p_spec, p_interpreter, d_example.input, d_example.output,
#             )
#             # then initialize a shell template
#             tmp_shell_list = ps_current.get_neighboring_shells()
#             tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
#             # replace the Param Node id in shells with -1 to make them templates
#             template_list = [
#                 modify_shell(tmp_shell_list[i],tmp_node_to_replace,-1)
#                 for i in range(len(tmp_shell_list))
#             ]
            
#             id_current = ps_current.get_strict_frontiers()[0]
#             var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
#             var_output = d_example.output
            
#             map_current = p_interpreter.camb_get_simp_abs(var_current)
#             map_output = p_interpreter.camb_get_simp_abs(var_output)
            
#             # make current shell list
#             current_shell_list = [
#                 modify_shell(template_list[i],-1,id_current)
#                 for i in range(len(template_list))
#             ]
            
#             # wrap in B=1
#             if use_cuda:
#                 td_current = Variable(torch.tensor([map_current],dtype=torch.float)).cuda()
#                 td_output = Variable(torch.tensor([map_output],dtype=torch.float)).cuda()
#             else:
#                 td_current = Variable(torch.tensor([map_current],dtype=torch.float))
#                 td_output = Variable(torch.tensor([map_output],dtype=torch.float))

#             # (B=1, fn_vocab_size)
#             td_pred = p_model(td_current, td_output)
#             # directly give the hint / supervised, ps.solution.shell[0] works for 1
#             tmp_id = current_shell_list.index(ps_solution.shells[0])
#             d_loss = (+1)*(-td_pred[0,tmp_id])
#             batch_loss_list.append(
#                 d_loss, # supervised / always correct with +1 reward
#             )
#             epoch_loss_list.append(
#                 d_loss.cpu().data.numpy(),
#             )
            
#             if len(batch_loss_list)%p_config["meta_train"]["batch_size"]==0 or len(batch_loss_list)==len(train_data):
#                 # do back-prop.
#                 if len(batch_loss_list)>0:
#                     batch_loss = sum(batch_loss_list)/len(batch_loss_list)
#                     p_optim.zero_grad()
#                     batch_loss.backward()
#                     p_optim.step()
#                 # after back-prop., clean up
#                 batch_loss = None
#                 batch_loss_list = []
                
#         print()
    

In [None]:
def compare_sketch(ps0, ps1):
    """Tell whether two program spaces share the same sketch.

    The spaces match when they hold the same number of nodes and, walking
    backwards from the end of ``node_list``, the last ``len(ps0.shells)``
    nodes carry pairwise-equal ``name`` attributes.

    Returns:
        ``True`` if the sketches agree, ``False`` otherwise.
    """
    nodes_a = ps0.node_list
    nodes_b = ps1.node_list
    if len(nodes_a) != len(nodes_b):
        return False
    # Negative offsets -1, -2, ... cover one node per shell added to ps0.
    return all(
        nodes_a[-back].name == nodes_b[-back].name
        for back in range(1, len(ps0.shells) + 1)
    )

'''
meta-test an agent, directly run into testing / online adaptation
'''
def MetaTest(p_config, p_spec, p_interpreter, p_generator, p_model, p_optim, p_writer):
    """Interactively meta-test an agent with online adaptation.

    For every episode a new chain program is drawn from ``p_generator`` and a
    deep copy of ``p_model`` tries to rebuild it one shell at a time. After
    each attempt the operator is prompted through ``input`` for one reward
    per step taken. Each reward weights the negative log-probability of every
    shell sharing the production name of the shell chosen at that step, and
    one gradient step is applied immediately.

    Args:
        p_config: configuration dict. Reads ``p_config["meta_test"]`` keys
            ``n_episode``, ``fixed_depth``, ``maxn_step``, ``maxn_attempt``
            and ``exploration_rate`` (a callable taking
            ``(episode, attempt)`` and returning a probability).
        p_spec: spec handed to every ``ProgramSpace`` built here.
        p_interpreter: interpreter; ``renv`` and ``camb_get_simp_abs`` are
            called on it.
        p_generator: program generator; ``get_new_chain_program`` is called
            on it once per episode.
        p_model: the meta-trained model. It is deep-copied per episode, so
            each episode starts from the same weights.
        p_optim: optimizer used as a template. Its class and ``defaults``
            are reused to build a fresh optimizer over the copied model's
            parameters; ``p_optim`` itself is never stepped.
        p_writer: accepted for interface compatibility; not used.

    Returns:
        None. Progress is reported with ``print``.
    """
    print("# Start Meta-Test...")
    cfg = p_config["meta_test"]

    # running attempt counter shown in the prompt; reset to 0 once a problem is solved
    nth_attempt = 0
    batch_loss_list = []

    n_solved = 0 # track the number of solved problem
    n_sketch_solved = 0
    n_attempt_list = [] # track the number of attempts in every episode

    stored_neurons = [] # (log-prob of the chosen shell, whether the shell was accepted)
    stored_nodes = []
    stored_groups = [] # with lists of neurons of the same production name

    for d_episode in range(cfg["n_episode"]):

        # retrieve the given meta-trained model for testing
        test_model = copy.deepcopy(p_model)
        test_model.train()

        # p_optim was built over p_model's parameters. Gradients computed
        # below land on the parameters of the copy, so stepping p_optim would
        # update nothing. Build an optimizer of the same kind, with the same
        # default hyper-parameters, over the parameters actually trained.
        test_optim = type(p_optim)(test_model.parameters(), **p_optim.defaults)

        # if doing random meta-testing
        # then randomly generate a program for testing
        ps_solution = p_generator.get_new_chain_program(
            cfg["fixed_depth"],
        )

        # Pretty-print the last `maxn_step` nodes of the benchmark as a chain:
        # from the second step on, the textual form of the previous node is
        # abbreviated to "@output".
        maxn_step = cfg["maxn_step"]
        bench_steps = []
        for i in range(maxn_step):
            step_str = str(ps_solution.node_list[-maxn_step + i])
            if i >= 1:
                step_str = step_str.replace(
                    str(ps_solution.node_list[-maxn_step + i - 1]), "@output"
                )
            bench_steps.append(step_str)
        print("# benchmark program: {}".format(" -> ".join(bench_steps)))
        print("# === input ===")
        print(p_interpreter.renv(ps_solution.inputs[0]))
        print("# === output ===")
        print(p_interpreter.renv(ps_solution.output))

        is_solved = False
        is_sketch_solved = False

        for d_attempt in range(cfg["maxn_attempt"]):
            nth_attempt += 1

            # in every new attempt, initialize a new Program Space
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(shell, tmp_node_to_replace, -1)
                for shell in tmp_shell_list
            ]

            d_step = 0
            while d_step < maxn_step:

#                 # print the training progress
#                 print("\r# AC/SK/EP:{}/{}/{}, AT:{}, SP:{}, DN:{}, avg.attempt:{:.2f}, er:{:.2f}".format(
#                     n_solved, n_sketch_solved, d_episode, d_attempt, d_step,
#                     len(dead_neurons),
#                     sum(n_attempt_list)/len(n_attempt_list) if len(n_attempt_list)>0 else -1,
#                     p_config["meta_test"]["exploration_rate"](d_episode,d_attempt),
#                 ),end="")

                # ### assume chain execution, so only 1 possible returns
                # ### at d_step=0, this should be input[0]
                id_current = ps_current.get_strict_frontiers()[0]
                var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
                var_output = ps_current.output

                map_current = p_interpreter.camb_get_simp_abs(var_current)
                map_output = p_interpreter.camb_get_simp_abs(var_output)

                # make current shell list: point every template at the frontier node
                current_shell_list = [
                    modify_shell(template, -1, id_current)
                    for template in template_list
                ]

                # wrap in B=1
                td_current = Variable(torch.tensor([map_current], dtype=torch.float))
                td_output = Variable(torch.tensor([map_output], dtype=torch.float))
                if use_cuda:
                    td_current = td_current.cuda()
                    td_output = td_output.cuda()

                # (B=1, fn_vocab_size) log-probabilities
                td_pred = test_model(td_current, td_output)

                # no hints
                if random.random() <= cfg["exploration_rate"](d_episode, d_attempt):
                    # exploration: uniform over all shells
                    tmp_id = random.choice(range(len(current_shell_list)))
                else:
                    # exploitation: sample from the predicted distribution
                    tmp_id = torch.multinomial(td_pred.exp().flatten(), 1).item()

                # == Yorgia ==
                # find out all other shells that share the same product name
                tmp_component_name = ps_current.prod_list[current_shell_list[tmp_id][0]].name
                tmp_group = [
                    td_pred[0, i]
                    for i, shell in enumerate(current_shell_list)
                    if ps_current.prod_list[shell[0]].name == tmp_component_name
                ]
                stored_groups.append(tmp_group)

                # == Yorgia ==
                # append before adding shell to ProgramSpace
                stored_nodes.append(ps_current.get_node_from_shell(
                    current_shell_list[tmp_id]
                ))

                # update ps_current
                update_status = ps_current.add_neighboring_shell(
                    current_shell_list[tmp_id]
                )

                # record selected neuron together with whether the shell was accepted
                stored_neurons.append(
                    (td_pred[0, tmp_id], bool(update_status))
                )

                if not update_status:
                    # the shell was rejected: this attempt ends here
                    break

                d_step += 1
                if ps_current.check_eq() is not None:
                    # and solved!
                    is_solved = True
                    n_solved += 1
                    break

            # count the sketch as solved at most once per episode
            if not is_sketch_solved:
                if compare_sketch(ps_current, ps_solution):
                    is_sketch_solved = True
                    n_sketch_solved += 1

            print("# stored groups: {}".format(
                " / ".join([str(len(group)) for group in stored_groups])
            ))

            # ask for separate rewards for every step
            ar = input("# attempt {}, input reward(s) for: {}".format(
                nth_attempt,
                " -> ".join([
                    "({}){}".format(
                        "✓" if stored_neurons[i][1] else "x",
                        str(stored_nodes[i]).replace(str(stored_nodes[i-1]),"@output") if i>=1 else str(stored_nodes[i]),
                    ) for i in range(len(stored_nodes))
                ]),
            ))
            # SECURITY NOTE: eval() executes whatever the operator types. That
            # is tolerable only because this is a local interactive debugging
            # loop; never feed it input from an untrusted source. Expected
            # input is a comma-separated list of numbers, one per step.
            assigned_rewards = eval("[{}]".format(ar))

            # compute the loss (sequential selected)
#             for i in range(len(stored_neurons)):
#                 batch_loss_list.append(
#                     assigned_rewards[i]*(-stored_neurons[i][0])
#                 )
            # == Yorgia ==
            # compute grouped loss: the reward of step i is applied to every
            # shell that shares the production name chosen at step i
            for i, group in enumerate(stored_groups):
                for log_prob in group:
                    batch_loss_list.append(
                        assigned_rewards[i] * (-log_prob)
                    )

            # directly do the back-prop on the per-episode copy
            batch_loss = sum(batch_loss_list) / len(batch_loss_list)
            test_optim.zero_grad()
            batch_loss.backward()
            test_optim.step()

            # start the next attempt with empty buffers
            batch_loss_list = []
            stored_neurons = []
            stored_nodes = []
            stored_groups = []

            if is_solved:
                nth_attempt = 0
                break

        # <END_FOR_ATTEMPT>


    # <END_FOR_EPISODE>
    

In [None]:
# Build the execution back-end, the DSL spec and the random program generator
# that MetaTest draws benchmark programs from.
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb3.tyrell')
m_generator = MorpheusGenerator(
    spec=m_spec,
    interpreter=m_interpreter,
)

# dumb variable to help infer the shells
# (a ProgramSpace with placeholder input/output, used only to count how many
# neighboring shells exist so the policy head can be sized accordingly)
m_ps = ProgramSpace(
    m_spec, m_interpreter, [None], None,
)

m_config = {
    "fn":{
        # output width of the policy: one logit per neighboring shell
        "vocab_size": len(m_ps.get_neighboring_shells())
    },
    # input width of the policy; presumably the length of the vector returned
    # by camb_get_simp_abs -- TODO confirm against the interpreter
    "embd_dim": 15*3,
    # ==== Meta-Learning Setting ==== #
#     "meta_train":{
#         "n_epoch": 10,
#         "batch_size": 4, # how many indices
#         "data_path": "./0716MDsize1.pkl",
#         "n_truncated": 1000,
#     },
    # NOTE(review): MetaTest as defined in this notebook reads n_episode,
    # fixed_depth, maxn_attempt, maxn_step and exploration_rate; batch_size,
    # decay_rate and dp_cap are not read by the visible code.
    "meta_test":{
        "n_episode": 100000,
        "batch_size": 1, # how many attempts
        "fixed_depth": 3,
        "maxn_attempt": 1000000,
        "maxn_step": 2, # program size
        # called as exploration_rate(episode, attempt); constant 10% here
        "exploration_rate": lambda pep,pat:0.1,
        "decay_rate": 0.9,
        "dp_cap": 50,
    },
}

# load the size 1 supervised data
# with open(m_config["meta_train"]["data_path"],"rb") as f:
#     dt_data = pickle.load(f)
# m_data = [
#     dt_data[dkey][i]
#     for dkey in dt_data.keys()
#     for i in range(len(dt_data[dkey]))
# ]
# print("# Total Meta-Train Data: {}".format(len(m_data)))

# Instantiate the policy and move it to the GPU when one is available.
meta_neo = MetaNeo(p_config=m_config)
if use_cuda:
    meta_neo = meta_neo.cuda()
# Adam with library-default hyper-parameters, bound to meta_neo's parameters.
optimizer = torch.optim.Adam(list(meta_neo.parameters()))

# TensorBoard logging is disabled in this version; MetaTest receives None.
# writer = SummaryWriter("runs/0713CAMB_RL2_camb3")
writer = None

In [None]:
m_config

{'fn': {'vocab_size': 120},
 'embd_dim': 45,
 'meta_test': {'n_episode': 100000,
  'batch_size': 1,
  'fixed_depth': 3,
  'maxn_attempt': 1000000,
  'maxn_step': 2,
  'exploration_rate': <function __main__.<lambda>(pep, pat)>,
  'decay_rate': 0.9,
  'dp_cap': 50}}

In [None]:
# MetaTrain(m_config, m_spec, m_interpreter, meta_neo, m_data, optimizer, writer)

In [None]:
# Start the interactive meta-test loop. After every attempt the call blocks
# on input() and expects comma-separated rewards, one per step shown in the
# prompt (e.g. "-1" or "-10,1").
MetaTest(m_config, m_spec, m_interpreter, m_generator, meta_neo, optimizer, writer)

# Start Meta-Test...
# benchmark program: separate(@param0, 2) -> gather(@output, ['2', '4'])
# === input ===
                     atomisation                   dubby     capitellate
1        stormwise_inextinguible  teenage_trionychoidean       subcostae
2         virtualize_hematoplast         skittering_week       subcostae
3          gonycampsis_uncoupler      undiverse_sahaptin       subcostae
4              deliver_heliotype    coonroot_prayerfully unhideboundness
5           opaloid_amniochorial          tetchily_anorn unhideboundness
6    philogynous_epigrammatarian            usheen_snaky       subcostae
7             voltairean_mobster        zootrophic_fatwa       subcostae
8           parahopeite_banderol  defectoscope_fratority      stewarding
9      preindicating_katabothron lutecia_overmelodiously      stewarding
10 unspiritualizing_bacilligenic     primrosed_refilming      stewarding

# === output ===
                     atomisation        beauship          sauve      

# attempt 1, input reward(s) for: (x)separate(@param0, 6) 1


# stored groups: 36


# attempt 2, input reward(s) for: (x)spread(@param0, 5, 6) -1


# stored groups: 21 / 36


# attempt 3, input reward(s) for: (✓)gather(@param0, ['2', '3']) -> (x)spread(@output, 1, 4) -1,-1


# stored groups: 36


# attempt 4, input reward(s) for: (x)spread(@param0, 5, 5) -1


# stored groups: 36


# attempt 5, input reward(s) for: (x)spread(@param0, 5, 3) -1


# stored groups: 36


# attempt 6, input reward(s) for: (x)unite(@param0, 2, 6) -1


# stored groups: 21


# attempt 7, input reward(s) for: (x)gather(@param0, ['4', '5']) -10


# stored groups: 36


# attempt 8, input reward(s) for: (x)spread(@param0, 3, 5) -10


# stored groups: 36


# attempt 9, input reward(s) for: (x)spread(@param0, 4, 1) -10


# stored groups: 36


# attempt 10, input reward(s) for: (x)spread(@param0, 6, 2) -10


# stored groups: 36 / 21


# attempt 11, input reward(s) for: (✓)unite(@param0, 3, 1) -> (x)gather(@output, ['4', '6']) -10,1


# stored groups: 36


# attempt 12, input reward(s) for: (x)spread(@param0, 4, 4) -10


# stored groups: 36 / 21


# attempt 13, input reward(s) for: (✓)unite(@param0, 3, 1) -> (x)select(@output, ['5']) -10,-10


# stored groups: 6


# attempt 14, input reward(s) for: (x)separate(@param0, 4) -1


# stored groups: 21


# attempt 15, input reward(s) for: (x)gather(@param0, ['2', '4']) -10


# stored groups: 6


# attempt 16, input reward(s) for: (x)separate(@param0, 5) -10


# stored groups: 21


# attempt 17, input reward(s) for: (x)select(@param0, ['6']) -10


# stored groups: 36


# attempt 18, input reward(s) for: (x)spread(@param0, 5, 6) -10


# stored groups: 36


# attempt 19, input reward(s) for: (x)unite(@param0, 2, 6) -10


# stored groups: 6


# attempt 20, input reward(s) for: (x)separate(@param0, 4) -10


# stored groups: 36


# attempt 21, input reward(s) for: (x)spread(@param0, 5, 1) -10


# stored groups: 36
