## AlphaNeo
- AlphaNeo with Pre-Trained Ranker
- Stage: Cambrian
- Version: Spriggina
- **Notice: small changes to ValueEncoder (sigmoid to relu)**

#### Related Commands
- tensorboard --logdir runs
- nohup jupyter lab > jupyter.log &

In [1]:
import logging 
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
torch.__version__

'1.0.0'

In [5]:
class ValueEncoder(nn.Module):
    def __init__(self, p_config=None):
        super(ValueEncoder, self).__init__()
        self.config = p_config
        
        self.vocab_size = self.config["val"]["vocab_size"]
        self.embd_dim = self.config["val"]["embd_dim"]
        self.embedding = nn.Embedding(
            self.vocab_size,
            self.embd_dim,
            self.config["val"]["IDX_PAD"],
        )
        
        self.conv = nn.Conv2d(
            in_channels = self.config["val"]["embd_dim"],
            out_channels = self.config["val"]["conv_n_kernels"],
            kernel_size = self.config["val"]["conv_kernel_size"],
        )
        
        self.pool = nn.MaxPool2d(
            kernel_size = self.config["val"]["pool_kernel_size"],
            padding = self.config["val"]["IDX_PAD"],
        )
        
        self.fc = nn.Linear(
            self.config["val"]["conv_n_kernels"],
            self.config["embd_dim"],
        )
        
    def forward(self, bp_map):
        # batched maps, (B, map_r, map_c)
        # in this version, every value only contains 1 map
        B = bp_map.shape[0]
        
        # (B, map_r, map_c, val_embd_dim) -> (B, val_embd_dim, map_r, map_c)
        d_embd = self.embedding(bp_map).permute(0,3,1,2)
        
        # (B, n_kernel, map_r, 1)
        d_conv = F.relu(self.conv(d_embd))
        
        # (B, n_kernel)
        d_pool = self.pool(d_conv).view(B,self.config["val"]["conv_n_kernels"])
        
        # (B, embd_dim)
        # d_out = torch.sigmoid(
        d_out = F.relu(
            self.fc(d_pool)
        )
        
        return d_out
    

In [6]:
class MorphTransE(nn.Module):
    def __init__(self, p_config=None):
        super(MorphTransE, self).__init__()
        self.config = p_config
        
        self.value_encoder = ValueEncoder(p_config=p_config)
        
        self.fn_vocab_size = self.config["fn"]["vocab_size"]
        self.embd_dim = self.config["embd_dim"]
        
        self.fn_embedding = nn.Embedding(
            self.fn_vocab_size,
            self.embd_dim,
        )
        
        nn.init.xavier_uniform_(self.fn_embedding.weight.data)
        self.fn_embedding.weight.data = F.normalize(
            self.fn_embedding.weight.data, p=2, dim=1,
        )
# ----> skip the forward part since we don't need it <---- #

In [7]:
class TransNeo(nn.Module):
    def __init__(self, p_config=None):
        super(TransNeo, self).__init__()
        self.config = p_config
        
        # predict a fixed number of shells
#         self.policy = nn.Linear(
#             self.config["embd_dim"],
#             self.config["fn"]["vocab_size"],
#         )
        
        # deeper
        self.policy0 = nn.Linear(
            self.config["embd_dim"],
            2048,
        )
        self.policy1 = nn.Linear(
            2048,
            self.config["fn"]["vocab_size"],
        )
        
        self.value_encoder = ValueEncoder(p_config=p_config)
        
    def forward(self, p_mapin, p_mapout):
        # p_mapin/p_mapout: (B, map_r, map_c)
        v_in = self.value_encoder(p_mapin) # (B, embd_dim)
        v_out= self.value_encoder(p_mapout) # (B, embd_dim)
        v_delta = v_out - v_in
#         tmp_out = torch.log_softmax(
#             self.policy(v_delta),dim=1
#         )
        tmp_out = torch.log_softmax(
            self.policy1(
                F.relu(
                    self.policy0(
                        v_delta
                    )
                )
            ),dim=1
        )
        
        return tmp_out

In [8]:
class Ranker(nn.Module):
    def __init__(self, p_config=None):
        super(Ranker, self).__init__()
        self.config = p_config
        self.value_encoder = ValueEncoder(p_config=p_config)
        self.fc0 = nn.Linear(
            self.config["embd_dim"]*2,
            2048,
        )
        self.fc1 = nn.Linear(
            2048,
            2,
        )
        
    '''
    do not use forward function in actual testing, it's only for training
    '''
    def forward(self, pA, pB, pC, pD):
        # pA/pB/pC/pD: (B=1, map_r, map_c)
        
        # (B, embd_dim)
        vA = self.value_encoder(pA)
        vB = self.value_encoder(pB)
        vC = self.value_encoder(pC)
        vD = self.value_encoder(pD)
        
        # (B=2, embd_dim * 2)
        vPOS = torch.cat(
            [
                torch.cat([vA,vB],dim=1),
                torch.cat([vA,vC],dim=1),
            ],dim=0
        )
        
        # (B=10, embd_dim * 2)
        vNEG = torch.cat(
            [
                torch.cat([vB,vA],dim=1),
                torch.cat([vB,vC],dim=1),
                torch.cat([vC,vA],dim=1),
                torch.cat([vC,vB],dim=1),
            ],dim=0
        )
        
        # (B=4*3=12, embd_dim * 2)
        vII = torch.cat([vPOS,vNEG],dim=0)
        
        # == Notice: don't do any activation at the last layer ==
        vOO = self.fc1(
            F.relu(
                self.fc0(
                    vII
                )
            )
        )
        # (B=4*3=12, 2)
        
        return vOO
    
    
    '''
    strictly in this form A->B
    '''
    def inference(self, pA, pB):
        vA = self.value_encoder(pA) # (B, embd_dim)
        vB = self.value_encoder(pB) # (B, embd_dim)
        vUNK = torch.cat([vA,vB],dim=1) # (B, embd_dim * 2)
        vOUT = self.fc1(
            F.relu(
                self.fc0(
                    vUNK
                )
            )
        ) # (B, 2)
        vOUT = F.softmax(vOUT, dim=1)
        return vOUT

In [9]:
# replace certain node id with certain value
def modify_shell(p_shell, p_id_from, p_id_to):
    d_prod = p_shell[0]
    d_rhs = p_shell[1]
    ld_rhs = [p_id_to if d_rhs[i]==p_id_from else d_rhs[i]
             for i in range(len(d_rhs))]
    return (d_prod, tuple(ld_rhs))

In [10]:
'''
meta-test an agent, directly run into testing / online adaptation
'''
def MetaTest(p_config, p_spec, p_interpreter, p_generator, p_model, p_ranker, p_optim, p_writer):
    print("# Start Meta-Test...")
    p_ranker.eval()
    
    nth_attempt = 0 # tell whether to back-prop or not
    batch_lossA_list = []
    batch_lossD_list = []
    
    n_solved = 0 # track the number of solved problem
    n_attempt_list = [] # track the number of attempts in every episode
    
    selected_neurons = []
    ranker_scores = [] # parallel to selected_neurons, with adjusted scores
    dead_neurons = [] # DeepPath: store node with execution error
    
    for d_episode in range(p_config["meta_test"]["n_episode"]):
        
        # retrieve the given meta-trained model for testing
        test_model = copy.deepcopy(p_model)
        test_model.train()
        
        # if doing random meta-testing
        # then randomly generate a program for testing
        ps_solution = p_generator.get_new_chain_program(
            p_config["meta_test"]["fixed_depth"],
        )
        
        is_solved = False
        
        for d_attempt in range(p_config["meta_test"]["maxn_attempt"]):
            if is_solved:
                # already solved in the last attempt, stop
                break
            
            nth_attempt += 1
            attempt_reward = None
            
            # in every new attempt, initialize a new Program Space
            ps_current = ProgramSpace(
                p_spec, p_interpreter, ps_solution.inputs, ps_solution.output,
            )
            # then initialize a shell template
            tmp_shell_list = ps_current.get_neighboring_shells()
            tmp_node_to_replace = ps_current.node_dict["ParamNode"][0] # for chain only
            # replace the Param Node id in shells with -1 to make them templates
            template_list = [
                modify_shell(tmp_shell_list[i],tmp_node_to_replace,-1)
                for i in range(len(tmp_shell_list))
            ]
                
            d_step = 0
            while d_step<p_config["meta_test"]["maxn_step"]:
                
                # print the training progress
                print("\r# AC/EP:{}/{}, AT:{}, SP:{}, DN:{}, avg.attempt:{:.2f}, er:{:.2f}".format(
                    n_solved, d_episode, d_attempt, d_step, 
                    len(dead_neurons),
                    sum(n_attempt_list)/len(n_attempt_list) if len(n_attempt_list)>0 else -1,
                    p_config["meta_test"]["exploration_rate"](d_episode,d_attempt),
                ),end="")
                
                # ### assume chain execution, so only 1 possible returns
                # ### at d_step=0, this should be input[0]
                id_current = ps_current.get_strict_frontiers()[0]
                var_current = ps_current.node_list[id_current].ps_data # need the real var name in r env
                var_output = ps_current.output
                
                map_current = p_interpreter.camb_get_abs(var_current)
                map_output = p_interpreter.camb_get_abs(var_output)
                
                # make current shell list
                current_shell_list = [
                    modify_shell(template_list[i],-1,id_current)
                    for i in range(len(template_list))
                ]
                
                # wrap in B=1
                if use_cuda:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.long)).cuda()
                    td_output = Variable(torch.tensor([map_output],dtype=torch.long)).cuda()
                else:
                    td_current = Variable(torch.tensor([map_current],dtype=torch.long))
                    td_output = Variable(torch.tensor([map_output],dtype=torch.long))
                    
                # (B=1, fn_vocab_size)
                td_pred = test_model(td_current, td_output)
                
                # no hints
                if random.random()<=p_config["meta_test"]["exploration_rate"](d_episode,d_attempt):
                    # exploration
                    tmp_id = random.choice(range(len(current_shell_list)))
                else:
                    # exploitation
                    tmp_id = torch.multinomial(td_pred.exp().flatten(), 1).cpu().flatten().numpy()[0]
                
                # update ps_current
                update_status = ps_current.add_neighboring_shell(
                    current_shell_list[tmp_id]
                )
                
                if update_status:
                    # record selected neuron
                    selected_neurons.append(td_pred[0,tmp_id])
                    
                    id_very = ps_current.get_strict_frontiers()[0]
                    var_very = ps_current.node_list[id_very].ps_data
                    map_very = p_interpreter.camb_get_abs(var_very)
                    if use_cuda:
                        td_very = Variable(torch.tensor([map_very],dtype=torch.long)).cuda()
                    else:
                        td_very = Variable(torch.tensor([map_very],dtype=torch.long))
                    ranker_scores.append(p_ranker.inference(td_very,td_output)[0,1])
                    
                    d_step += 1
                    
                    # succeed
                    if ps_current.check_eq() is not None:
                        # and solved!
                        is_solved = True
                        n_solved += 1
                        attempt_reward = 1.0
                        break
                else:
                    # DeepPath: fail, add to dead list
                    dead_neurons.append(td_pred[0,tmp_id])
                    if len(dead_neurons)>p_config["meta_test"]["dp_cap"]:
                        # exceed the max capacity of dead pool
                        break
            
            # <END_FOR_STEP>
            
            # check the attempt_reward
            if attempt_reward is None:
                # means either failure in execution or exceeding max_step
                attempt_reward = -1.
            
            # compute the loss (sequential selected)
            for i in range(len(selected_neurons)):
                # d_decay = p_config["meta_test"]["decay_rate"]**(len(selected_neurons)-1-i)
                # batch_lossA_list.append(
                #     d_decay*attempt_reward*(-selected_neurons[i]) 
                # )
                if attempt_reward>0: # success
                    batch_lossA_list.append(
                        attempt_reward*(-selected_neurons[i]) 
                    )
                else: # fail
                    if ranker_scores[i]>0.5:
                        batch_lossA_list.append(
                            0.5*(-selected_neurons[i]) 
                        )
                    else:
                        batch_lossA_list.append(
                            (-0.5)*(-selected_neurons[i]) 
                        )
            
            # compute the loss (dead neurons)
            for i in range(len(dead_neurons)):
                batch_lossD_list.append(
                    (-1.)*(-dead_neurons[i])
                )
            
            if is_solved or nth_attempt>=p_config["meta_test"]["batch_size"]:
                # directly do the back-prop
                if len(batch_lossD_list)>0:
                    batch_lossD = sum(batch_lossD_list)/len(batch_lossD_list)
                    p_optim.zero_grad()
                    batch_lossD.backward()
                    p_optim.step()
                
                if len(batch_lossA_list)>0:
                    batch_lossA = sum(batch_lossA_list)/len(batch_lossA_list)
                    p_optim.zero_grad()
                    batch_lossA.backward()
                    p_optim.step()
            
                nth_attempt = 0
                batch_lossA_list = []
                batch_lossD_list = []
                selected_neurons = []
                ranker_scores = []
                dead_neurons = []
                
        # <END_FOR_ATTEMPT>     
        
        # after all the attempts
        n_attempt_list.append(d_attempt)
        if writer is not None:
            writer.add_scalar(
                'avg.attempt',
                sum(n_attempt_list)/len(n_attempt_list) if len(n_attempt_list)>0 else 0,
                len(n_attempt_list),
            )
        
#         if d_episode%100==0:
#             # save the model
#             torch.save(
#                 p_model.state_dict(),
#                 "./saved_models/0713CAMB_RL2_camb3_ep{}.pt".format(d_episode)
#             )
            
    # <END_FOR_EPISODE>
    

In [11]:
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb3.tyrell')
m_generator = MorpheusGenerator(
    spec=m_spec,
    interpreter=m_interpreter,
)

# dumb variable to help infer the shells
m_ps = ProgramSpace(
    m_spec, m_interpreter, [None], None,
)

m_config = {
    # ==== TransE Setting ==== #
    "val":{
        "vocab_size": len(m_interpreter.CAMB_LIST),
        "embd_dim": 16, # embedding dim of CAMB abstract token
        "conv_n_kernels": 512,
        "conv_kernel_size": (1,m_interpreter.CAMB_NCOL), 
        "pool_kernel_size": (m_interpreter.CAMB_NROW,1), 
        "IDX_PAD": 0,
    },
    "fn":{
        "vocab_size": len(m_ps.get_neighboring_shells())
    },
    "embd_dim": 128,
    "ranker":{
        "from": "./saved_models/Zaina_ep52.pt",
    },
    "meta_test":{
        "n_episode": 100000,
        "batch_size": 1, # how many attempts
        "fixed_depth": 3,
        "maxn_attempt": 100,
        "maxn_step": 2, # program size
        "exploration_rate": lambda pep,pat:0.1,
        "decay_rate": 0.9,
        "dp_cap": 50,
    },
}

trans_neo = TransNeo(p_config=m_config)
ranker = Ranker(p_config=m_config)
ranker.load_state_dict(torch.load(m_config["ranker"]["from"]))
if use_cuda:
    trans_neo = trans_neo.cuda()
    ranker = ranker.cuda()
optimizer = torch.optim.Adam(list(trans_neo.parameters()))

# writer = SummaryWriter("runs/0713CAMB_RL2_camb3")
writer = None

In [12]:
m_config

{'val': {'vocab_size': 150,
  'embd_dim': 16,
  'conv_n_kernels': 512,
  'conv_kernel_size': (1, 15),
  'pool_kernel_size': (15, 1),
  'IDX_PAD': 0},
 'fn': {'vocab_size': 120},
 'embd_dim': 128,
 'ranker': {'from': './saved_models/Zaina_ep52.pt'},
 'meta_test': {'n_episode': 100000,
  'batch_size': 1,
  'fixed_depth': 3,
  'maxn_attempt': 100,
  'maxn_step': 2,
  'exploration_rate': <function __main__.<lambda>(pep, pat)>,
  'decay_rate': 0.9,
  'dp_cap': 50}}

In [13]:
MetaTest(m_config, m_spec, m_interpreter, m_generator, trans_neo, ranker, optimizer, writer)

# Start Meta-Test...
# AC/EP:17/97, AT:35, SP:0, DN:1, avg.attempt:91.53, er:0.10

KeyboardInterrupt: 

In [None]:
var1 = m_interpreter.random_table()
map1 = m_interpreter.camb_get_abs(var1)
td1 = Variable(torch.tensor([map1],dtype=torch.long)).cuda()
print(var1)
m_interpreter.print_obj(var1)

In [None]:
var2 = m_interpreter.random_table()
map2 = m_interpreter.camb_get_abs(var2)
td2 = Variable(torch.tensor([map2],dtype=torch.long)).cuda()
print(var2)
m_interpreter.print_obj(var2)

In [None]:
m_interpreter.renv('TEST1 <- select({},1,2)'.format(var1))
var3 = 'TEST1'
map3 = m_interpreter.camb_get_abs(var3)
td3 = Variable(torch.tensor([map3],dtype=torch.long)).cuda()
m_interpreter.print_obj(var3)

In [None]:
m_interpreter.renv('TEST2 <- unite({},"new",c(1),c(2))'.format(var3))
var4 = 'TEST2'
map4 = m_interpreter.camb_get_abs(var4)
td4 = Variable(torch.tensor([map4],dtype=torch.long)).cuda()
m_interpreter.print_obj(var4)

In [None]:
var3 = 

In [None]:
ranker.eval()

In [None]:
ranker.inference(td1,td2)

In [None]:
ranker.inference(td1,td3)

In [None]:
ranker.inference(td1,td4)

In [None]:
ranker.inference(td3,td4)

In [None]:
ranker.inference(td2,td4)