## AlphaNeo for Max Length of 1 (depth of 2)

```
tensorboard --logdir runs
```

```
cd ./Trinity/
python ./AlphaNeo_Cambrian_pworker.py 1
```

```
nohup jupyter lab > jupyter.log &
```

In [1]:
import logging 
# Configure the root logger with a CRITICAL threshold so that lower-severity
# log records are not emitted while the notebook runs.
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import fcntl

from pathlib import Path

In [3]:
import tyrell.spec as S
from tyrell.interpreter import Interpreter, PostOrderInterpreter, GeneralError, InterpreterError
from tyrell.enumerator import Enumerator, SmtEnumerator, RandomEnumerator, DesignatedEnumerator, RandomEnumeratorS, ExhaustiveEnumerator
from tyrell.decider import Example, ExampleConstraintPruningDecider, ExampleDecider, TestDecider
from tyrell.synthesizer import Synthesizer
from tyrell.logger import get_logger
from sexpdata import Symbol
from tyrell import dsl as D
from typing import Callable, NamedTuple, List, Any

In [4]:
# import pickle
import dill as pickle
import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from tensorboardX import SummaryWriter

# Module-level flag: True when a CUDA device is available. The training loop
# and the model setup further down read this global to decide whether tensors
# and the model are moved to the GPU with `.cuda()`.
use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [None]:
# Morpheus Version
from utils_morpheus import *
from ProgramSpace import *

In [None]:
torch.__version__

'1.0.0'

In [None]:
# debug block
# Module-level scratch slots for debugging; both start as None and are not
# referenced anywhere else in the visible part of this file.
DBG_VAR = None
DBG_VAR2 = None

In [None]:
class ListModule(object):
    """Indexable collection of sub-modules stored on a host ``nn.Module``.

    Every appended module is attached to the host through ``add_module`` under
    the attribute name ``prefix + str(index)``; indexing reads it back with
    ``getattr``. Only non-negative indices below ``len(self)`` are accepted.
    """

    def __init__(self, module, prefix, *args):
        self.module = module
        self.prefix = prefix
        self.num_module = 0
        # Any modules passed positionally are registered in order.
        for initial in args:
            self.append(initial)

    def _name_of(self, index):
        """Return the attribute name used on the host for slot ``index``."""
        return self.prefix + str(index)

    def append(self, new_module):
        """Register ``new_module`` on the host under the next free index.

        Raises:
            ValueError: if ``new_module`` is not an ``nn.Module``.
        """
        if not isinstance(new_module, nn.Module):
            raise ValueError('Not a Module')
        self.module.add_module(self._name_of(self.num_module), new_module)
        self.num_module += 1

    def __len__(self):
        return self.num_module

    def __getitem__(self, i):
        # Negative indices are rejected as well, not wrapped around.
        if not 0 <= i < self.num_module:
            raise IndexError('Out of bound')
        return getattr(self.module, self._name_of(i))

In [None]:
class NodeEncoderABS(nn.Module):
    '''
    Node Encoder for ParamNode & ApplyNode

    A node is described by ``config["abs"]["n_maps"]`` integer feature maps.
    Each map is embedded, sent through its own Conv2d + ReLU + MaxPool2d
    branch, and the pooled vectors are concatenated and projected to
    ``config["node_dim"]`` by a linear layer followed by ReLU.

    Config keys read:
        config["abs"]["vocab_size"], config["abs"]["embd_dim"],
        config["abs"]["n_maps"], config["abs"]["conv_n_kernels"],
        config["abs"]["conv_kernel_sizes"], config["abs"]["pool_kernel_sizes"],
        config["node_dim"], config["IDX_PAD"] (embedding padding index).
    '''
    def __init__(self, p_config=None):
        super(NodeEncoderABS, self).__init__()
        self.config = p_config
        abs_config = self.config["abs"]

        self.vocab_size = abs_config["vocab_size"]
        self.embd_dim = abs_config["embd_dim"]
        self.node_dim = self.config["node_dim"] # shared between tml/abs
        self.embedding = nn.Embedding(
            self.vocab_size,
            self.embd_dim,
            padding_idx=self.config["IDX_PAD"],
        )

        # one conv/pool branch per entry of conv_n_kernels
        self.convs = ListModule(self, "abs_convs_")
        self.pools = ListModule(self, "abs_pools_")
        for i, n_kernels in enumerate(abs_config["conv_n_kernels"]):
            self.convs.append(
                nn.Conv2d(
                    in_channels=self.embd_dim,
                    out_channels=n_kernels,
                    kernel_size=abs_config["conv_kernel_sizes"][i],
                )
            )
            self.pools.append(
                nn.MaxPool2d(
                    kernel_size=abs_config["pool_kernel_sizes"][i],
                    # IDX_PAD is a vocabulary index, not a spatial padding
                    # amount, so it must not be forwarded here. Pooling is
                    # un-padded (a (1,1) kernel cannot take any padding).
                    padding=0,
                )
            )

        self.fc = nn.Linear(
            sum(abs_config["conv_n_kernels"]),
            self.node_dim,
        )

    def forward(self, pmaps):
        '''
        Encode one node.

        pmaps: indexable of n_maps integer tensors, one per feature map, each
               with a leading batch dimension of 1
               (assumed (1, nrow, ncol) -- TODO confirm against caller).
        returns: tensor of shape (1, node_dim)
        '''
        n_maps = self.config["abs"]["n_maps"]
        n_kernels = self.config["abs"]["conv_n_kernels"]

        d_pools = []
        for i in range(n_maps):
            # embedding appends the feature dim last; move it to the channel
            # position expected by Conv2d: (1, nrow, ncol, dim) -> (1, dim, nrow, ncol)
            d_embd = self.embedding(pmaps[i]).permute(0, 3, 1, 2)
            # (1, dim, nrow, ncol) -> (1, n_kernel, nrow', ncol')
            d_conv = F.relu(self.convs[i](d_embd))
            # pooling is expected to leave a single spatial cell so the
            # result can be viewed as (1, n_kernel)
            d_pools.append(self.pools[i](d_conv).view(1, n_kernels[i]))

        d_known = torch.cat(d_pools, dim=1) # (1, sum of n_kernel over maps)
        d_out = F.relu(self.fc(d_known))

        # (1, node_dim)
        return d_out

In [None]:
class NodeEncoderGCN(nn.Module):
    '''
    Node Encoder for AtomNode & Candidate Node

    Holds one learnable feature row per terminal (EnumNode) plus a square
    weight matrix, and applies a single graph-convolution step
    relu(A @ H @ W) where H is the learnable rows stacked on top of the
    externally supplied node features.
    '''
    def __init__(self, p_config=None):
        super().__init__()
        self.config = p_config

        # NOTICE:
        # vocab_size counts EnumNode entries only; ParamNode are not included
        self.vocab_size = p_config["tml"]["vocab_size"]
        self.node_dim = p_config["node_dim"]

        # allocated uninitialised, filled by reset_parameters() below
        self.tml_h = Parameter(torch.Tensor(self.vocab_size, self.node_dim))
        self.tml_w = Parameter(torch.Tensor(self.node_dim, self.node_dim))
        self.reset_parameters()

    def reset_parameters(self):
        '''Draw both parameter matrices from a standard normal distribution.'''
        for weight in (self.tml_h, self.tml_w):
            nn.init.normal_(weight)

    def forward(self, adjmtx, sensp):
        '''
        adjmtx: (vocab_size+new_n, vocab_size+new_n) normalized adjacency
                matrix, no batch dimension
        sensp:  (new_n, node_dim) stacked features of the newly sensed nodes
        returns: (vocab_size+new_n, node_dim) node embeddings
        '''
        # learnable terminal rows first, sensed nodes after them
        node_feats = torch.cat([self.tml_h, sensp], dim=0)
        # neighbourhood aggregation, then the shared linear map + activation
        aggregated = torch.matmul(adjmtx, node_feats)
        return F.relu(torch.matmul(aggregated, self.tml_w))
        
        

In [None]:
class AlphaNeo(nn.Module):
    '''
    Scores candidate shells for the current program graph.

    Newly sensed nodes are encoded with NodeEncoderABS, combined with the
    learnable terminal embeddings through NodeEncoderGCN, and every candidate
    shell is scored by the dot product between its production-specific
    encoding and the mean embedding of the frontier nodes.
    '''
    def __init__(self, p_config=None):
        super().__init__()
        self.config = p_config
        node_dim = p_config["node_dim"]

        self.abs_encoder = NodeEncoderABS(p_config=p_config)
        self.tml_encoder = NodeEncoderGCN(p_config=p_config)
        # NOTE(review): this layer is constructed but never applied in forward()
        self.policy = nn.Linear(node_dim, node_dim)

        # one linear layer per production, EnumProduction included,
        # so some of these layers may never be used
        self.prods = ListModule(self, "prods_")
        for _ in range(p_config["n_prods"]):
            self.prods.append(nn.Linear(node_dim, node_dim))

    def forward(self, adjmtx, all_maps, frontier_ids, out_map, candidate_shells):
        '''
        single batch behavior, no batch dim expected

        adjmtx: (vocab_size+new_n, vocab_size+new_n) normalized adjacency matrix
        all_maps: new_n entries, each the feature maps of one sensed node
        frontier_ids: list of node ids used for aggregation
        out_map: feature maps of the expected output
        candidate_shells: result of get_neighboring_shells; element [0] of a
            shell selects the production layer, element [1] the node ids
        returns: (n_cand,) log-probabilities over the candidate shells
        '''
        # (new_n, node_dim): one (1, node_dim) row per sensed node
        sensed = torch.cat(
            [self.abs_encoder(node_maps) for node_maps in all_maps],
            dim=0,
        )

        # (1, node_dim)
        # NOTE(review): the output encoding is computed but does not take part
        # in the score below -- confirm whether it was meant to.
        out_feat = self.abs_encoder(out_map)

        # (vocab_size+new_n, node_dim)
        node_embd = self.tml_encoder(adjmtx, sensed)

        # aggregate the frontier nodes into one graph vector, (1, node_dim)
        graph_vec = node_embd[frontier_ids, :].mean(dim=0, keepdim=True)

        # encode every candidate shell: mean of its nodes, then the linear
        # layer of its production -> (1, node_dim) each
        per_candidate = []
        for shell in candidate_shells:
            prod_layer = self.prods[shell[0]]
            shell_mean = node_embd[shell[1], :].mean(dim=0, keepdim=True)
            per_candidate.append(prod_layer(shell_mean))

        # (n_cand, node_dim)
        cand_mtx = torch.cat(per_candidate, dim=0)

        # (n_cand, node_dim) x (node_dim, 1) -> (n_cand, 1) -> (n_cand,)
        raw_scores = torch.matmul(cand_mtx, graph_vec.t()).flatten()
        return F.log_softmax(raw_scores, dim=0)

In [None]:
def _sample_program(p_config, p_interpreter, p_generator):
    """Keep drawing random input tables until the generator yields a program
    that contains at least one function call; return (program, example)."""
    while True:
        p_input = p_interpreter.random_table()
        p_prog, p_example = p_generator.generate(
            fixed_depth=p_config["max_depth"],
            example=Example(input=[p_input], output=None),
        )
        # make sure at least one function call
        if p_prog is not None and p_prog.is_apply():
            return p_prog, p_example


def _maps_to_tensors(feature_maps):
    """Wrap each feature map in a batch-of-one LongTensor, moved to the GPU
    when the module-level ``use_cuda`` flag is set."""
    tensors = [torch.LongTensor([fmap]) for fmap in feature_maps]
    if use_cuda:
        tensors = [t.cuda() for t in tensors]
    return tensors


def AlphaNeoTrainer(p_config, p_spec, p_interpreter, p_generator, p_model, p_optim, p_writer):
    """Train ``p_model`` with policy-gradient style updates, one episode per step.

    The same generated program is retried episode after episode until the
    model solves it; only then is a new program sampled. Every episode builds
    a program by repeatedly picking one candidate shell, and the loss is the
    discounted sum of ``-log_prob * reward`` over the picked shells.

    Args:
        p_config: dict; reads "n_steps", "max_depth", "max_calls", "gamma" and
            "exploration_rate" (a callable mapping the step index to the
            probability of picking a candidate uniformly at random).
        p_spec: DSL spec handed to ProgramSpace.
        p_interpreter: interpreter providing random_table() and used by
            ProgramSpace.
        p_generator: program generator providing generate(...).
        p_model: model called as
            p_model(adjmtx, maps, frontier_ids, out_map, candidates) and
            returning log-probabilities over the candidates.
        p_optim: optimizer over the model parameters.
        p_writer: object with add_scalar(tag, value, step), or None to
            disable scalar logging.
    """
    reward_list = []
    n_batch = 1
    batch_loss = 0.
    c_nth = 0

    # solved-program counters keyed by the number of calls in the solution;
    # buckets beyond 3 are created on demand (max_calls may exceed 3)
    total_ac = {1: 0, 2: 0, 3: 0}

    is_solved = False
    solved_rewards = []
    solved_attempts = []
    n_attempt = 0

    # initialize a program first
    p_prog, p_example = _sample_program(p_config, p_interpreter, p_generator)

    # one episode each step
    for d_step in range(p_config["n_steps"]):
        p_model.train()

        if is_solved:
            # previous program was solved: record the attempts and move on
            is_solved = False
            solved_rewards = []
            solved_attempts.append(n_attempt)
            n_attempt = 0
            p_prog, p_example = _sample_program(p_config, p_interpreter, p_generator)
        else:
            # use the same data and perform RL again until it converges
            # in this case, n_batch should always be 1
            n_attempt += 1

        # start from the first state
        ps_current = ProgramSpace(
            p_spec, p_interpreter, p_example.input, p_example.output,
        )
        d_reward = None
        selected_edges = []

        # features of every known node, ParamNode first; tensors are built
        # once per node and reused across the steps of this episode
        # ==== TODO: temporarily based on input[0] ====
        td_maps = [
            _maps_to_tensors(camb_get_features(ps_current.inputs[0], p_in))
            for p_in in ps_current.inputs
        ]
        td_out = _maps_to_tensors(
            camb_get_features(ps_current.inputs[0], ps_current.output)
        )

        while True:
            current_frontiers = ps_current.get_frontiers()
            current_candidates = ps_current.get_neighboring_shells()
            current_adjmtx = ps_current.get_normalized_adjacency_matrix_u()

            td_adj = torch.FloatTensor(current_adjmtx)
            if use_cuda:
                td_adj = td_adj.cuda()

            # (n_cand,) log-probabilities
            td_output = p_model(
                td_adj,
                td_maps,
                current_frontiers,
                td_out,
                current_candidates,
            )

            if random.random() <= p_config["exploration_rate"](d_step):
                # exploration: uniform pick
                selected_id = random.randrange(len(current_candidates))
            else:
                # exploitation: sample from the predicted distribution
                selected_id = int(
                    torch.multinomial(td_output.exp().flatten(), 1).item()
                )

            # keep track of the log-probability of the selected edge
            selected_edges.append(td_output[selected_id])
            # add selected edges and fill
            ret = ps_current.add_neighboring_shell(current_candidates[selected_id])

            if ret == False:
                # failed interpretation
                d_reward = -0.1
                solved_rewards.append(d_reward)
                break

            # succeeded: the new node is the last one in node_list
            td_maps.append(
                _maps_to_tensors(
                    camb_get_features(ps_current.inputs[0], ps_current.node_list[-1])
                )
            )
            # see if it's solved or not
            pid = ps_current.check_eq()
            if pid is not None:
                # solved in depth less than or equal to max_depth
                d_reward = 1.
                solved_rewards.append(d_reward)
                n_calls = ps_current.get_ncalls(pid=pid)
                total_ac[n_calls] = total_ac.get(n_calls, 0) + 1
                is_solved = True
                break
            if ps_current.get_ncalls() >= p_config["max_calls"]:
                # call budget exhausted without a solution
                d_reward = -0.1
                solved_rewards.append(d_reward)
                break

        # finally compute the loss: later choices are discounted less
        d_loss = 0.
        ns = len(selected_edges)
        for i, log_prob in enumerate(selected_edges):
            d_decay = p_config["gamma"] ** (ns - 1 - i)
            # should negate the log probabilities
            d_loss += d_decay * d_reward * (-log_prob)

        if is_solved:
            # average reward over all attempts spent on this program
            reward_list.append(
                sum(solved_rewards) / len(solved_rewards)
            )
        batch_loss += d_loss

        avg_reward = sum(reward_list) / len(reward_list) if reward_list else 0
        avg_attempt = sum(solved_attempts) / len(solved_attempts) if solved_attempts else 0
        print("\r# Attempt{}, reward:{:.4f}, avg.reward:{:.4f}, avg.attempt:{:.2f}, ac: 1->{}, 2->{}, 3->{}".format(
            n_attempt, d_reward,
            avg_reward,
            avg_attempt,
            total_ac[1], total_ac[2], total_ac[3],
        ), end="")

        # log through the writer that was passed in, not a module global
        if p_writer is not None:
            p_writer.add_scalar(
                'avg.reward/step',
                avg_reward,
                # use number of solved programs as step (episode),
                # not number of attempts
                sum(total_ac.values()),
            )

        c_nth += 1
        if c_nth % n_batch == 0:
            c_nth = 0
            # perform gradient in every batch
            batch_loss.backward()
            p_optim.step()
            p_optim.zero_grad()
            batch_loss = 0.
            

In [None]:
# ---- problem setup: interpreter, DSL spec, generator, program space ----
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/set_select.tyrell')
m_eq = eq_r
m_generator = MorpheusGenerator(
    spec=m_spec, interpreter=m_interpreter, sfn=m_interpreter.sanity_check,
)
m_ps = ProgramSpaceChainOneNB(m_spec, m_interpreter, m_eq, None, None)

# ---- model / training configuration ----
m_config = {
    # every production of the spec, EnumProduction included
    "n_prods": m_spec.num_productions(),
    "abs": {
        "n_maps": 4,
        "vocab_size": len(CAMB_LIST),
        "embd_dim": 10,
        # placeholders, filled in right after the dict is built
        "conv_n_kernels": None,
        "conv_kernel_sizes": None,
        "pool_kernel_sizes": None,
    },
    "tml": {
        # number of EnumProduction entries in the spec
        "vocab_size": sum(1 for prod in m_spec.productions() if prod.is_enum()),
    },
    "node_dim": 128,
    "IDX_PAD": 0,
    "n_steps": 10000000,
    "gamma": 0.618,
    # decays linearly from 0.9 to 0.1 over the first 2500 steps, then stays
    "exploration_rate": lambda x: 0.9 - 0.8 * min(1, x / 2500),
    "max_depth": 2,
    "max_calls": 10,
}
# per-map conv/pool settings: 10 kernels spanning a full row for every map;
# pooling per map is a:1, b:nrow, c:1, d:nrow
m_config["abs"].update(
    conv_n_kernels=[10] * m_config["abs"]["n_maps"],
    conv_kernel_sizes=[(1, CAMB_NCOL)] * m_config["abs"]["n_maps"],
    pool_kernel_sizes=[(1, 1), (CAMB_NROW, 1), (1, 1), (CAMB_NROW, 1)],
)

# print(m_config)

# ---- model, optimizer, TensorBoard writer ----
alpha_neo = AlphaNeo(p_config=m_config)
if use_cuda:
    alpha_neo = alpha_neo.cuda()
optimizer = torch.optim.Adam(alpha_neo.parameters())
# assign None to `writer` instead to turn scalar logging off
writer = SummaryWriter("runs/0701CAMB_RL1_select")
# writer = None

In [None]:
m_config

{'n_prods': 72,
 'abs': {'n_maps': 4,
  'vocab_size': 600,
  'embd_dim': 10,
  'conv_n_kernels': [10, 10, 10, 10],
  'conv_kernel_sizes': [(1, 20), (1, 20), (1, 20), (1, 20)],
  'pool_kernel_sizes': [(1, 1), (50, 1), (1, 1), (50, 1)]},
 'tml': {'vocab_size': 69},
 'node_dim': 128,
 'IDX_PAD': 0,
 'n_steps': 10000000,
 'gamma': 0.618,
 'exploration_rate': <function __main__.<lambda>(x)>,
 'max_depth': 2,
 'max_calls': 10}

In [None]:
len(m_ps.shell_list)

21

In [None]:
# Start training with the configuration, spec, interpreter, generator, model,
# optimizer and TensorBoard writer built in the setup cell above. The loop runs
# for m_config["n_steps"] episodes and prints a one-line progress report.
AlphaNeoTrainer(m_config, m_spec, m_interpreter, m_generator, alpha_neo, optimizer, writer)

# Attempt5, reward:-0.1000, avg.reward:0.4088, avg.attempt:3.50, ac: 1->4, 2->0, 3->0