## TransNeo/AlphaNeo Ranker
- Tells which pair of tables is functionally/logically closer along an execution chain
- Stage: Cambrian
- Version: Spriggina

In [1]:
import logging 
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

# Module-wide switch read by the train/test loops below: tensors (and the model)
# are moved to the GPU only when CUDA is available.
use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
torch.__version__

'1.0.0'

In [5]:
class RankerTestDataset(Dataset):
    """Fixed evaluation set of (input map, output map) pairs.

    All pairs are converted to abstraction maps once, in the constructor, and
    kept in two integer arrays of shape ``(n_exp, n_row, n_col)``.

    Args:
        p_config: configuration dict; ``p_config["ranker"]["n_row"]`` and
            ``["n_col"]`` give the abstraction-map size.
        p_dataset: mapping whose values are sequences of ``(prog, example)``
            records. Only the example part is used. For every key the first
            ``len(records) - 990`` records are kept, so keys with 990 or fewer
            records contribute nothing.
        p_interpreter: object providing ``CAMB_DICT``, ``camb_get_abs``,
            ``load_data_into_var`` and ``random_table``.
        p_spec: stored on the instance, not otherwise used here.
        is_fake: when true, every pair is made of two independent random
            tables (negative samples) instead of the stored example; the
            number of pairs is still taken from ``p_dataset``.
    """

    def __init__(self, p_config=None, p_dataset=None, p_interpreter=None, p_spec=None, is_fake=False):
        self.interpreter = p_interpreter
        self.spec = p_spec
        self.dataset = p_dataset
        self.config = p_config

        # Flatten the dataset into a plain list of examples; the program that
        # produced each example is irrelevant for testing.
        self.str_examples = []
        for dkey in self.dataset.keys():
            records = self.dataset[dkey]
            for j in range(len(records) - 990):
                self.str_examples.append(records[j][1])

        self.n_exp = len(self.str_examples)
        self.n_row = self.config["ranker"]["n_row"]
        self.n_col = self.config["ranker"]["n_col"]

        print("# Test Dataset Examples: {}".format(self.n_exp))

        # == only works for chain: 1 input, 1 output ==
        # Pre-compute the abstraction map of both sides of every pair.
        print("# Parsing Dataset...")
        map_shape = (self.n_exp, self.n_row, self.n_col)
        self.LMTX = np.full(map_shape, self.interpreter.CAMB_DICT["<PAD>"], dtype=int)
        self.RMTX = np.full(map_shape, self.interpreter.CAMB_DICT["<PAD>"], dtype=int)

        for idx in range(self.n_exp):
            print("\r## in:{}/{}".format(idx, self.n_exp), end="")
            if not is_fake:
                example = self.str_examples[idx]
                # == chain ==: only the first input table is considered
                left_var = self.interpreter.load_data_into_var(example.input[0])
                right_var = self.interpreter.load_data_into_var(example.output)
            else:
                # negative samples: two unrelated random tables
                left_var = self.interpreter.random_table()
                right_var = self.interpreter.random_table()
            left_map = self.interpreter.camb_get_abs(left_var)
            right_map = self.interpreter.camb_get_abs(right_var)
            self.LMTX[idx] = left_map
            self.RMTX[idx] = right_map
        print()

    def __len__(self):
        """Number of pre-computed pairs."""
        return self.n_exp

    def __getitem__(self, p_ind):
        """Return the ``(left, right)`` maps of pair ``p_ind``, each (n_row, n_col)."""
        left = self.LMTX[p_ind]
        right = self.RMTX[p_ind]
        return (left, right)

In [6]:
class RankerTrainDataset(Dataset):
    """Training set that synthesises fresh positive/negative pairs per item.

    Only the programs are taken from ``p_dataset`` (the program of the first
    record of every key that has more than 20 records). Every ``__getitem__``
    call draws new random tables, so the requested index is not used to pick
    the data; ``__len__`` merely fixes how many items make up one epoch.

    Args:
        p_config: configuration dict; reads ``n_neg``, ``n_pos``, ``n_row``
            and ``n_col`` under ``p_config["ranker"]``.
        p_dataset: mapping whose values are sequences of ``(prog, example)``.
        p_interpreter: object providing ``CAMB_DICT``, ``random_table``,
            ``eval``, ``sanity_check`` and ``camb_get_abs``.
        p_spec: spec handed to ``ProgramSpace`` when validating a sample.
    """

    def __init__(self, p_config=None, p_dataset=None, p_interpreter=None, p_spec=None):
        self.interpreter = p_interpreter
        self.spec = p_spec
        self.dataset = p_dataset
        self.config = p_config

        # we need the prog to sample IOs
        self.progs = []
        for dkey in self.dataset.keys():
            if len(self.dataset[dkey]) > 20:
                self.progs.append(self.dataset[dkey][0][0])

        ranker_cfg = self.config["ranker"]
        self.n_exp = len(self.progs)
        self.n_neg = ranker_cfg["n_neg"]
        self.n_pos = ranker_cfg["n_pos"]
        self.n_row = ranker_cfg["n_row"]
        self.n_col = ranker_cfg["n_col"]

        print("# Train Dataset Programs: {}".format(self.n_exp))

    def __len__(self):
        """Number of usable programs (items per epoch)."""
        return self.n_exp

    def _padded_maps(self, p_count):
        """Allocate ``p_count`` abstraction maps filled with the <PAD> token."""
        return np.full(
            (p_count, self.n_row, self.n_col),
            self.interpreter.CAMB_DICT["<PAD>"],
            dtype=int,
        )

    def _sample_positive_pair(self):
        """Return (input map, output map) of a random program on a random table.

        Keeps drawing random input tables until the chosen program evaluates
        without raising and the resulting program space passes the
        interpreter's sanity check.
        """
        prog = random.choice(self.progs)
        while True:
            table_in = self.interpreter.random_table()
            try:
                table_out = self.interpreter.eval(prog, [table_in])
            except Exception:
                # evaluation failed on this table, draw another one
                continue
            example = Example(input=[table_in], output=table_out)
            space = ProgramSpace(
                self.spec, self.interpreter,
                example.input, example.output
            )
            space.init_by_prog(prog)
            verdict = self.interpreter.sanity_check(space)
            if not verdict[0]:
                # sanity check rejected the sample, try again
                continue
            left_map = self.interpreter.camb_get_abs(table_in)
            right_map = self.interpreter.camb_get_abs(table_out)
            return left_map, right_map

    def _sample_random_maps(self):
        """Return ``n_neg`` abstraction maps of independent random tables."""
        maps = self._padded_maps(self.n_neg)
        for slot in range(self.n_neg):
            table = self.interpreter.random_table()
            maps[slot] = self.interpreter.camb_get_abs(table)
        return maps

    def __getitem__(self, p_ind):
        """Generate one training item on the fly.

        Should always be loaded with batch_size=1 so that the ratio of
        positive to negative examples stays n_pos : n_neg.

        Returns:
            ``(left, right, neg_left, neg_right)`` where the first two arrays
            are (n_pos, n_row, n_col) and the last two (n_neg, n_row, n_col).
        """
        pos_left = self._padded_maps(self.n_pos)
        pos_right = self._padded_maps(self.n_pos)
        for slot in range(self.n_pos):
            pos_left[slot], pos_right[slot] = self._sample_positive_pair()

        # negatives: two independent batches of random tables, paired up later
        neg_left = self._sample_random_maps()
        neg_right = self._sample_random_maps()

        return (pos_left, pos_right, neg_left, neg_right)

In [7]:
class ValueEncoder(nn.Module):
    """Encode an integer abstraction map into a fixed-size vector.

    Pipeline: token embedding -> 2-D convolution -> ReLU -> max pooling ->
    linear layer -> ReLU.

    Args:
        p_config: configuration dict. Under ``p_config["val"]`` it reads
            ``vocab_size``, ``embd_dim``, ``conv_n_kernels``,
            ``conv_kernel_size``, ``pool_kernel_size`` and ``IDX_PAD`` (index
            of the padding token in the vocabulary); ``p_config["embd_dim"]``
            is the size of the produced vector.

    The convolution and pooling kernels must together reduce a map to a
    single cell per kernel, otherwise the ``view`` in ``forward`` raises.
    """

    def __init__(self, p_config=None):
        super(ValueEncoder, self).__init__()
        self.config = p_config

        self.vocab_size = self.config["val"]["vocab_size"]
        self.embd_dim = self.config["val"]["embd_dim"]
        self.embedding = nn.Embedding(
            self.vocab_size,
            self.embd_dim,
            padding_idx=self.config["val"]["IDX_PAD"],
        )

        self.conv = nn.Conv2d(
            in_channels = self.config["val"]["embd_dim"],
            out_channels = self.config["val"]["conv_n_kernels"],
            kernel_size = self.config["val"]["conv_kernel_size"],
        )

        # IDX_PAD is a vocabulary index, not a spatial padding size, so it must
        # not be passed as the pooling `padding`; the pooling is unpadded.
        self.pool = nn.MaxPool2d(
            kernel_size = self.config["val"]["pool_kernel_size"],
        )

        self.fc = nn.Linear(
            self.config["val"]["conv_n_kernels"],
            self.config["embd_dim"],
        )

    def forward(self, bp_map):
        """Encode a batch of maps.

        Args:
            bp_map: integer tensor of token ids, (B, map_r, map_c); in this
                version every value contains exactly one map.

        Returns:
            Non-negative float tensor of shape (B, embd_dim).
        """
        B = bp_map.shape[0]

        # (B, map_r, map_c, val_embd_dim) -> (B, val_embd_dim, map_r, map_c)
        d_embd = self.embedding(bp_map).permute(0,3,1,2)

        # (B, n_kernel, map_r, 1) when the kernel spans a whole row
        d_conv = F.relu(self.conv(d_embd))

        # (B, n_kernel, 1, 1) -> (B, n_kernel)
        d_pool = self.pool(d_conv).view(B,self.config["val"]["conv_n_kernels"])

        # (B, embd_dim)
        d_out = F.relu(
            self.fc(d_pool)
        )

        return d_out
    

In [8]:
class Ranker(nn.Module):
    """Binary classifier over pairs of abstraction maps.

    Both maps of a pair are encoded with one shared ``ValueEncoder``; the two
    embeddings are concatenated and passed through a two-layer MLP that emits
    two un-normalised scores per pair (no activation on the last layer). The
    training loop in this file labels real pairs 1 and random pairs 0.

    Args:
        p_config: configuration dict; reads ``embd_dim`` at the top level and
            ``n_pos`` / ``n_neg`` / ``n_row`` / ``n_col`` under ``"ranker"``,
            and is forwarded unchanged to ``ValueEncoder``.
    """

    def __init__(self, p_config=None):
        super(Ranker, self).__init__()
        self.config = p_config
        self.value_encoder = ValueEncoder(p_config=p_config)
        pair_dim = self.config["embd_dim"] * 2
        self.fc0 = nn.Linear(pair_dim, 2048)
        self.fc1 = nn.Linear(2048, 2)

    def _encode_group(self, p_maps, p_count):
        """Drop the loader's batch axis and encode ``p_count`` maps at once."""
        shape_cfg = self.config["ranker"]
        flat = p_maps.view(p_count, shape_cfg["n_row"], shape_cfg["n_col"])
        return self.value_encoder(flat)  # (p_count, embd_dim)

    def _score(self, p_pairs):
        """MLP head; deliberately no activation after the last layer."""
        hidden = F.relu(self.fc0(p_pairs))
        return self.fc1(hidden)

    def forward(self, pL, pR, pW, pV):
        """Score the positive and negative pairs of one training item.

        Args:
            pL, pR: left/right maps of the positive pairs,
                (B=1, n_pos, map_r, map_c).
            pW, pV: left/right maps of the negative pairs,
                (B=1, n_neg, map_r, map_c).

        Returns:
            Scores of shape (n_pos + n_neg, 2), positives first.
        """
        n_pos = self.config["ranker"]["n_pos"]
        n_neg = self.config["ranker"]["n_neg"]

        vL = self._encode_group(pL, n_pos)
        vR = self._encode_group(pR, n_pos)
        vW = self._encode_group(pW, n_neg)
        vV = self._encode_group(pV, n_neg)

        pos_pairs = torch.cat([vL, vR], dim=1)  # (n_pos, embd_dim * 2)
        neg_pairs = torch.cat([vW, vV], dim=1)  # (n_neg, embd_dim * 2)
        all_pairs = torch.cat([pos_pairs, neg_pairs], dim=0)

        return self._score(all_pairs)

    def inference(self, pL, pR):
        """Score arbitrary pairs; used for actual testing.

        Args:
            pL, pR: left/right maps, each (B, map_r, map_c).

        Returns:
            Scores of shape (B, 2).
        """
        left = self.value_encoder(pL)    # (B, embd_dim)
        right = self.value_encoder(pR)   # (B, embd_dim)
        return self._score(torch.cat([left, right], dim=1))

In [9]:
def RankerTester(p_config, p_model, p_ld_test_data, p_lossfn, p_target):
    """Evaluate the ranker on a loader whose pairs all share one label.

    Args:
        p_config: configuration dict (kept for signature symmetry with
            ``RankerTrainer``; not read here).
        p_model: model exposing ``inference(left, right) -> (B, 2)`` scores.
        p_ld_test_data: iterable yielding ``(left, right)`` batches of maps.
        p_lossfn: loss applied to ``log_softmax`` scores and integer labels
            (e.g. ``nn.NLLLoss``); expected to return a scalar.
        p_target: the class index (0 or 1) every pair in the loader belongs to.

    Returns:
        ``(avg_loss, avg_prob, avg_acc)``: mean of the per-batch losses, mean
        probability assigned to ``p_target`` and fraction of pairs classified
        as ``p_target``. All three are NaN when the loader yields no batch.
        The same numbers are printed.
    """
    # Put the data wherever the model lives instead of trusting a global flag.
    try:
        device = next(p_model.parameters()).device
    except StopIteration:
        device = torch.device("cpu")

    test_loss_list = []
    test_prob_list = []
    test_accu_list = [] # per-pair booleans: prediction == p_target

    p_model.eval()
    # Evaluation needs no gradients; skipping the graph saves memory and time.
    with torch.no_grad():
        for d_left, d_right in p_ld_test_data:
            td_left = d_left.to(device)
            td_right = d_right.to(device)
            td_label = torch.full(
                (td_left.shape[0],), p_target, dtype=torch.long, device=device,
            )
            d_output = p_model.inference(td_left, td_right) # (B, 2)
            d_loss = p_lossfn(
                F.log_softmax(d_output, dim=1),
                td_label,
            )
            test_loss_list.append(d_loss.item())
            test_prob_list += F.softmax(d_output, dim=1)[:, p_target].cpu().tolist()
            test_accu_list += (torch.argmax(d_output, dim=1) == td_label).cpu().tolist()

    def _mean(p_values):
        # An empty loader yields NaN instead of a ZeroDivisionError.
        return sum(p_values) / len(p_values) if p_values else float("nan")

    avg_loss = _mean(test_loss_list)
    avg_prob = _mean(test_prob_list)
    avg_accu = _mean(test_accu_list)
    print("# Test target:{}, avg.loss:{:.2f}, avg.prob.:{:.2f}, avg.acc.:{:.2f}".format(
        p_target,
        avg_loss,
        avg_prob,
        avg_accu,
    ))
    return avg_loss, avg_prob, avg_accu
    

In [10]:
def RankerTrainer(p_config, p_model, p_ld_train_data, p_ld_testpos_data, p_ld_testneg_data, p_optim, p_lossfn):
    """Train the ranker, evaluating before training and after every epoch.

    Args:
        p_config: configuration dict; reads ``n_ep``, ``n_pos`` and ``n_neg``
            under ``p_config["ranker"]``.
        p_model: the ranker; called as ``p_model(left, right, window, violin)``.
        p_ld_train_data: loader yielding ``(left, right, window, violin)``
            with a leading batch axis of 1.
        p_ld_testpos_data: loader of positive test pairs (target 1).
        p_ld_testneg_data: loader of negative test pairs (target 0).
        p_optim: optimizer stepping the model parameters.
        p_lossfn: loss applied to ``log_softmax`` scores and integer labels.
    """

    def run_tests():
        # negatives first, then positives
        RankerTester(p_config, p_model, p_ld_testneg_data, p_lossfn, 0)
        RankerTester(p_config, p_model, p_ld_testpos_data, p_lossfn, 1)

    def place(p_tensor):
        # wrap and, when CUDA is in use, move to the GPU
        wrapped = Variable(p_tensor)
        if use_cuda:
            return wrapped.cuda()
        return wrapped

    run_tests()
    for d_ep in range(p_config["ranker"]["n_ep"]):
        losses_this_epoch = []

        for batch_idx, batch in enumerate(p_ld_train_data):
            d_left, d_right, d_window, d_violin = batch
            p_model.train()

            td_left = place(d_left)      # (B=1, n_pos, map_r, map_c)
            td_right = place(d_right)    # (B=1, n_pos, map_r, map_c)
            td_window = place(d_window)  # (B=1, n_neg, map_r, map_c)
            td_violin = place(d_violin)  # (B=1, n_neg, map_r, map_c)
            # positives come first in the model output, so do their labels
            labels = [1] * p_config["ranker"]["n_pos"] + [0] * p_config["ranker"]["n_neg"]
            td_label = place(torch.tensor(labels))

            # (n_pos+n_neg, 2)
            d_output = p_model(td_left, td_right, td_window, td_violin)
            p_optim.zero_grad()
            log_probs = F.log_softmax(d_output, dim=1)
            d_loss = p_lossfn(log_probs, td_label)
            losses_this_epoch.append(d_loss.cpu().data.numpy())
            d_loss.backward()
            p_optim.step()

            # running sum of the batch losses seen so far in this epoch
            print("\r# Training EP:{}, B:{}, ep.loss:{:.2f}".format(
                d_ep, batch_idx, sum(losses_this_epoch),
            ),end="")

        # end of epoch print a new line
        print()
        run_tests()
        

In [11]:
# Experiment setup: interpreter, DSL spec, hyper-parameters, datasets/loaders,
# model, optimizer and loss. Statement order matters because the negative test
# set and the model initialisation both draw random values.
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb3.tyrell')

m_config = {
    # settings of the ValueEncoder
    "val":{
        "vocab_size": len(m_interpreter.CAMB_LIST),
        "embd_dim": 16, # embedding dim of CAMB abstract token
        "conv_n_kernels": 512,
        "conv_kernel_size": (1,m_interpreter.CAMB_NCOL), 
        "pool_kernel_size": (m_interpreter.CAMB_NROW,1), 
        "IDX_PAD": 0,
    },
    # size of the vector the ValueEncoder produces for one table
    "embd_dim": 128,
    # settings of the datasets and of the training loop
    "ranker":{
        "data_path": "./0716MDsize1.pkl",
        "n_row": m_interpreter.CAMB_NROW,
        "n_col": m_interpreter.CAMB_NCOL,
        "n_neg": 9,
        "n_pos": 1,
        "n_ep": 1000000,
    },
}



# load the data and dataset
# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on data files from a trusted source.
with open(m_config["ranker"]["data_path"],"rb") as f:
    m_data = pickle.load(f)
    
dt_train = RankerTrainDataset(
    p_config=m_config, 
    p_dataset=m_data, 
    p_interpreter=m_interpreter,
    p_spec=m_spec,
)
# batch_size=1: each dataset item already bundles n_pos positives and n_neg negatives
ld_train = DataLoader(dataset=dt_train, batch_size=1, shuffle=True)

# positive test pairs: the examples stored in the dataset
dt_testpos = RankerTestDataset(
    p_config=m_config, 
    p_dataset=m_data, 
    p_interpreter=m_interpreter,
    p_spec=m_spec,
    is_fake=False,
)
ld_testpos = DataLoader(dataset=dt_testpos, batch_size=128, shuffle=True)

# negative test pairs: same count, but built from random tables
dt_testneg = RankerTestDataset(
    p_config=m_config, 
    p_dataset=m_data, 
    p_interpreter=m_interpreter,
    p_spec=m_spec,
    is_fake=True,
)
ld_testneg = DataLoader(dataset=dt_testneg, batch_size=128, shuffle=True)


m_ranker = Ranker(p_config=m_config)
if use_cuda:
    m_ranker = m_ranker.cuda()
optimizer = torch.optim.Adam(list(m_ranker.parameters()))
# NLLLoss expects log-probabilities; the train/test loops apply log_softmax first
lossfn = nn.NLLLoss()

# Train Dataset Programs: 78
# Test Dataset Examples: 770
# Parsing Dataset...
## in:769/770
# Test Dataset Examples: 770
# Parsing Dataset...
## in:769/770


In [12]:
# Start training. n_ep is 1,000,000, so in practice the run is stopped by hand
# (the recorded output below ends with a KeyboardInterrupt).
RankerTrainer(m_config, m_ranker, ld_train, ld_testpos, ld_testneg, optimizer, lossfn)

# Test target:0, avg.loss:0.69, avg.prob.:0.50, avg.acc.:0.50
# Test target:1, avg.loss:0.69, avg.prob.:0.50, avg.acc.:0.55
# Training EP:0, B:77, ep.loss:25.30
# Test target:0, avg.loss:0.11, avg.prob.:0.90, avg.acc.:1.00
# Test target:1, avg.loss:2.06, avg.prob.:0.18, avg.acc.:0.00
# Training EP:1, B:77, ep.loss:24.68
# Test target:0, avg.loss:0.07, avg.prob.:0.93, avg.acc.:1.00
# Test target:1, avg.loss:2.53, avg.prob.:0.11, avg.acc.:0.00
# Training EP:2, B:77, ep.loss:23.04
# Test target:0, avg.loss:0.03, avg.prob.:0.97, avg.acc.:1.00
# Test target:1, avg.loss:2.83, avg.prob.:0.11, avg.acc.:0.00
# Training EP:3, B:77, ep.loss:22.29
# Test target:0, avg.loss:0.14, avg.prob.:0.93, avg.acc.:1.00
# Test target:1, avg.loss:2.16, avg.prob.:0.19, avg.acc.:0.04
# Training EP:4, B:77, ep.loss:19.95
# Test target:0, avg.loss:0.11, avg.prob.:0.90, avg.acc.:0.97
# Test target:1, avg.loss:1.55, avg.prob.:0.36, avg.acc.:0.37
# Training EP:5, B:77, ep.loss:21.35
# Test target:0, avg.loss:0.11, av

KeyboardInterrupt: 