## TransNeo/AlphaNeo Ranker
- Tells which pair of tables is functionally/logically closer along the execution chain
- Stage: Cambrian
- Version: Spriggina

In [1]:
import logging 
# Raise the root logger threshold to CRITICAL so that only critical
# messages are emitted while the notebook runs.
logging.basicConfig(level=logging.CRITICAL)

In [2]:
import os
import itertools
import copy
import random
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# from tensorboardX import SummaryWriter

# Module-level flag: True when PyTorch reports a usable CUDA device.
# Later cells read it to decide whether tensors/models are moved to the GPU.
use_cuda = torch.cuda.is_available()
print("use_cuda: {}".format(use_cuda))

use_cuda: True


In [3]:
import tyrell.spec as S
from tyrell.decider import Example

# Morpheus Version
from MorpheusInterpreter import *
from ProgramSpace import *

In [4]:
# Notebook display expression: shows the installed PyTorch version
# (the recorded run printed '1.0.0').
torch.__version__

'1.0.0'

In [5]:
class RankerDataset(Dataset):
    """Training dataset that synthesizes a fresh random "triangle" per access.

    Every call to ``__getitem__`` ignores the index and draws new random
    positive/negative material, so two reads of the same index generally
    differ.

    Args:
        p_config: configuration dict; ``p_config["ranker"]["n_row"]`` and
            ``p_config["ranker"]["n_col"]`` are read at construction time.
        p_dataset: mapping whose values are indexed as ``value[0][0]`` to
            obtain one program per key (schema beyond that is not visible
            here -- TODO confirm against the pickle producer).
        p_interpreter: interpreter object providing ``random_table``,
            ``eval``, ``sanity_check`` and ``camb_get_abs``.
        p_spec: DSL spec forwarded to ``ProgramSpace``.

    ``n_sample`` (the value reported by ``len()``) starts as ``None`` and
    must be assigned by the caller before the dataset is given to a loader.
    """

    def __init__(self, p_config=None, p_dataset=None, p_interpreter=None, p_spec=None):
        self.n_sample = None  # should manually assign
        self.interpreter = p_interpreter
        self.spec = p_spec
        self.dataset = p_dataset
        self.config = p_config

        # record all possible programs
        self.progs = [
            self.dataset[dkey][0][0]
            for dkey in self.dataset.keys()
        ]

        self.n_exp = len(self.progs)
        self.n_row = self.config["ranker"]["n_row"]
        self.n_col = self.config["ranker"]["n_col"]

    def __len__(self):
        """Return the manually assigned number of samples per epoch."""
        return self.n_sample

    def _has_two_distinct_programs(self):
        """Return True when at least two programs in the pool compare unequal."""
        if not self.progs:
            return False
        d_first = self.progs[0]
        return any(not (d_prog == d_first) for d_prog in self.progs[1:])

    def _sample_edge(self, p_input=None, p_avoid=()):
        """Rejection-sample one (program, input, output) edge that passes the sanity check.

        Args:
            p_input: input table to evaluate on. When ``None`` a new random
                table is drawn on every attempt.
            p_avoid: programs that must not be picked (compared with ``==``).

        Returns:
            Tuple ``(program, input_table, output_table)``.

        Note:
            Retries without bound: programs whose evaluation raises, or whose
            program space fails ``sanity_check``, are silently skipped.
        """
        d_resample = p_input is None
        while True:
            d_prog = random.choice(self.progs)
            if any(d_prog == d_other for d_other in p_avoid):
                continue
            d_input = self.interpreter.random_table() if d_resample else p_input
            try:
                d_eval = self.interpreter.eval(d_prog, [d_input])
            except Exception:
                # deliberate best-effort: an un-evaluable draw is just rejected
                continue
            d_example = Example(input=[d_input], output=d_eval)
            d_ps = ProgramSpace(
                self.spec, self.interpreter,
                d_example.input, d_example.output,
            )
            d_ps.init_by_prog(d_prog)
            # sanity_check returns an indexable result; element 0 is the pass flag
            if self.interpreter.sanity_check(d_ps)[0]:
                return d_prog, d_input, d_eval

    def get_triangle(self):
        """Return a single triangle sample ``(A, B, C, D)``.

        ``A`` is a random input table, ``B`` and ``C`` are the outputs of two
        different programs applied to ``A`` (A->B, A->C), and ``D`` is an
        unrelated random table.  Intended pairings:

            pos: (A,B), (A,C)
            neg: (B,A), (C,A)
            neg: (B,C), (C,B)
            neg: (A,D), (B,D), (C,D)
            neg: (D,A), (D,B), (D,C)

        Each element of the returned tuple is the result of
        ``interpreter.camb_get_abs`` on the corresponding table (presumably a
        (map_r, map_c) abstraction map -- TODO confirm).

        Raises:
            ValueError: if the program pool does not contain two distinct
                programs; sampling A->C would otherwise never terminate.
        """
        if not self._has_two_distinct_programs():
            raise ValueError(
                "RankerDataset needs at least two distinct programs to sample a triangle"
            )

        # sample (A,B): the input table is redrawn on every rejected attempt
        d_progAB, d_inputA, d_evalB = self._sample_edge()

        # sample (A,C): same input A, but a program different from the A->B one
        _, _, d_evalC = self._sample_edge(p_input=d_inputA, p_avoid=(d_progAB,))

        # then randomly generate a table D
        d_randD = self.interpreter.random_table()

        # all (map_r, map_c)
        map_inputA = self.interpreter.camb_get_abs(d_inputA)
        map_evalB = self.interpreter.camb_get_abs(d_evalB)
        map_evalC = self.interpreter.camb_get_abs(d_evalC)
        map_randD = self.interpreter.camb_get_abs(d_randD)

        return (map_inputA, map_evalB, map_evalC, map_randD)

    def __getitem__(self, p_ind):
        """Return a freshly sampled triangle; ``p_ind`` is ignored.

        Should always be used with batch_size=1 so as to ensure the ratio of
        negative examples.
        """
        return self.get_triangle()

In [6]:
class ValueEncoder(nn.Module):
    """Encode a batch of integer token maps into fixed-size vectors.

    Pipeline: token embedding -> 2-D convolution -> ReLU -> max pooling ->
    linear projection -> ReLU.

    Args:
        p_config: configuration dict. Keys read:
            ``["val"]["vocab_size"]``, ``["val"]["embd_dim"]``,
            ``["val"]["IDX_PAD"]`` (embedding padding index),
            ``["val"]["conv_n_kernels"]``, ``["val"]["conv_kernel_size"]``,
            ``["val"]["pool_kernel_size"]`` and the top-level ``["embd_dim"]``
            (output size).
    """

    def __init__(self, p_config=None):
        super(ValueEncoder, self).__init__()
        self.config = p_config
        val_cfg = self.config["val"]

        self.vocab_size = val_cfg["vocab_size"]
        self.embd_dim = val_cfg["embd_dim"]
        self.embedding = nn.Embedding(
            self.vocab_size,
            self.embd_dim,
            padding_idx=val_cfg["IDX_PAD"],
        )

        self.conv = nn.Conv2d(
            in_channels=val_cfg["embd_dim"],
            out_channels=val_cfg["conv_n_kernels"],
            kernel_size=val_cfg["conv_kernel_size"],
        )

        # IDX_PAD is a vocabulary index, not a spatial padding width, so it
        # must not be passed as the pooling `padding`; pooling is unpadded.
        self.pool = nn.MaxPool2d(
            kernel_size=val_cfg["pool_kernel_size"],
        )

        self.fc = nn.Linear(
            val_cfg["conv_n_kernels"],
            self.config["embd_dim"],
        )

    def forward(self, bp_map):
        """Encode batched maps.

        Args:
            bp_map: integer tensor of shape (B, map_r, map_c); in this
                version every value only contains 1 map.

        Returns:
            Non-negative tensor of shape (B, embd_dim).

        The reshape after pooling requires conv + pool to reduce the spatial
        extent to 1x1 (e.g. conv kernel (1, map_c) and pool kernel (map_r, 1)).
        """
        B = bp_map.shape[0]
        n_kernels = self.config["val"]["conv_n_kernels"]

        # (B, map_r, map_c, val_embd_dim) -> (B, val_embd_dim, map_r, map_c)
        d_embd = self.embedding(bp_map).permute(0, 3, 1, 2)

        # (B, n_kernel, map_r, 1) when the conv kernel spans a whole row
        d_conv = F.relu(self.conv(d_embd))

        # (B, n_kernel); view() fails loudly if pooling did not reach 1x1
        d_pool = self.pool(d_conv).view(B, n_kernels)

        # (B, embd_dim)
        return F.relu(self.fc(d_pool))
    

In [7]:
class Ranker(nn.Module):
    """Binary classifier over ordered pairs of encoded tables.

    Each table map is encoded by a shared ``ValueEncoder``; ordered pairs of
    encodings are concatenated and scored by a 3-layer MLP producing 2 logits
    per pair.

    Args:
        p_config: configuration dict; ``p_config["embd_dim"]`` sets the
            encoder output size, the rest is consumed by ``ValueEncoder``.
    """

    def __init__(self, p_config=None):
        super(Ranker, self).__init__()
        self.config = p_config
        self.value_encoder = ValueEncoder(p_config=p_config)
        self.fc0 = nn.Linear(
            self.config["embd_dim"] * 2,
            2048,
        )
        self.fc1 = nn.Linear(
            2048,
            512,
        )
        self.fc2 = nn.Linear(
            512,
            2,
        )

    def forward(self, pA, pB, pC, pD):
        """Score the six ordered pairs built from A, B and C.

        Args:
            pA, pB, pC: batched maps, (B=1, map_r, map_c).
            pD: accepted for interface compatibility; no pair involving D is
                built in this version, so it is not encoded.

        Returns:
            Raw logits of shape (6 * B, 2).  Row order is
            (A,B), (A,C), (B,A), (B,C), (C,A), (C,B): the first two rows are
            the positive pairs, the remaining four are negatives.
        """
        # (B, embd_dim) each
        vA = self.value_encoder(pA)
        vB = self.value_encoder(pB)
        vC = self.value_encoder(pC)

        d_pairs = [
            (vA, vB), (vA, vC),                      # positives
            (vB, vA), (vB, vC), (vC, vA), (vC, vB),  # negatives
        ]

        # (6*B, embd_dim * 2)
        vII = torch.cat(
            [torch.cat(d_pair, dim=1) for d_pair in d_pairs],
            dim=0,
        )

        # == Notice: don't do any activation at the last layer ==
        d_hidden = F.relu(self.fc0(vII))
        d_hidden = F.relu(self.fc1(d_hidden))
        # (6*B, 2)
        return self.fc2(d_hidden)

In [8]:
def RankerTester(p_config, p_model, pld_test, p_lossfn):
    """Evaluate the ranker on a test loader, print and return averaged metrics.

    Args:
        p_config: configuration dict (not read here; kept for a signature
            parallel to ``RankerTrainer``).
        p_model: model called as ``p_model(A, B, C, D)`` and expected to
            return logits of shape (6, 2): 2 positive rows, then 4 negatives.
        pld_test: iterable yielding ``(dA, dB, dC, dD)`` tensor tuples.
        p_lossfn: loss taking ``(log_probabilities, labels)``.

    Returns:
        dict with keys ``loss``, ``prob_neg``, ``prob_pos``, ``acc_neg``,
        ``acc_pos``, ``score_neg``, ``score_pos``.  ``prob_*`` is the
        probability assigned to the correct class, ``score_*`` the
        probability of class 1 (the similarity score).  Every value is NaN
        when the loader yields nothing.

    Side effects:
        Leaves ``p_model`` in eval mode and prints one summary line.
    """
    def _mean(p_values):
        return sum(p_values) / len(p_values) if p_values else float("nan")

    # run on whatever device the model lives on instead of a global flag
    try:
        d_device = next(p_model.parameters()).device
    except StopIteration:
        d_device = torch.device("cpu")

    # 2 positive pairs followed by 4 negative pairs
    td_label = torch.tensor([1, 1, 0, 0, 0, 0], device=d_device)

    test_loss_list = []
    test_pPOS_list = []
    test_pNEG_list = []
    test_aPOS_list = []
    test_aNEG_list = []
    test_sPOS_list = []
    test_sNEG_list = []

    p_model.eval()
    # evaluation needs no autograd graph
    with torch.no_grad():
        for dA, dB, dC, dD in pld_test:
            d_output = p_model(
                dA.to(d_device), dB.to(d_device),
                dC.to(d_device), dD.to(d_device),
            )  # (6, 2)
            d_loss = p_lossfn(
                F.log_softmax(d_output, dim=1),
                td_label,
            )
            d_prob = F.softmax(d_output, dim=1).cpu()
            d_hit = (torch.argmax(d_output, dim=1) == td_label).cpu()

            test_loss_list.append(d_loss.item())
            test_pPOS_list += d_prob[:2, 1].tolist()
            test_pNEG_list += d_prob[2:, 0].tolist()
            test_aPOS_list += d_hit[:2].tolist()
            test_aNEG_list += d_hit[2:].tolist()
            test_sPOS_list += d_prob[:2, 1].tolist()
            test_sNEG_list += d_prob[2:, 1].tolist()

    d_stats = {
        "loss": _mean(test_loss_list),
        "prob_neg": _mean(test_pNEG_list),
        "prob_pos": _mean(test_pPOS_list),
        "acc_neg": _mean(test_aNEG_list),
        "acc_pos": _mean(test_aPOS_list),
        "score_neg": _mean(test_sNEG_list),  # score is the similarity score
        "score_pos": _mean(test_sPOS_list),
    }

    # each pair of numbers is printed as negative/positive
    print("# Test avg.loss:{:.2f}, avg.prob.:{:.2f}/{:.2f}, avg.acc.:{:.2f}/{:.2f}, avg.score:{:.2f}/{:.2f}".format(
        d_stats["loss"],
        d_stats["prob_neg"],
        d_stats["prob_pos"],
        d_stats["acc_neg"],
        d_stats["acc_pos"],
        d_stats["score_neg"],
        d_stats["score_pos"],
    ))

    return d_stats
    

In [9]:
def RankerTrainer(p_config, p_model, pld_train, pld_test, p_optim, p_lossfn):
    """Train the ranker, evaluating before training and after every epoch.

    Args:
        p_config: configuration dict; ``p_config["ranker"]["n_ep"]`` is the
            number of epochs.
        p_model: model called as ``p_model(A, B, C, D)`` and expected to
            return logits of shape (6, 2): 2 positive rows, then 4 negatives.
        pld_train: iterable yielding ``(dA, dB, dC, dD)`` training tuples.
        pld_test: loader forwarded to ``RankerTester``.
        p_optim: optimizer over the model parameters.
        p_lossfn: loss taking ``(log_probabilities, labels)``.

    Prints a carriage-return-refreshed progress line with the running sum of
    batch losses for the current epoch, then the test summary.
    """
    # run on whatever device the model lives on instead of a global flag
    try:
        d_device = next(p_model.parameters()).device
    except StopIteration:
        d_device = torch.device("cpu")

    # 2 positive pairs followed by 4 negative pairs; identical for every batch
    td_label = torch.tensor([1, 1, 0, 0, 0, 0], device=d_device)

    RankerTester(p_config, p_model, pld_test, p_lossfn)
    for d_ep in range(p_config["ranker"]["n_ep"]):
        # the tester leaves the model in eval mode, so re-enable training mode
        p_model.train()
        d_epoch_loss = 0.0
        for batch_idx, (dA, dB, dC, dD) in enumerate(pld_train):
            p_optim.zero_grad()

            # (6, 2)
            d_output = p_model(
                dA.to(d_device), dB.to(d_device),
                dC.to(d_device), dD.to(d_device),
            )
            d_loss = p_lossfn(
                F.log_softmax(d_output, dim=1),
                td_label,
            )
            d_loss.backward()
            p_optim.step()

            d_epoch_loss += d_loss.item()
            print("\r# Training EP:{}, B:{}, ep.loss:{:.2f}".format(
                d_ep, batch_idx, d_epoch_loss,
            ),end="")

        # end of epoch print a new line
        print()
        RankerTester(p_config, p_model, pld_test, p_lossfn)

        # checkpointing is currently disabled:
        # if d_ep%10==0:
        # torch.save(p_model.state_dict(), "./saved_models/0722CambRanker_Zion_ep{}.pt".format(d_ep))
            
        

In [10]:
# Interpreter and DSL spec shared by the train and test datasets.
m_interpreter = MorpheusInterpreter()
m_spec = S.parse_file('./example/camb3.tyrell')

# Hyper-parameters. "val" configures the ValueEncoder, the top-level
# "embd_dim" is the encoder output size, "ranker" holds data/training settings.
m_config = dict(
    val=dict(
        vocab_size=len(m_interpreter.CAMB_LIST),
        embd_dim=16,  # embedding dim of CAMB abstract token
        conv_n_kernels=512,
        conv_kernel_size=(1, m_interpreter.CAMB_NCOL),
        pool_kernel_size=(m_interpreter.CAMB_NROW, 1),
        IDX_PAD=0,
    ),
    embd_dim=128,
    ranker=dict(
        data_path="./0716MDsize1.pkl",
        train_size=500,  # how many samples in every epoch
        test_size=100,
        n_row=m_interpreter.CAMB_NROW,
        n_col=m_interpreter.CAMB_NCOL,
        n_ep=1000000,
    ),
)

# Load the pickled program collection.
# NOTE(review): pickle.load can execute arbitrary code -- only use trusted files.
with open(m_config["ranker"]["data_path"], "rb") as f:
    m_data = pickle.load(f)

# Training split: samples are generated on the fly, n_sample sets the epoch length.
dt_train = RankerDataset(m_config, m_data, m_interpreter, m_spec)
dt_train.n_sample = m_config["ranker"]["train_size"]
ld_train = DataLoader(dt_train, batch_size=1, shuffle=True)

# Test split: same program pool, different epoch length.
dt_test = RankerDataset(m_config, m_data, m_interpreter, m_spec)
dt_test.n_sample = m_config["ranker"]["test_size"]
ld_test = DataLoader(dt_test, batch_size=1, shuffle=True)

# Model (moved to the GPU when one is available), optimizer and loss.
m_ranker = Ranker(m_config)
m_ranker = m_ranker.cuda() if use_cuda else m_ranker
optimizer = torch.optim.Adam([*m_ranker.parameters()])
lossfn = nn.NLLLoss()

In [11]:
# Start training with the objects built in the previous cell. The loop runs
# for m_config["ranker"]["n_ep"] epochs; the recorded run below was stopped
# manually (KeyboardInterrupt).
RankerTrainer(m_config, m_ranker, ld_train, ld_test, optimizer, lossfn)

# Test avg.loss:0.70, avg.prob.:0.49/0.51, avg.acc.:0.00/1.00, avg.score:0.51/0.51
# Training EP:0, B:499, ep.loss:246.95
# Test avg.loss:0.51, avg.prob.:0.64/0.62, avg.acc.:0.51/0.95, avg.score:0.36/0.62
# Training EP:1, B:499, ep.loss:214.99
# Test avg.loss:0.37, avg.prob.:0.82/0.60, avg.acc.:0.83/0.78, avg.score:0.18/0.60
# Training EP:2, B:499, ep.loss:205.46
# Test avg.loss:0.41, avg.prob.:0.80/0.63, avg.acc.:0.83/0.81, avg.score:0.20/0.63
# Training EP:3, B:499, ep.loss:192.31
# Test avg.loss:0.37, avg.prob.:0.80/0.67, avg.acc.:0.79/0.84, avg.score:0.20/0.67
# Training EP:4, B:499, ep.loss:191.82
# Test avg.loss:0.36, avg.prob.:0.79/0.70, avg.acc.:0.81/0.84, avg.score:0.21/0.70
# Training EP:5, B:499, ep.loss:173.96
# Test avg.loss:0.30, avg.prob.:0.87/0.62, avg.acc.:0.88/0.75, avg.score:0.13/0.62
# Training EP:6, B:499, ep.loss:171.33
# Test avg.loss:0.32, avg.prob.:0.87/0.68, avg.acc.:0.88/0.79, avg.score:0.13/0.68
# Training EP:7, B:499, ep.loss:165.69
# Test avg.loss:0.29, av

KeyboardInterrupt: 