In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
from copy import deepcopy
import argparse
import sys
import traceback
import json
from tqdm import tqdm_notebook, tqdm

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_absolute_error

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import gridspec
from utils import *


parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.seed = 123
args.nbits = 2048
args.n_splits = 5
args.test_size = 0.2
args.num_mol = 50000
args.max_len = 120
args.shuffle = True
args.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
args.optim = 'RMSProp'

np.random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x7f180f3cb090>

# 1. Prepare Dataset

In [3]:
list_partition = make_partition(args)

# 2. Model Architecture

Create char_to_ix Ref: https://github.com/pytorch/tutorials/blob/master/beginner_source/nlp/word_embeddings_tutorial.py  
Pre-defined Embedding Layer Ref: https://medium.com/@martinpella/how-to-use-pre-trained-word-embeddings-in-pytorch-71ca59249f76  
ResNet Variation Ref: https://towardsdatascience.com/an-overview-of-resnet-and-its-variants-5281e2f56035

In [4]:
def create_vocab(filename='../Data/logP/vocab.npy'):
    vocab = np.load(filename)
    vocab_size = len(vocab)
    char_to_ix = {char: i for i, char in enumerate(vocab)}
    return vocab, char_to_ix

In [15]:
class ResBlock(nn.Module):
    def __init__(self, in_filter, out_filter, stride, use_bn, dp_rate, block_type):
        super(ResBlock, self).__init__()   
        self.use_bn = use_bn
        self.block_type = block_type
        self.conv1 = nn.Conv2d(in_filter, out_filter, kernel_size=3, stride=stride, padding=1, bias=False)
        self.conv2 = nn.Conv2d(out_filter, out_filter, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(out_filter)
        self.bn2 = nn.BatchNorm2d(out_filter)
        self.dropout = nn.Dropout2d(dp_rate)
        self.shortcut = nn.Sequential()
        if in_filter != out_filter:
            self.shortcut.add_module(
                'conv', nn.Conv2d(in_filter, out_filter,
                                  kernel_size=1, stride=stride, 
                                  padding=0, bias=False)
            )
        
    def forward(self, _x):
        if self.block_type == 'a': #original residual block
            x = self.relu(self.bn1(self.conv1(_x))) if self.use_bn else self.relu(self.conv1(_x))
            x = self.bn2(self.conv2(x)) if self.use_bn else self.conv2(x)
            x = x + self.shortcut(_x)
            return self.dropout(self.relu(x))
        
        elif self.block_type == 'b': # BN after addition
            x = self.relu(self.bn1(self.conv1(_x))) if self.use_bn else self.relu(self.conv1(_x))
            x = self.conv2(x) + self.shortcut(_x)
            return self.dropout(self.relu(self.bn2(x)) if self.use_bn else self.relu(x))
        
        elif self.block_type == 'c': # ReLU before addition
            x = self.relu(self.bn1(self.conv1(_x))) if self.use_bn else self.relu(self.conv1(_x))
            x = self.relu(self.bn2(self.conv2(x))) if self.use_bn else self.relu(self.conv2(x))
            return self.dropout(x + self.shortcut(_x))
        
        elif self.block_type == 'd': # ReLU-only pre-activation
            x = self.bn1(self.conv1(self.relu(_x))) if self.use_bn else self.conv1(self.relu(_x))
            x = self.bn2(self.conv2(self.relu(x))) if self.use_bn else self.conv2(self.relu(x))
            return self.dropout(x + self.shortcut(_x))
        
        elif self.block_type == 'e': # full pre-activation
            x = self.conv1(self.relu(self.bn1(_x))) if self.use_bn else self.conv1(self.relu(_x))
            x = self.conv2(self.relu(self.bn2(x))) if self.use_bn else self.conv2(self.relu(x))
            return self.dropout(x + self.shortcut(_x))
             
            

class Net(nn.Module):
    def __init__(self, args):
        super(Net, self).__init__()   
        
        # Create Atom Element embedding layer
        self.embedding = self.create_emb_layer(args.vocab_size, args.emb_train)
        
        # Create Residual Convolution layer
        list_res_blocks = list()
        n_channel = 1
        for i in range(args.n_stage):
            if i==0:
                list_res_blocks.append(ResBlock(n_channel, n_channel*args.start_channel, args.stride, args.use_bn, args.dp_rate, args.block_type))
                n_channel *= args.start_channel
            else:
                list_res_blocks.append(ResBlock(n_channel, n_channel*2, args.stride, args.use_bn, args.dp_rate, args.block_type))
                n_channel *= 2
            for j in range(args.n_layer-1):
                list_res_blocks.append(ResBlock(n_channel, n_channel, 1, args.use_bn, args.dp_rate, args.block_type))
        self.res_blocks = nn.Sequential(*list_res_blocks)
        
        # Create MLP layers
        fc_shape = self._estimate_fc_shape((1, args.max_len,))
#         fc_shape = (4000, 200)
        self.fc1 = nn.Linear(fc_shape[-1], 200)
        self.fc2 = nn.Linear(200, 50)
        self.fc3 = nn.Linear(50, 1)

        self.relu = nn.ReLU()

    def forward(self, x):
        x = self._conv_forward(x)
        x = x.view(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return torch.squeeze(x)
    
    def _conv_forward(self, x):
        embeds = self.embedding(x)
        embeds = embeds.view(embeds.shape[0], 1, embeds.shape[1], embeds.shape[2])
        x = self.res_blocks(embeds)
        return x
    
    def _estimate_fc_shape(self, input_shape):
        dummy_input = torch.zeros(input_shape).long()
        dummy_output = self._conv_forward(dummy_input)
        fc_shape = dummy_output.view(dummy_output.shape[0], -1).shape
        return fc_shape
        

    def create_emb_layer(self, vocab_size, emb_train):
        emb_layer = nn.Embedding(vocab_size, vocab_size)
        weight_matrix = torch.zeros((vocab_size, vocab_size))
        for i in range(vocab_size):
            weight_matrix[i][i] = 1
        emb_layer.load_state_dict({'weight': weight_matrix})

        if not emb_train:
            emb_layer.weight.requires_grad = False
        return emb_layer

# 3. Train, Validate, Evaluate

In [16]:
def train(model, partition, optimizer, criterion, char_to_ix, args, **kwargs):
    data_iter = DataLoader(
        partition['train'],
        batch_size=args.batch_size,
        shuffle=args.shuffle
    )
    
    epoch_train_loss = 0
    cnt_iter = 0
    for batch_idx, batch in enumerate(data_iter):
        X, y = batch[0], batch[1]
        X = torch.Tensor([[char_to_ix[c] for c in smile] for smile in X]).long()
        X, y = X.to(args.device), y.to(args.device).float()
    
        model.train()
        optimizer.zero_grad()

        pred_y = model(X)
        pred_y.require_grad = False
        train_loss = criterion(pred_y, y)
        epoch_train_loss += train_loss.item()
        train_loss.backward()
        optimizer.step()

        cnt_iter += 1
        args.bar.update(len(X))

    return model, epoch_train_loss/cnt_iter

def validate(model, partition, criterion, char_to_ix, args):
    data_iter = DataLoader(
        partition['val'],
        batch_size=args.test_batch_size,
        shuffle=args.shuffle
    )
    
    epoch_val_loss = 0
    cnt_iter = 0
    for batch_idx, batch in enumerate(data_iter):
        X, y = batch[0], batch[1]
        X = torch.Tensor([[char_to_ix[c] for c in smile] for smile in X]).long()
        X, y = X.to(args.device), y.to(args.device).float()
    
        model.eval()
        pred_y = model(X)
        pred_y.require_grad = False
        val_loss = criterion(pred_y, y)
        epoch_val_loss += val_loss.item()
        cnt_iter += 1

    return epoch_val_loss/cnt_iter

def test(model, partition, char_to_ix, args, **kwargs):
    data_iter = DataLoader(
        partition['test'],
        batch_size=args.test_batch_size,
        shuffle=False
    )
    
    list_y, list_pred_y = list(), list()
    for batch_idx, batch in enumerate(data_iter):
        X, y = batch[0], batch[1]
        X = torch.Tensor([[char_to_ix[c] for c in smile] for smile in X]).long()
        X, y = X.to(args.device), y.to(args.device).float()
    
        model.eval()
        pred_y = model(X)
        list_y += y.cpu().detach().numpy().tolist()
        list_pred_y += pred_y.cpu().detach().numpy().tolist()
        args.bar.update(len(X))

    mae = mean_absolute_error(list_y, list_pred_y)
    std = np.std(np.array(list_y)-np.array(list_pred_y))
    return mae, std, np.array(list_y), np.array(list_pred_y)

def experiment(partition, args):
    ts = time.time()
    vocab, char_to_ix = create_vocab()
    args.vocab_size = len(vocab)
    args.input_shape = (args.max_len, args.vocab_size)
    model = Net(args)
    model.to(args.device)
    criterion = nn.MSELoss()
    
    # Initialize Optimizer
    trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
    if args.optim == 'ADAM':
        optimizer = optim.Adam(trainable_parameters, lr=args.lr, weight_decay=args.l2_coef)
    elif args.optim == 'RMSProp':
        optimizer = optim.RMSprop(trainable_parameters, lr=args.lr, weight_decay=args.l2_coef)
    elif args.optim == 'SGD':
        optimizer = optim.SGD(trainable_parameters, lr=args.lr, weight_decay=args.l2_coef)
    else:
        assert False, "Undefined Optimizer Type"
        
    # Train, Validate, Evaluate
    list_train_loss = list()
    list_val_loss = list()
    list_mae = list()
    list_std = list()
    
    args.best_mae = 10000
    for epoch in range(args.epoch):
        model, train_loss = train(model, partition, optimizer, criterion, char_to_ix, args, **{'bar':bar})
        val_loss = validate(model, partition, criterion, char_to_ix, args)
        mae, std, true_y, pred_y = test(model, partition, char_to_ix, args, **{'bar':bar})

        list_train_loss.append(train_loss)
        list_val_loss.append(val_loss)
        list_mae.append(mae)
        list_std.append(std)
        
        if args.best_mae > mae:
            args.best_epoch = epoch
            args.best_mae = mae
            args.best_std = std
            args.best_true_y = true_y
            args.best_pred_y = pred_y
    
    te = time.time()
    
    # Logging Experiment Results
    args.elapsed = te-ts
    args.train_losses = list_train_loss
    args.val_losses = list_val_loss
    args.maes = list_mae
    args.stds = list_std
    return args

# Experiment.1  

In [17]:
args.exp_name = 'exp_hidden_dim'
args.n_layer = 2
args.n_stage = 1
args.lr = 0.001
args.l2_coef = 0
args.optim = 'RMSProp'
args.epoch = 3
args.batch_size=32
args.test_batch_size=32
args.emb_train = False
args.start_channel = 8
args.stride = 1
args.use_bn = True
args.dp_rate = 0
args.block_type = 'a'
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
writer = Writer(prior_keyword=['n_layer', 'block_type', 'use_bn', 'dp_rate', 'emb_train', 'epoch', 'batch_size'])
partition = list_partition[0]

# Define Hyperparameter Search Space
list_n_layer = [1, 2]#,3,4,5]
list_n_stage = [1, 2]#,3,4,5]

# Initialize num iteration, num experiment, progress bar
n_iter = args.epoch * (len(partition['train']) + len(partition['test']))
n_exp = len(list_n_layer)*len(list_n_stage)
cnt_exp = 0
bar = tqdm_notebook(total=n_exp*n_iter, file=sys.stdout, position=0)
bar.set_description('P {:2}/{} Exp'.format(cnt_exp, n_exp))

for n_layer in list_n_layer:
    for n_stage in list_n_stage:
        # Update hyperparameter
        args.n_layer = n_layer
        args.n_stage = n_stage
        args.bar = bar
        result = experiment(partition, args)
        writer.write(result)
        torch.cuda.empty_cache()

        cnt_exp += 1
        bar.set_description('P {:2}/{} Exp'.format(cnt_exp, n_exp))
        print('[Exp {:2}] got mae: {:2.2f}, std: {:2.2f} at epoch {:2}'.format(cnt_exp, result.best_mae, result.best_std, result.epoch))


HBox(children=(IntProgress(value=0, max=504000), HTML(value='')))


[Exp  1] got mae: 1.33, std: 1.30 at epoch  3
[Exp  2] got mae: 1.33, std: 1.30 at epoch  3
[Exp  3] got mae: 1.33, std: 1.30 at epoch  3
[Exp  4] got mae: 1.33, std: 1.30 at epoch  3
