In [1]:
# Step up one directory; the recorded output shows this landing in the
# categorical_bpl checkout, which is presumably what lets the project-local
# imports in the next cell resolve.
# NOTE: not idempotent -- every re-run of this cell moves one more level up.
%cd ..

/home/eli/AnacondaProjects/categorical_bpl


In [2]:
import argparse
import collections
import pyro
import torch
import numpy as np
import data_loader.data_loaders as module_data
import model.model as module_arch
from parse_config import ConfigParser
from trainer import Trainer



In [3]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
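# Optional debugging switches, left disabled for normal runs; uncomment while
# chasing invalid distribution arguments or NaN/Inf gradients.
# pyro.enable_validation(True)
# torch.autograd.set_detect_anomaly(True)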

In [5]:
# fix random seeds for reproducibility
# Pin the NumPy and PyTorch random number generators to one seed so that
# repeated runs of the notebook draw the same numbers.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
# cuDNN: turn off benchmark mode and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [6]:
# Stand-in for parsed command-line arguments: a record carrying the three
# fields (config, resume, device) that is handed to ConfigParser.from_args.
Args = collections.namedtuple(typename='Args', field_names='config resume device')
config = ConfigParser.from_args(
    Args(config='chemical_config.json', resume=None, device=None)
)

In [7]:
# Fetch the logger registered under the name 'train' from the parsed config.
logger = config.get_logger('train')

In [8]:
# Set up the data loader instances: the training loader is instantiated from
# the 'data_loader' entry of the config (resolved against data_loader.data_loaders),
# and the validation loader is obtained from it via split_validation().
data_loader = config.init_obj('data_loader', module_data)
valid_data_loader = data_loader.split_validation()

In [9]:
# Build the model architecture from the 'arch' entry of the config (resolved
# against model.model). Nothing is printed here; the module structure is
# echoed later by the `model.cpu()` cell.
model = config.init_obj('arch', module_arch)

In [10]:
# Adam (AMSGrad variant, lr 1e-4, no weight decay) wrapped in Pyro's
# ReduceLROnPlateau: per the PyTorch scheduler of the same name, the learning
# rate is multiplied by `factor` once the monitored value has stopped
# improving for more than `patience` scheduler steps.
optimizer = pyro.optim.ReduceLROnPlateau(dict(
    optimizer=torch.optim.Adam,
    optim_args=dict(lr=1e-4, weight_decay=0, amsgrad=True),
    patience=5,
    factor=0.1,
    verbose=True,
))

In [11]:
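# Alternative to the hard-coded optimizer above: build it from the
# 'optimizer' entry of the config instead.
# optimizer = config.init_obj('optimizer', pyro.optim)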

In [12]:
# Wire the model, optimizer, config and both data loaders into the project's
# Trainer. The empty list is the second positional argument (presumably the
# metric functions -- confirm against Trainer's signature). The same
# `optimizer` object is passed as both optimizer and lr_scheduler, presumably
# because the ReduceLROnPlateau wrapper built above plays both roles.
# log_images=False is forwarded as-is.
trainer = Trainer(model, [], optimizer, config=config,
                  data_loader=data_loader,
                  valid_data_loader=valid_data_loader,
                  lr_scheduler=optimizer, log_images=False)

In [13]:
# Run training; the recorded log below reports `loss` and `val_loss` per epoch.
trainer.train()

    epoch          : 1
    loss           : 31931.124110094923
    val_loss       : 23639.291135074527
    epoch          : 2
    loss           : 22602.933402103776
    val_loss       : 21641.625861329885
    epoch          : 3
    loss           : 21558.244393931313
    val_loss       : 21382.67244413791
    epoch          : 4
    loss           : 21281.185568539357
    val_loss       : 21153.943108900814
    epoch          : 5
    loss           : 21137.57281223336
    val_loss       : 21053.74821007684
    epoch          : 6
    loss           : 21010.087454004904
    val_loss       : 20869.71012652193
    epoch          : 7
    loss           : 20909.208757732507
    val_loss       : 20762.819365686133
    epoch          : 8
    loss           : 20841.687205031463
    val_loss       : 20968.142700413257
    epoch          : 9
    loss           : 20879.810721856335
    val_loss       : 20792.072276242816
    epoch          : 10
    loss           : 20806.254887811967
    val_loss 

In [14]:
# Move the model to the CPU before the evaluation cells below. As the last
# expression of the cell, the returned module is also echoed, which is where
# the architecture listing in the output comes from.
model.cpu()

MolecularVaeCategoryModel(
  (_category): FreeCategory(
    (generator_0): MolecularDecoder(
      (pre_recurrence_linear): Sequential(
        (0): Linear(in_features=196, out_features=196, bias=True)
        (1): SELU()
      )
      (recurrence): GRU(196, 50, num_layers=3, batch_first=True)
      (decoder): Sequential(
        (0): Linear(in_features=50, out_features=34, bias=True)
        (1): LogSoftmax(dim=-1)
      )
    )
    (generator_0_dagger): ConvMolecularEncoder(
      (smiles_conv): Sequential(
        (0): Conv1d(120, 9, kernel_size=(9,), stride=(1,))
        (1): ReLU()
        (2): Conv1d(9, 9, kernel_size=(9,), stride=(1,))
        (3): ReLU()
        (4): Conv1d(9, 10, kernel_size=(11,), stride=(1,))
        (5): ReLU()
      )
      (smiles_linear): Sequential(
        (0): Linear(in_features=80, out_features=196, bias=True)
        (1): SELU()
      )
      (embedding_loc): Linear(in_features=196, out_features=196, bias=True)
      (embedding_log_scale): Linear(in

In [15]:
# Grab the final validation batch. A deque capped at one element discards
# earlier batches while it iterates, so only one batch is kept alive at a
# time (list(...)[-1] would hold every batch in memory just to take the last).
# An empty loader still raises IndexError, as before.
valid_xs, valid_ys = collections.deque(valid_data_loader, maxlen=1).pop()

In [16]:
# Evaluation-only forward pass on the validation batch. no_grad() stops
# autograd from recording a graph that is never back-propagated, saving
# memory and time. `recons` is compared against the inputs in the next cell;
# `m` is not used in the visible cells.
with torch.no_grad():
    m, recons = model(observations=valid_xs, train=False)

In [17]:
# Fraction of last-axis slices in which the reconstruction equals the input in
# every entry -- presumably per-position exact-match accuracy (confirm what the
# last axis encodes). Kept as a bare expression so the cell displays the value.
(
    torch.eq(recons, valid_xs)
    .all(dim=-1)
    .flatten()
    .float()
    .mean()
)

tensor(0.6935)