In [None]:
# Reload edited modules automatically so changes to imported code are picked up live
%load_ext autoreload
%autoreload 2
# Select the GPU the model will be loaded on.
# PCI_BUS_ID ordering makes CUDA's device numbering follow the PCI bus order.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# GPU ID to use; if it is occupied, change it to a free GPU ID listed under !nvidia-smi
%env CUDA_VISIBLE_DEVICES=0

import numpy as np
import rdkit
from rdkit import Chem
import h5py, ast, pickle

from ddc_pub import ddc_v3 as ddc

In [None]:
# Read the training molecules from the HDF5 dataset file.
dataset_filename = "datasets/CHEMBL25_TRAIN_MOLS.h5"
with h5py.File(dataset_filename, mode="r") as f:
    # Slicing with [:] pulls the entire "mols" dataset into memory,
    # so `binmols` stays usable after the file is closed.
    binmols = f["mols"][:]

In [None]:
# Characters known a priori to occur in the SMILES strings of the dataset
charset = "Brc1(-23[nH])45C=NOso#FlS67+89%0"
# Maximum SMILES length in the dataset, known a priori
maxlen = 128
# Dataset name
name = "ChEMBL25_TRAIN"

# Bundle the dataset description into one mapping for the model
dataset_info = dict(charset=charset, maxlen=maxlen, name=name)

In [None]:
# Build a new model; the same molecules are passed as both input and output.
model = ddc.DDC(
    x=binmols,                  # input
    y=binmols,                  # output
    dataset_info=dataset_info,  # dataset information (charset, maxlen, name)
    noise_std=0.1,              # std of the noise layer
    lstm_dim=512,               # breadth of the LSTM layers
    dec_layers=3,               # number of decoding layers
    codelayer_dim=128,          # dimensionality of the latent space
    batch_size=128,             # batch size used for training
)



In [None]:
# Train the model. With lr_decay=True the exponential schedule (sch_* arguments)
# is used and `patience` is bypassed; with lr_decay=False it is the other way round.
model.fit(
    # number of epochs
    epochs=100,
    # initial learning rate for Adam (recommended value)
    lr=1e-3,
    # base name the checkpoints are appended to
    model_name="new_model",
    # empty string: save checkpoints in the notebook's directory
    checkpoint_dir="",
    # number of sub-epochs within an epoch to trigger lr decay
    mini_epochs=10,
    # checkpoint frequency, counted in mini-epochs
    save_period=50,
    # whether to use exponential lr decay
    lr_decay=True,
    # mini-epoch at which lr decay starts
    sch_epoch_to_start=500,
    # initial lr of the schedule; should be equal to `lr`
    sch_lr_init=1e-3,
    # final lr reached before training finishes
    sch_lr_final=1e-6,
    # patience for Keras' ReduceLROnPlateau
    patience=25,
)

In [None]:
# Save the final model under the name "new_model".
# NOTE(review): this is the same base name that was passed to fit() as model_name
# for the checkpoints - confirm the final save does not clobber a checkpoint you want to keep.
model.save("new_model")