# JAK2 activity optimization with ReLeaSE algorithm

In [44]:
In this experiment we will optimize the parameters of a pretrained generative RNN to produce molecules with maximized and minimized pIC50 for JAK2. We use the policy gradient algorithm with a custom reward function to bias the properties of the generated molecules, a.k.a. Reinforcement Learning for Structural Evolution (ReLeaSE), as proposed in **Popova, M., Isayev, O., & Tropsha, A. (2018). *Deep reinforcement learning for de novo drug design*. Science advances, 4(7), eaap7885.** 

SyntaxError: invalid syntax (<ipython-input-44-85748b042ead>, line 1)

## Imports

In [None]:
# Expose only the GPU with index 2 to this kernel; adjust the index for your machine.
%env CUDA_VISIBLE_DEVICES=2

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys

In [None]:
# Add ./release/ to the import path — presumably where the project modules imported
# below (stackRNN, data, utils, ...) live; confirm against the repository layout.
sys.path.append('./release/')

In [None]:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ExponentialLR, StepLR
import torch.nn.functional as F

In [None]:
# True when PyTorch can see a CUDA device; passed to the generators as `use_cuda`.
use_cuda = torch.cuda.is_available()

In [None]:
import numpy as np
from tqdm import tqdm, trange
import pickle
from rdkit import Chem, DataStructs
from stackRNN import StackAugmentedRNN
from data import GeneratorData
from utils import canonical_smiles

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

## Setting up the generator

### Loading data for the generator

In [55]:
# Path to the generator training data. The full ChEMBL .smi file is left commented out;
# 'data.csv' is the file that is actually loaded.
#gen_data_path = './data/chembl_22_clean_1576904_sorted_std_final.smi'
gen_data_path = 'data.csv'

In [56]:
 # Character vocabulary handed to GeneratorData. '<' and '>' are presumably the
 # start/end-of-sequence markers (TODO confirm); the rest are SMILES characters plus newline.
 tokens = ['<', '>', '#', '%', ')', '(', '+', '-', '/', '.', '1', '0', '3', '2', '5', '4', '7',
          '6', '9', '8', '=', 'A', '@', 'C', 'B', 'F', 'I', 'H', 'O', 'N', 'P', 'S', '[', ']',
          '\\', 'c', 'e', 'i', 'l', 'o', 'n', 'p', 's', 'r', '\n']

In [64]:
# Build the generator's training data from `gen_data_path`.
# NOTE: loading the full ChEMBL .smi file raised a memory error, so a subset of it was
# written to a csv file as a workaround (TODO: convert/stream the .smi file instead).
# The file is tab-separated with the SMILES string in column 0 and the ChEMBL id in
# column 1, so column 0 is the one to read: the ids contain characters ('E', 'M', 'L')
# that are not part of `tokens`.
gen_data = GeneratorData(training_data_path=gen_data_path, delimiter='\t',
                         cols_to_read=[0], keep_header=True, tokens=tokens)

[['CCO' 'CHEMBL545']
 ['C' 'CHEMBL17564']
 ['CO' 'CHEMBL14688']
 ['NCCS' 'CHEMBL602']
 ['NCCN' 'CHEMBL816']
 ['CN' 'CHEMBL43280']
 ['C=O' 'CHEMBL1255']
 ['CCN' 'CHEMBL14449']
 ['CSC' 'CHEMBL15580']
 ['CBr' 'CHEMBL48339']
 ['CI' 'CHEMBL115849']
 ['CF' 'CHEMBL116838']
 ['CC' 'CHEMBL135626']
 ['CNC=O' 'CHEMBL9240']
 ['CCCN' 'CHEMBL14409']
 ['CCCO' 'CHEMBL14687']
 ['O=CC#C' 'CHEMBL722']
 ['C=CC=O' 'CHEMBL721']
 ['CC#N' 'CHEMBL45211']
 ['CCCl' 'CHEMBL46058']
 ['NC#N' 'CHEMBL56279']
 ['CC=O' 'CHEMBL76101']
 ['SC#N' 'CHEMBL84336']
 ['FCF' 'CHEMBL115186']
 ['C#C' 'CHEMBL116336']
 ['CCl' 'CHEMBL117545']
 ['C=C' 'CHEMBL117822']
 ['COC' 'CHEMBL119178']
 ['CNC' 'CHEMBL120433']
 ['CCNCC' 'CHEMBL1189']
 ['CCC' 'CHEMBL135416']
 ['N#N' 'CHEMBL142438']
 ['CNO' 'CHEMBL144761']
 ['CNN' 'CHEMBL160520']
 ['C#N' 'CHEMBL183419']
 ['CC(C)O' 'CHEMBL582']
 ['CNC=O' 'CHEMBL9081']
 ['CCCCON' 'CHEMBL6960']
 ['CCNC=O' 'CHEMBL9421']
 ['CC(O)=O' 'CHEMBL539']
 ['CCCCO' 'CHEMBL14245']
 ['CCCCN' 'CHEMBL13968']
 ['COCOC'

## Util functions

**plot_hist** function prints the mean prediction and the proportion of valid SMILES, and plots the distribution (kernel density estimate) of the predicted property.

In [65]:
def plot_hist(prediction, n_to_generate):
    """Print summary statistics and plot the density of the predicted pIC50 values.

    Args:
        prediction: predicted pIC50 values; must support ``.mean()`` and ``len()``
            (e.g. a NumPy array).
        n_to_generate: number of SMILES strings requested from the generator; used
            as the denominator of the reported proportion.
    """
    print("Mean value of predictions:", prediction.mean())
    valid_fraction = len(prediction) / n_to_generate
    print("Proportion of valid SMILES:", valid_fraction)

    # Kernel density estimate of the predictions, filled under the curve.
    axes = sns.kdeplot(prediction, shade=True)
    axes.set(
        xlabel='Predicted pIC50',
        title='Distribution of predicted pIC50 for generated molecules',
    )
    plt.show()

**estimate_and_update** function:

1) generates n_to_generate number of SMILES strings

2) filters invalid SMILES

3) predicts pIC50 for valid SMILES

4) plots histogram of predicted pIC50

5) Returns valid SMILES and their predicted pIC50s

In [66]:
def estimate_and_update(generator, predictor, n_to_generate, **kwargs):
    """Sample SMILES from `generator`, predict their pIC50 and plot the distribution.

    Args:
        generator: model exposing ``evaluate(gen_data, predict_len=...)``, called once
            per sample.
        predictor: model exposing ``predict(smiles, get_features=...)`` that returns
            ``(smiles, prediction, nan_smiles)``.
        n_to_generate: number of strings to sample from the generator.
        **kwargs: optional ``get_features`` callable forwarded to
            ``predictor.predict`` (defaults to ``get_fp``). Other keys are ignored.

    Returns:
        Tuple ``(smiles, prediction)`` as returned by ``predictor.predict``.
    """
    # Callers pass get_features=...; previously **kwargs was accepted but silently
    # dropped, so the argument had no effect. Default keeps the old behaviour.
    get_features = kwargs.get('get_features', get_fp)

    generated = []
    for _ in tqdm(range(n_to_generate), desc="Generating molecules..."):
        # [1:-1] strips the first and last character of each sample
        # (presumably the start/end tokens — TODO confirm).
        generated.append(generator.evaluate(gen_data, predict_len=120)[1:-1])

    # NOTE(review): the trailing [:-1] discards the last generated sample — confirm
    # this is intended.
    sanitized = canonical_smiles(generated, sanitize=False, throw_warning=False)[:-1]
    # NOTE(review): [1:] discards the first (lexicographically smallest) unique entry,
    # presumably the placeholder canonical_smiles emits for invalid strings; if no
    # such placeholder is present a valid SMILES is dropped instead.
    unique_smiles = list(np.unique(sanitized))[1:]
    smiles, prediction, nan_smiles = predictor.predict(unique_smiles,
                                                       get_features=get_features)

    plot_hist(prediction, n_to_generate)

    return smiles, prediction

## Initializing and training the generator

We will use a stack-augmented generative GRU as the generator. The model was trained to predict the next symbol of the SMILES alphabet given the already generated prefix, by minimizing the cross-entropy loss between the predicted symbol and the ground-truth symbol. The scheme of the generator when inferring new SMILES is shown below:

<img src="./figures/generator.png">

Initialize stack-augmented generative RNN:

In [67]:
# Hyperparameters of the stack-augmented generator. They are module-level names
# because the RL copies created further down are built from the same values.
layer_type = 'GRU'
hidden_size = 1500
stack_width = 1500
stack_depth = 200
optimizer_instance = torch.optim.Adadelta
lr = 0.001

my_generator = StackAugmentedRNN(
    input_size=gen_data.n_characters,
    hidden_size=hidden_size,
    output_size=gen_data.n_characters,
    layer_type=layer_type,
    n_layers=1,
    is_bidirectional=False,
    has_stack=True,
    stack_width=stack_width,
    stack_depth=stack_depth,
    use_cuda=use_cuda,
    optimizer_instance=optimizer_instance,
    lr=lr,
)

If you want to train the model from scratch, uncomment the lines below:

In [68]:
# Checkpoint location: target of the (commented-out) save_model call and source of the
# pretrained weights loaded via load_model.
model_path = './checkpoints/generator/checkpoint_biggest_rnn'

In [None]:
#losses = my_generator.fit(gen_data, 1500000)

In [None]:
#plt.plot(losses)

In [None]:
#my_generator.evaluate(gen_data)

In [None]:
#my_generator.save_model(model_path)

Alternatively, you can skip the process of training and load the pretrained parameters into the model:

In [74]:
# Load the pretrained generator weights from `model_path`.
# NOTE: stackRNN.py was edited locally to pass map_location='cpu' so that the checkpoint
# loads on a CPU-only Linux VM; delete that argument in the file to restore the
# original loading behaviour.
my_generator.load_model(model_path)

## Setting up the predictor

For this demo we will use a Random Forest predictor instead of a Recurrent Neural Network, since the availability of the dataset with JAK2 activity data used in the "Deep Reinforcement Learning for de novo Drug Design" paper is restricted under the license agreement. Here we instead use the JAK2 activity data downloaded from ChEMBL. The size of this dataset is ~2000 data points, which is not enough to build a reliable deep neural network. If you want to see a demo with an RNN, please check out the logP optimization demo. 

In [126]:
from data import PredictorData
from utils import get_desc, get_fp
from mordred import Calculator, descriptors

In [127]:
# Mordred descriptor calculator with 3D descriptors disabled (ignore_3D=True).
# NOTE(review): `calc` is not referenced again in the visible part of this notebook —
# confirm it is still needed.
calc = Calculator(descriptors, ignore_3D=True)

In [128]:
from predictor import VanillaQSAR
from sklearn.ensemble import RandomForestRegressor as RFR

In [129]:
# Estimator class (not an instance) and the constructor arguments that are handed to
# VanillaQSAR below.
model_instance = RFR
model_params = {'n_estimators': 50, 'n_jobs': 10}

In [130]:
# Wrap the random-forest regressor in the project's QSAR interface.
# ensemble_size=2 presumably trains two models — TODO confirm in predictor.py.
my_predictor = VanillaQSAR(
    model_instance=model_instance,
    model_params=model_params,
    model_type='regressor',
    ensemble_size=2,
)

In [132]:
# Load the JAK2 activity table (columns: SMILES, pIC50) and featurise the molecules
# with `get_fp`.
pred_data = PredictorData(path='./data/jak2_data.csv', get_features=get_fp, has_label=True)
# NOTE(review): the last recorded run of this cell failed in fit_model with
# "n_splits=2 greater than the number of samples: n_samples=0", i.e. no sample reached
# the cross-validation split. Check that featurisation succeeds for the rows of the
# csv before relying on the predictor.
my_predictor.fit_model(pred_data, cv_split='random')

[['SMILES' 'pIC50']
 ['O=S(=O)(Nc1cccc(-c2cnc3ccccc3n2)c1)c1cccs1' '4.26']
 ['O=c1cc(-c2nc(-c3ccc(-c4cn(CCP(=O)(O)O)nn4)cc3)[nH]c2-c2ccc(F)cc2)cc[nH]1'
  '4.34']
 ...
 ['CC1CN(S(=O)(=O)CC2CCC(N(C)c3[nH]cnc4nccc3-4)CC2)CC1CO' '10.78']
 ['CS(=O)(=O)N1CCC(Nc2ncccc2-c2cnc3[nH]ccc3n2)C1' '10.97']
 ['COC(=O)N1CCCCC(Nc2ncccc2-c2cnc3[nH]ccc3n2)C1' '10.97']]
None


ValueError: Cannot have number of splits n_splits=2 greater than the number of samples: n_samples=0.

Here we produce the unbiased distribution of the property:

In [None]:
# Baseline: sample from the pretrained (not yet RL-tuned) generator and predict pIC50.
smiles_unbiased, prediction_unbiased = estimate_and_update(
    my_generator,
    my_predictor,
    n_to_generate=10000,
)

## Biasing the distribution of the generator with reinforcement learning (policy gradient)

We combine the generator and the predictor into a single pipeline. The generator produces a new SMILES string, which is then evaluated by the predictor. Based on the obtained prediction and our goal, we assign a numerical reward value and update the parameters of the generator using the policy gradient algorithm.

<img src="./figures/rl_pipeline.png">

Policy gradient loss is defined as:
$$
L(S|\theta) = -\dfrac{1}{n}\sum_{i=1}^{|S|} \sum_{j=1}^{length(s_i)} R_i\cdot \gamma^i \cdot \log p(s_i|s_0 \dots s_{i-1}, \theta),
$$

where $R_i$ is the reward obtained at time step $i$, $\gamma$ is the discount factor, and $p(s_i|s_0 \dots s_{i-1}, \theta)$ is the probability of the next character given the prefix, which we obtain from the generator. 

In our case the reward is the same for every time step and is equal to the reward for the whole molecule. Discount factor $\gamma$ is a number close to $1.0$ (it could be $1.0$).

### Maximizing pIC50 for JAK2

In [None]:
from reinforcement import Reinforcement

Making a copy of the generator that will be optimized

In [None]:
# Separate generator with the same architecture, initialised from the pretrained
# checkpoint, so that RL fine-tuning works on its own model object.
my_generator_max = StackAugmentedRNN(
    input_size=gen_data.n_characters,
    hidden_size=hidden_size,
    output_size=gen_data.n_characters,
    layer_type=layer_type,
    n_layers=1,
    is_bidirectional=False,
    has_stack=True,
    stack_width=stack_width,
    stack_depth=stack_depth,
    use_cuda=use_cuda,
    optimizer_instance=optimizer_instance,
    lr=lr,
)

my_generator_max.load_model(model_path)

In [None]:
# Setting up some parameters for the experiment
# n_to_generate:   molecules sampled after each outer iteration to monitor progress
# n_policy_replay: NOTE(review) not referenced in the visible part of this notebook
# n_policy:        policy-gradient updates per outer iteration
# n_iterations:    number of outer iterations
n_to_generate = 200
n_policy_replay = 10
n_policy = 15
n_iterations = 100

In [None]:
def simple_moving_average(previous_values, new_value, ma_window_size=10):
    """Average `new_value` with the most recent ``ma_window_size - 1`` previous values.

    Args:
        previous_values: sequence of earlier values (list or array); may be empty or
            shorter than the window, in which case all available values are used.
        new_value: the newest observation, always included in the average.
        ma_window_size: total window length including `new_value`; must be >= 1.

    Returns:
        The mean over the window.

    Raises:
        ValueError: if ``ma_window_size`` is smaller than 1.
    """
    if ma_window_size < 1:
        raise ValueError("ma_window_size must be at least 1")

    n_previous = ma_window_size - 1
    # previous_values[-0:] would be the *entire* history (because -0 == 0), so a window
    # of size 1 has to be handled explicitly: it contains only the new value.
    window = previous_values[-n_previous:] if n_previous > 0 else []
    value_ma = (np.sum(window) + new_value) / (len(window) + 1)
    return value_ma

In [None]:
def get_reward_max(smiles, predictor, invalid_reward=0.0, get_features=get_fp):
    """Reward that grows with the predicted pIC50: ``exp(prediction / 3)``.

    Args:
        smiles: a single SMILES string to score.
        predictor: object whose ``predict([smiles], get_features=...)`` returns
            ``(molecules, predictions, nan_smiles)``.
        invalid_reward: value returned when the predictor reports the string in
            ``nan_smiles``.
        get_features: featurisation callable forwarded to the predictor.

    Returns:
        ``invalid_reward`` if the single input ended up in ``nan_smiles``, otherwise
        ``np.exp(prediction / 3)``.
    """
    _, predicted, rejected = predictor.predict([smiles], get_features=get_features)
    if len(rejected) != 1:
        return np.exp(predicted[0] / 3)
    return invalid_reward

The reward function we will use here is 
$$
R(s) = \exp(\dfrac{predictor(s)}{3}) 
$$

In [None]:
# Visualise the maximisation reward exp(pIC50 / 3) over the pIC50 range [0, 12].
x = np.linspace(0, 12)
y = np.exp(x / 3)
plt.plot(x, y)
plt.title('Reward function for JAK2 activity maximization')
plt.xlabel('pIC50 value')
plt.ylabel('Reward value')
plt.show()

In [None]:
# Bundle the generator copy, the predictor and the maximisation reward for
# policy-gradient training.
RL_max = Reinforcement(my_generator_max, my_predictor, get_reward_max)

In [None]:
# Histories of the smoothed reward and loss; one entry is appended per policy-gradient step.
rewards_max = []
rl_losses_max = []

In [None]:
# Each outer iteration: n_policy policy-gradient updates, then diagnostic plots and a
# fresh sample from the current generator.
for _ in range(n_iterations):
    for _step in trange(n_policy, desc='Policy gradient...'):
        cur_reward, cur_loss = RL_max.policy_gradient(gen_data, get_features=get_fp)
        # The stored histories already hold smoothed values, so each new entry averages
        # the raw value with previously smoothed ones.
        rewards_max.append(simple_moving_average(rewards_max, cur_reward))
        rl_losses_max.append(simple_moving_average(rl_losses_max, cur_loss))

    # One figure per history: reward first, then loss.
    for history, y_label in ((rewards_max, 'Average reward'), (rl_losses_max, 'Loss')):
        plt.plot(history)
        plt.xlabel('Training iteration')
        plt.ylabel(y_label)
        plt.show()

    smiles_cur, prediction_cur = estimate_and_update(
        RL_max.generator,
        my_predictor,
        n_to_generate,
        get_features=get_fp,
    )
    print('Sample trajectories:')
    for sm in smiles_cur[:5]:
        print(sm)

In [None]:
# Large sample from the generator tuned towards high pIC50, for the final comparison.
smiles_biased_max, prediction_biased_max = estimate_and_update(
    RL_max.generator,
    my_predictor,
    n_to_generate=10000,
)

In [None]:
# Compare the predicted pIC50 distribution of the maximised generator with the baseline.
sns.kdeplot(prediction_biased_max, label='Maximized', shade=True, color='red')
sns.kdeplot(prediction_unbiased, label='Unbiased', shade=True, color='grey')
plt.xlabel('pIC50 values')
# The labels above are only shown if a legend is drawn; recent seaborn versions no
# longer add one automatically for labelled kdeplots.
plt.legend()
plt.show()

### Minimizing pIC50 for JAK2

Next we will minimize the pIC50 for JAK2.

The reward function we will use here is 
$$
R(s) = \exp(\dfrac{-predictor(s)}{3} + 3) 
$$

In [None]:
def get_reward_min(smiles, predictor, invalid_reward=0.0, get_features=get_fp):
    """Reward that shrinks with the predicted pIC50: ``exp(-prediction / 3 + 3)``.

    Args:
        smiles: a single SMILES string to score.
        predictor: object whose ``predict([smiles], get_features=...)`` returns
            ``(molecules, predictions, nan_smiles)``.
        invalid_reward: value returned when the predictor reports the string in
            ``nan_smiles``.
        get_features: featurisation callable forwarded to the predictor.

    Returns:
        ``invalid_reward`` if the single input ended up in ``nan_smiles``, otherwise
        ``np.exp(-prediction / 3 + 3)``.
    """
    _, predicted, rejected = predictor.predict([smiles], get_features=get_features)
    if len(rejected) != 1:
        return np.exp(-predicted[0] / 3 + 3)
    return invalid_reward

In [None]:
# Visualise the minimisation reward exp(-pIC50 / 3 + 3) over the pIC50 range [0, 12].
x = np.linspace(0, 12)
y = np.exp(-x / 3 + 3)
plt.plot(x, y)
plt.title('Reward function for JAK2 activity minimization')
plt.xlabel('pIC50 value')
plt.ylabel('Reward value')
plt.show()

Making a copy of the generator that will be optimized

In [None]:
# Separate generator with the same architecture, initialised from the pretrained
# checkpoint, to be tuned towards low pIC50.
my_generator_min = StackAugmentedRNN(
    input_size=gen_data.n_characters,
    hidden_size=hidden_size,
    output_size=gen_data.n_characters,
    layer_type=layer_type,
    n_layers=1,
    is_bidirectional=False,
    has_stack=True,
    stack_width=stack_width,
    stack_depth=stack_depth,
    use_cuda=use_cuda,
    optimizer_instance=optimizer_instance,
    lr=lr,
)
my_generator_min.load_model(model_path)

In [None]:
# Bundle the generator copy, the predictor and the minimisation reward for
# policy-gradient training.
RL_min = Reinforcement(my_generator_min, my_predictor, get_reward_min)

In [None]:
# Histories of the smoothed reward and loss; one entry is appended per policy-gradient step.
rewards_min = []
rl_losses_min = []

In [None]:
# Each outer iteration: n_policy policy-gradient updates, then diagnostic plots and a
# fresh sample from the current generator.
for _ in range(n_iterations):
    for _step in trange(n_policy, desc='Policy gradient...'):
        cur_reward, cur_loss = RL_min.policy_gradient(gen_data, get_features=get_fp)
        # The stored histories already hold smoothed values, so each new entry averages
        # the raw value with previously smoothed ones.
        rewards_min.append(simple_moving_average(rewards_min, cur_reward))
        rl_losses_min.append(simple_moving_average(rl_losses_min, cur_loss))

    # One figure per history: reward first, then loss.
    for history, y_label in ((rewards_min, 'Average reward'), (rl_losses_min, 'Loss')):
        plt.plot(history)
        plt.xlabel('Training iteration')
        plt.ylabel(y_label)
        plt.show()

    smiles_cur, prediction_cur = estimate_and_update(
        RL_min.generator,
        my_predictor,
        n_to_generate,
    )
    print('Sample trajectories:')
    for sm in smiles_cur[:5]:
        print(sm)

In [None]:
# Large sample from the generator tuned towards low pIC50, for the final comparison.
smiles_biased_min, prediction_biased_min = estimate_and_update(
    RL_min.generator,
    my_predictor,
    n_to_generate=10000,
)

In [None]:
# Overlay the predicted pIC50 distributions of the three generators.
sns.kdeplot(prediction_biased_max, label='Maximized', shade=True, color='red')
sns.kdeplot(prediction_biased_min, label='Minimized', shade=True, color='blue')
sns.kdeplot(prediction_unbiased, label='Unbiased', shade=True, color='grey')
plt.xlabel('pIC50 values')
plt.title('Distributions of predicted pIC50 for unbiased,' + 
          ' maximized and minimized generator')
# The labels above are only shown if a legend is drawn; recent seaborn versions no
# longer add one automatically for labelled kdeplots.
plt.legend()
plt.show()

## Drawing random molecules

Now we will draw some random compounds from the biased library:

In [None]:
from rdkit.Chem.Draw import DrawingOptions
from rdkit.Chem import Draw
# Global RDKit drawing settings (atom label font size, dots per angstrom, bond line
# width) applied to the molecule images rendered below.
DrawingOptions.atomLabelFontSize = 50
DrawingOptions.dotsPerAngstrom = 100
DrawingOptions.bondLineWidth = 3

### Molecules with maximized pIC50

In [None]:
# Parse every generated SMILES with sanitisation on, then keep only the entries that
# produced a molecule object (failed parses show up as None).
generated_mols_max = [Chem.MolFromSmiles(sm, sanitize=True) for sm in smiles_biased_max]
sanitized_gen_mols_max = [mol for mol in generated_mols_max if mol is not None]

In [None]:
n_to_draw = 20
# Positions, in the unfiltered smiles/prediction lists, of the molecules that parsed.
# Indexing the predictions through this mapping keeps each legend attached to its own
# molecule; indexing them with a position in the *filtered* list mislabels molecules
# as soon as one SMILES fails to parse.
# (Assumes prediction_biased_max[k] belongs to smiles_biased_max[k] — both come from
# the same predictor.predict call.)
valid_positions = [pos for pos, mol in enumerate(generated_mols_max) if mol is not None]
# Random draw (with replacement) among the parsable molecules.
ind = np.random.randint(0, len(valid_positions), n_to_draw)
mols_to_draw_max = [generated_mols_max[valid_positions[i]] for i in ind]
legends = ['pIC50 = ' + str(prediction_biased_max[valid_positions[i]]) for i in ind]

In [None]:
# Render the sampled molecules as a grid with 5 molecules per row; `legends` supplies
# the predicted-pIC50 caption for each image.
Draw.MolsToGridImage(mols_to_draw_max, molsPerRow=5, 
                     subImgSize=(300,300), legends=legends)

### Molecules with minimized pIC50

# Parse every generated SMILES with sanitisation on, then keep only the entries that
# produced a molecule object (failed parses show up as None).
generated_mols_min = [Chem.MolFromSmiles(sm, sanitize=True) for sm in smiles_biased_min]
sanitized_gen_mols_min = [mol for mol in generated_mols_min if mol is not None]

In [None]:
n_to_draw = 20
# Positions, in the unfiltered smiles/prediction lists, of the molecules that parsed.
# Indexing the predictions through this mapping keeps each legend attached to its own
# molecule; indexing them with a position in the *filtered* list mislabels molecules
# as soon as one SMILES fails to parse.
# (Assumes prediction_biased_min[k] belongs to smiles_biased_min[k] — both come from
# the same predictor.predict call.)
valid_positions = [pos for pos, mol in enumerate(generated_mols_min) if mol is not None]
# Random draw (with replacement) among the parsable molecules.
ind = np.random.randint(0, len(valid_positions), n_to_draw)
mols_to_draw_min = [generated_mols_min[valid_positions[i]] for i in ind]
legends = ['pIC50 = ' + str(prediction_biased_min[valid_positions[i]]) for i in ind]

In [None]:
# Render the sampled molecules as a grid with 5 molecules per row; `legends` supplies
# the predicted-pIC50 caption for each image.
Draw.MolsToGridImage(mols_to_draw_min, molsPerRow=5, 
                     subImgSize=(300,300), legends=legends)