
Commit

new weights
jacquesboitreaud committed May 7, 2020
1 parent 8b06736 commit 738e165
Showing 13 changed files with 114 additions and 91 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,8 +1,8 @@
__pycache__
*.csv
runs
model_backups
data_curation
eval/plots
data/
results/
optim/bo_results
1 change: 1 addition & 0 deletions cbas/cbas.py
@@ -22,6 +22,7 @@
from rdkit import Chem

from utils import *
from dgl_utils import *
from model import model_from_json
from oracles import qed, deterministic_cdf_oracle, normal_cdf_oracle
from gen_train import GenTrain
28 changes: 28 additions & 0 deletions dgl_utils.py
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
"""
Created on Thu May 7 18:42:15 2020
@author: jacqu
"""

import dgl

def send_graph_to_device(g, device):
    """
    Send a DGL graph and all of its node/edge features to the given device.

    :param g: dgl.DGLGraph
    :param device: torch device (e.g. 'cpu' or 'cuda')
    :return: the graph, with its features moved to `device`
    """
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # node features
    labels = g.node_attr_schemes()
    for l in labels.keys():
        g.ndata[l] = g.ndata.pop(l).to(device, non_blocking=True)

    # edge features
    labels = g.edge_attr_schemes()
    for l in labels.keys():
        g.edata[l] = g.edata.pop(l).to(device, non_blocking=True)
    return g
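
A minimal usage sketch (editor's addition, not part of the commit): the toy graphs and the feature names 'h' and 'w' are made up, and only illustrate how the helper above is meant to be called after batching.

import torch
import dgl

from dgl_utils import send_graph_to_device

device = 'cuda' if torch.cuda.is_available() else 'cpu'

g1, g2 = dgl.DGLGraph(), dgl.DGLGraph()
for g in (g1, g2):
    g.add_nodes(3)
    g.add_edges([0, 1], [1, 2])
    g.ndata['h'] = torch.randn(3, 16)   # toy node features
    g.edata['w'] = torch.ones(2, 1)     # toy edge features

batched = dgl.batch([g1, g2])           # one graph for the whole batch
batched = send_graph_to_device(batched, device)
print(batched.ndata['h'].device)        # features now live on `device`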
5 changes: 3 additions & 2 deletions docking/dock1smiles.py
@@ -25,7 +25,7 @@
pass

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", default=' O=CC=C(C1C2=NC=C(C=NC3=CC=CC=C3N=C2)C=C1F)NC=C',
parser.add_argument("-i", "--input", default='O=CC=C(C1C2=NC=C(C=NC3=CC=CC=C3N=C2)C=C1F)NC=C',
help="Smiles to dock")
parser.add_argument("-s", "--server", default='mac', help="Server to run the docking on, for path and configs.")
parser.add_argument("-e", "--ex", default=16, help="exhaustiveness parameter for vina")
@@ -34,5 +34,6 @@

PYTHONSH, VINA = set_path(args.server)

dock(smile=args.input, unique_id=1, pythonsh=PYTHONSH, vina=VINA, parallel=True, exhaustiveness=args.ex)
sc = dock(smile=args.input, unique_id=1, pythonsh=PYTHONSH, vina=VINA, parallel=True, exhaustiveness=args.ex)
print('Score :', sc)
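
For context, a hypothetical invocation of this script (editor's addition): the flags match the argparse definitions above, but the SMILES string, server name, and exhaustiveness value are placeholders.

python docking/dock1smiles.py -i "CC(=O)Oc1ccccc1C(=O)O" -s mac -e 16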

1 change: 1 addition & 0 deletions eval/diagnostic_plots.py
@@ -52,6 +52,7 @@

from eval.eval_utils import *
from utils import *
from dgl_utils import *

# Should be same as for training
properties = ['QED', 'logP', 'molWt']
1 change: 1 addition & 0 deletions model.py
@@ -34,6 +34,7 @@
from dgl.nn.pytorch.conv import GATConv, RelGraphConv

from utils import *
from dgl_utils import *


class MultiGRU(nn.Module):
45 changes: 36 additions & 9 deletions optim/BO.py
@@ -49,14 +49,19 @@
from dataloaders.molDataset import Loader
from model import Model, model_from_json
from utils import *
from BO_utils import get_fitted_model
from dgl_utils import *
from bo_utils import get_fitted_model
from docking.docking import dock, set_path

parser = argparse.ArgumentParser()

parser.add_argument( '--bo_name', help="Name for BO results subdir ",
default='first_bo')

parser.add_argument( '--name', help="saved model weights fname. Located in saved_models subdir",
default='kekule')
default='inference_default')
parser.add_argument('-n', "--n_steps", help="Nbr of optim steps", type=int, default=50)
parser.add_argument('-q', "--n_queries", help="Nbr of queries per step", type=int, default=50)
parser.add_argument('-q', "--n_queries", help="Nbr of queries per step", type=int, default=100)

parser.add_argument('-o', '--objective', default='aff_pred') # 'qed', 'aff', 'aff_pred'

@@ -72,8 +77,10 @@
The VAE, decoding, and affinity prediction with the MLP run on GPU; Gaussian process operations stay on CPU
(the GP training set may grow too large to fit on GPU after some optimization steps).
"""

soft_mkdir('bo_results')
soft_mkdir(os.path.join('bo_results',args.bo_name))

device = 'cuda' if torch.cuda.is_available() else 'cpu'
vocab = 'selfies'
# Loader for initial sample
loader = Loader(props=[],
@@ -84,11 +91,12 @@
test_only=True)

# Load model (on gpu if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cuda' if torch.cuda.is_available() else 'cpu' # the model device
model = model_from_json(args.name)
model.to(device)
model.eval()

# Search space
d = model.l_size
dtype = torch.float
bounds = torch.tensor([[-3.0] * d, [3.0] * d], device='cpu', dtype=dtype)
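
As an aside (editor's sketch, not code from this commit): with these box bounds, one Bayesian-optimization step in latent space can be written with plain BoTorch calls. The repository fits its GP through bo_utils.get_fitted_model, so the GP and acquisition setup below is an assumption; train_z and train_obj stand for the latent points and scores accumulated further down in the script.

from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.acquisition import qExpectedImprovement
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

def bo_step(train_z, train_obj, bounds, q):
    # Fit a GP to the (latent point, score) pairs seen so far
    gp = SingleTaskGP(train_z, train_obj)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)
    # Maximize q-Expected Improvement inside the [-3, 3]^d latent box
    qEI = qExpectedImprovement(model=gp, best_f=train_obj.max())
    new_z, _ = optimize_acqf(acq_function=qEI, bounds=bounds, q=q,
                             num_restarts=10, raw_samples=256)
    return new_z  # candidates to decode into SMILES and score with the oracle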
@@ -115,7 +123,9 @@
elif args.objective == 'aff' :
PYTHONSH, VINA = set_path(args.server)
scores_init = -1* torch.tensor(df.drd3).view(-1,1).cpu() # careful, maximize -aff <=> minimize binding energy (negative value)


# Track results across BO iterations
sc_dict = {}
best_value = torch.max(scores_init).item()
best_observed.append(best_value)
train_obj = scores_init
@@ -204,12 +214,14 @@ def optimize_acqf_and_get_observation(acq_func, device):
if(args.verbose):
print(' oracle outputs:')
print(new_score.numpy())
sc_dict[iteration]=new_score.numpy()

# update training points

train_smiles+= new_smiles
train_z = torch.cat((train_z, new_z.cpu()), dim=0)
train_obj = torch.cat((train_obj, new_score), dim=0)
state_dict = GP_model.state_dict()

# update progress
avg_score = torch.mean(new_score).item()
@@ -219,9 +231,24 @@ def optimize_acqf_and_get_observation(acq_func, device):
idx = idx.item()
best_smiles = train_smiles[idx]

state_dict = GP_model.state_dict()


print(f'current best mol: {best_smiles}, with oracle score {best_value.item()}')
print(f'average score of fresh samples at iter {iteration}: {avg_score}')
print("\n")

# Save per-iteration scores
with open(os.path.join('bo_results', args.bo_name, 'sample_scores.pickle'), 'wb') as f:
    pickle.dump(sc_dict, f)

# Write the 100 best-scoring samples seen during the run
train_obj = train_obj.numpy().flatten()
idces = np.argsort(train_obj)[::-1][:100]  # indices of the highest scores

with open(os.path.join('bo_results', args.bo_name, 'top_samples.txt'), 'w') as f:
    for i in idces:
        f.write(f'{train_smiles[i]}, {train_obj[i]}\n')
print('Wrote top samples and scores to top_samples.txt.')
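
A small read-back sketch (editor's addition) showing how the files written above might be inspected afterwards; the directory name assumes the default --bo_name of 'first_bo'.

import os
import pickle
import numpy as np

bo_dir = os.path.join('bo_results', 'first_bo')    # default --bo_name

with open(os.path.join(bo_dir, 'sample_scores.pickle'), 'rb') as f:
    sc_dict = pickle.load(f)                       # {iteration: array of oracle scores}

for it in sorted(sc_dict):
    scores = np.asarray(sc_dict[it]).flatten()
    print(f'iter {it}: mean {scores.mean():.3f}, best {scores.max():.3f}')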





20 changes: 0 additions & 20 deletions optim/log.txt

This file was deleted.

79 changes: 41 additions & 38 deletions results/saved_models/inference_default/params.json
@@ -6,12 +6,12 @@
"load_iter": 0,
"decode": "selfies",
"build_alphabet": false,
"latent_size": 96,
"latent_size": 56,
"lr": 0.001,
"clip_norm": 50.0,
"beta": 0.0,
"step_beta": 0.002,
"max_beta": 0.1,
"max_beta": 0.5,
"warmup": 40000,
"processes": 20,
"batch_size": 64,
@@ -30,48 +30,47 @@
"bin_affs": false,
"features_dim": 16,
"num_rels": 4,
"l_size": 96,
"voc_size": 34,
"l_size": 56,
"voc_size": 33,
"max_len": 54,
"N_properties": 3,
"N_targets": 1,
"binned_scores": false,
"device": "cuda",
"index_to_char": {
"0": "[C]",
"1": "[epsilon]",
"2": "[#C]",
"3": "[=S]",
"4": "[s]",
"5": "[O]",
"0": "[Branch1_1]",
"1": "[Branch1_2]",
"2": "[Branch1_3]",
"3": "[Ring1]",
"4": "[Branch2_1]",
"5": "[Branch2_2]",
"6": "[Branch2_3]",
"7": "[Cl]",
"8": "[Expl-Ring1]",
"9": "[S]",
"10": "[N]",
"11": "[Ring1]",
"12": "[Branch1_2]",
"13": "[=c]",
"14": "[nHexpl]",
"15": "[o]",
"16": "[-c]",
"17": "[Branch2_1]",
"18": "[Branch1_3]",
"19": "[Expl-Ring2]",
"20": "[Br]",
"21": "[Hexpl]",
"22": "[#N]",
"23": "[Branch1_1]",
"24": "[Branch2_2]",
"25": "[=O]",
"26": "[-n]",
"27": "[=C]",
"28": "[=N]",
"29": "[n]",
"30": "[F]",
"31": "[Expl=Ring1]",
"32": "[Ring2]",
"33": "[c]"
"7": "[Ring2]",
"8": "[Branch3_1]",
"9": "[Branch3_2]",
"10": "[Branch3_3]",
"11": "[Ring3]",
"12": "[O]",
"13": "[=O]",
"14": "[N]",
"15": "[=N]",
"16": "[C]",
"17": "[=C]",
"18": "[#C]",
"19": "[S]",
"20": "[=S]",
"21": "[P]",
"22": "[F]",
"23": "[C@Hexpl]",
"24": "[C@@Hexpl]",
"25": "[C@expl]",
"26": "[C@@expl]",
"27": "[H]",
"28": "[NHexpl]",
"29": "[epsilon]",
"30": "[Cl]",
"31": "[#N]",
"32": "[Br]"
},
"props": [
"QED",
@@ -85,5 +84,9 @@
"properties": true,
"target": true,
"parallel": false,
"gcn_layers": 3
"gcn_layers": 3,
"load_name": "default",
"no_props": false,
"no_aff": false,
"gcn_hdim": 32
}
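
For illustration (editor's addition): the index_to_char table above maps model output indices to SELFIES tokens from an older selfies release. A decoding sketch, assuming the selfies package is installed and treating [epsilon] as the padding/'nothing' token; the index sequence is made up, only the token strings come from params.json.

import json
import selfies as sf

with open('results/saved_models/inference_default/params.json') as f:
    params = json.load(f)
index_to_char = params['index_to_char']            # JSON keys are strings

indices = [16, 16, 12]                             # hypothetical sample: [C][C][O]
tokens = [index_to_char[str(i)] for i in indices]
selfies_str = ''.join(t for t in tokens if t != '[epsilon]')
print(sf.decoder(selfies_str))                     # expected: 'CCO' (ethanol)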
Binary file modified results/saved_models/inference_default/weights.pth
Binary file not shown.
1 change: 1 addition & 0 deletions train.py
@@ -34,6 +34,7 @@
sys.path.append(script_dir)

from utils import *
from dgl_utils import *
from model import Model
from loss_func import VAELoss, weightedPropsLoss, affsRegLoss, affsClassifLoss
from dataloaders.molDataset import molDataset, Loader
1 change: 1 addition & 0 deletions train_triplets.py
@@ -33,6 +33,7 @@
from loss_func import Loss, RecLoss, tripletLoss
from dataloaders.tripletsDataset import Loader
from utils import *
from dgl_utils import *

if __name__ == "__main__":

21 changes: 0 additions & 21 deletions utils.py
@@ -8,7 +8,6 @@
"""

import numpy as np
import dgl
import torch
import pandas as pd

@@ -174,26 +173,6 @@ def debug_memory():
print('{}\t{}'.format(*line))


def send_graph_to_device(g, device):
"""
Send dgl graph to device
:param g: :param device:
:return:
"""
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)

# nodes
labels = g.node_attr_schemes()
for l in labels.keys():
g.ndata[l] = g.ndata.pop(l).to(device, non_blocking=True)

# edges
labels = g.edge_attr_schemes()
for i, l in enumerate(labels.keys()):
g.edata[l] = g.edata.pop(l).to(device, non_blocking=True)
return g


# ============== Smiles handling utils ===============================

