In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import logging
import sklearn.metrics
import json

In [None]:
# Read the test table into memory. Later cells filter it on 'protein_name'
# and read the building-block SMILES columns plus the numeric descriptor
# columns from it.
test_csv_path = 'test_normalized.csv'
ds = pd.read_csv(test_csv_path)

In [None]:
# Numeric descriptor columns passed to the models as the fourth input.
# Every name is "<building-block prefix><descriptor>". The order inside each
# group differs between building blocks and is reproduced exactly, because the
# column order defines the feature order the models receive. Only the b3 group
# includes NumAromaticBonds.
data_cols = [
    f'{prefix}{descriptor}'
    for prefix, descriptors in (
        ('b1', (
            'LogP', 'MolMR', 'TPSA', 'FractionCSP3',
            'NumHeteroatoms', 'MolecularWeight', 'ExactMass',
            'NumRotatableBonds', 'NumValenceElectrons', 'BondCount',
            'NumHAcceptors',
        )),
        ('b2', (
            'LogP', 'MolMR', 'TPSA', 'FractionCSP3',
            'NumValenceElectrons', 'MolecularWeight', 'ExactMass',
            'NumRotatableBonds', 'BondCount', 'NumHeteroatoms',
            'NumHAcceptors',
        )),
        ('b3', (
            'LogP', 'MolMR', 'TPSA', 'FractionCSP3',
            'NumHAcceptors', 'MolecularWeight', 'ExactMass',
            'BondCount', 'NumValenceElectrons', 'NumRotatableBonds',
            'NumHeteroatoms', 'NumAromaticBonds',
        )),
    )
    for descriptor in descriptors
]

In [None]:
# Fixed character vocabulary for SMILES strings. The first two entries are the
# empty string and "[UNK]" (presumably the padding and out-of-vocabulary slots
# Keras expects at indices 0 and 1 -- confirm against the training notebook).
smile_vocabulary = [
    "", "[UNK]",
    "c", "C", "1", ")", "(", "O", "2", "N", "=", "n", "-", "l", "]", "[",
    "@", "H", "F", ".", "3", "s", "B", "r", "S", "#", "+", "o", "I", "4",
    "/", "5", "i",
]

# Character-level vectorizer: no text standardization, no vocabulary size cap,
# and an output sequence length of 70 tokens per string.
smile_vector = tf.keras.layers.TextVectorization(
    max_tokens=None,
    standardize=None,
    split='character',
    output_sequence_length=70,
)
smile_vector.set_vocabulary(smile_vocabulary)

In [None]:
# Directory holding the saved per-protein models ('<name>.keras') that the
# following cells load.
try_dir = "./models/try24"

In [None]:
# Score the sEH rows of the test table with the model saved as sEH.keras.
sEHModel = tf.keras.models.load_model(try_dir + '/sEH.keras')

# Take an explicit copy of the filtered rows: the 'binds' column is added
# below, and assigning into a boolean-mask slice of `ds` would raise pandas'
# SettingWithCopyWarning and leave it ambiguous which frame is modified.
sEHs = ds[ds['protein_name'] == 'sEH'].copy()

# Vectorize the three building-block SMILES columns with the shared vectorizer.
buildingblock1_smiles_vec = smile_vector(sEHs['buildingblock1_smiles'].values)
buildingblock2_smiles_vec = smile_vector(sEHs['buildingblock2_smiles'].values)
buildingblock3_smiles_vec = smile_vector(sEHs['buildingblock3_smiles'].values)

# Model inputs, in order: the three token tensors, then the numeric descriptors.
sEHpredictions = sEHModel.predict([
    buildingblock1_smiles_vec,
    buildingblock2_smiles_vec,
    buildingblock3_smiles_vec,
    sEHs[data_cols],
])

# Flatten to 1-D so there is one value per row
# (assumes the model emits a single output per sample -- TODO confirm).
sEHs['binds'] = sEHpredictions.flatten()

In [None]:
# Score the HSA rows of the test table with the model saved as HSA.keras.
HSAModel = tf.keras.models.load_model(try_dir + '/HSA.keras')

# Take an explicit copy of the filtered rows: the 'binds' column is added
# below, and assigning into a boolean-mask slice of `ds` would raise pandas'
# SettingWithCopyWarning and leave it ambiguous which frame is modified.
HSAs = ds[ds['protein_name'] == 'HSA'].copy()

# Vectorize the three building-block SMILES columns with the shared vectorizer.
buildingblock1_smiles_vec = smile_vector(HSAs['buildingblock1_smiles'].values)
buildingblock2_smiles_vec = smile_vector(HSAs['buildingblock2_smiles'].values)
buildingblock3_smiles_vec = smile_vector(HSAs['buildingblock3_smiles'].values)

# Model inputs, in order: the three token tensors, then the numeric descriptors.
HSApredictions = HSAModel.predict([
    buildingblock1_smiles_vec,
    buildingblock2_smiles_vec,
    buildingblock3_smiles_vec,
    HSAs[data_cols],
])

# Flatten to 1-D so there is one value per row
# (assumes the model emits a single output per sample -- TODO confirm).
HSAs['binds'] = HSApredictions.flatten()

In [None]:
# Score the BRD4 rows of the test table with the model saved as BRD4.keras.
BRD4Model = tf.keras.models.load_model(try_dir + '/BRD4.keras')

# Take an explicit copy of the filtered rows: the 'binds' column is added
# below, and assigning into a boolean-mask slice of `ds` would raise pandas'
# SettingWithCopyWarning and leave it ambiguous which frame is modified.
BRD4s = ds[ds['protein_name'] == 'BRD4'].copy()

# Vectorize the three building-block SMILES columns with the shared vectorizer.
buildingblock1_smiles_vec = smile_vector(BRD4s['buildingblock1_smiles'].values)
buildingblock2_smiles_vec = smile_vector(BRD4s['buildingblock2_smiles'].values)
buildingblock3_smiles_vec = smile_vector(BRD4s['buildingblock3_smiles'].values)

# Model inputs, in order: the three token tensors, then the numeric descriptors.
BRD4predictions = BRD4Model.predict([
    buildingblock1_smiles_vec,
    buildingblock2_smiles_vec,
    buildingblock3_smiles_vec,
    BRD4s[data_cols],
])

# Flatten to 1-D so there is one value per row
# (assumes the model emits a single output per sample -- TODO confirm).
BRD4s['binds'] = BRD4predictions.flatten()

In [None]:
# Stack the per-protein prediction frames and keep only the two output columns.
submission = pd.concat([sEHs, BRD4s, HSAs])[['id', 'binds']]

# index=False: without it to_csv also writes the DataFrame index as an extra
# unnamed first column, so the file would hold three columns instead of
# id/binds.
# NOTE(review): the file name spelling is kept unchanged in case other tooling
# reads this exact path.
submission.to_csv('submition.csv', index=False)