<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Model" data-toc-modified-id="Model-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Model</a></span></li><li><span><a href="#Test" data-toc-modified-id="Test-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Test</a></span><ul class="toc-item"><li><span><a href="#NB!-All-smiles-should-be-canonicalized-using-RDkit." data-toc-modified-id="NB!-All-smiles-should-be-canonicalized-using-RDkit.-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>NB! All smiles should be canonicalized using RDkit.</a></span><ul class="toc-item"><li><span><a href="#How-to-canonicalize-smiles-with-RDKit-for-MLT-LE" data-toc-modified-id="How-to-canonicalize-smiles-with-RDKit-for-MLT-LE-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>How to canonicalize smiles with RDKit for MLT-LE</a></span></li></ul></li><li><span><a href="#Calcitriol---VDR-ligand" data-toc-modified-id="Calcitriol---VDR-ligand-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Calcitriol - VDR ligand</a></span></li><li><span><a href="#Diazepam---GABA-ligand" data-toc-modified-id="Diazepam---GABA-ligand-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Diazepam - GABA ligand</a></span></li><li><span><a href="#Torin1---mTor-ligand" data-toc-modified-id="Torin1---mTor-ligand-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Torin1 - mTor ligand</a></span></li></ul></li><li><span><a href="#p1Kd-to-pKd,-μM" data-toc-modified-id="p1Kd-to-pKd,-μM-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>p1Kd to pKd, μM</a></span></li></ul></div>

# Model

In [1]:
from models.mlt_le_cnn_basic_with_memory import *
import pandas as pd
from data.example_target_sequences import VDR, MTOR, GABA

In [2]:
# Create the MLT-LE model instance; every `model.predict(...)` call and the
# `model.to_uM` / `model.to_pKd` conversions further down use this object.
# NOTE(review): presumably this loads pretrained weights — confirm in
# models.mlt_le_cnn_basic_with_memory.
model = MLTLE()

# Test

## NB! All smiles should be canonicalized using RDkit.
### How to canonicalize smiles with RDKit for MLT-LE

```python

# install RDKit
!pip install rdkit-pypi
from rdkit import Chem
def to_non_isomeric_canonical(s):
    """
    Removes isomeric features and makes RDKit canonical.

    Parameters
    ----------
    s : str
        SMILES string to normalise.

    Returns
    -------
    str or float
        The non-isomeric, RDKit-canonical SMILES. If `s` cannot be
        converted, the offending input and the reason are printed and
        NaN (a plain Python float, same value as `np.nan`) is returned.

    Example:
    pubchem_torin1 = "CCC(=O)N1CCN(CC1)C2=C(C=C(C=C2)N3C(=O)C=CC4=CN=C5C=CC(=CC5=C43)C6=CC7=CC=CC=C7N=C6)C(F)(F)F"
    to_non_isomeric_canonical(pubchem_torin1)
    >>CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7ccccc7c6)cc5c43)cc2C(F)(F)F)CC1
    """
    try:
        mol = Chem.MolFromSmiles(s)
        if mol is None:
            # RDKit reports an unparsable SMILES by returning None rather than
            # raising, so turn it into an explicit, readable error here.
            raise ValueError("RDKit could not parse this SMILES")
        return Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True)
    except Exception as e:
        # Best-effort: report the bad input and return NaN. float("nan") is
        # used instead of np.nan so the snippet does not need numpy in scope.
        print(s, e)
        return float("nan")
```

## Calcitriol - VDR ligand

In [3]:
# Calcitriol (already RDKit-canonical, non-isomeric SMILES) paired with each
# of the three example target sequences: one row per (ligand, target) pair.
calcitriol = "C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O)CC1O"
X_predict = pd.DataFrame(
    {
        'smiles': [calcitriol] * 3,
        'target': [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [4]:
# Run the model on the calcitriol frame built in the previous cell.
# NOTE: despite its name, `VDR_prediction` has one row per target in
# X_predict (VDR, MTOR, GABA), not only the VDR row.
VDR_prediction = model.predict(X_predict)
# Bare last expression so the notebook renders the resulting frame.
VDR_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,p1Kd,p1Ki,p1IC50,p1EC50,is_active,pH
0,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,0.552916,1.706918,2.598926,2.127594,0.986545,7.354312
1,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,5.574427,5.283072,6.921693,7.705691,0.549589,7.354622
2,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,5.304507,4.889819,6.7019,8.349508,0.606734,7.426745


## Diazepam - GABA ligand

In [5]:
# Diazepam SMILES paired with each of the three example target sequences:
# one row per (ligand, target) pair.
diazepam = "CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21"
X_predict = pd.DataFrame(
    {
        'smiles': [diazepam] * 3,
        'target': [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [6]:
# Run the model on the diazepam frame built in the previous cell.
# NOTE: despite its name, `GABA_prediction` has one row per target in
# X_predict (VDR, MTOR, GABA), not only the GABA row.
GABA_prediction = model.predict(X_predict)
# Bare last expression so the notebook renders the resulting frame.
GABA_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,p1Kd,p1Ki,p1IC50,p1EC50,is_active,pH
0,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,8.271052,7.759677,8.27099,9.340125,0.197094,7.390858
1,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,8.45827,7.461936,9.431652,9.147441,0.121497,7.539183
2,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,2.641443,3.534576,7.291436,6.100872,0.837896,7.562299


## Torin1 - mTor ligand

In [7]:
# Torin1 (RDKit-canonical, non-isomeric SMILES) paired with each of the three
# example target sequences: one row per (ligand, target) pair.
torin1 = "CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7ccccc7c6)cc5c43)cc2C(F)(F)F)CC1"
X_predict = pd.DataFrame(
    {
        'smiles': [torin1] * 3,
        'target': [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [8]:
# Run the model on the Torin1 frame built in the previous cell.
# NOTE: despite its name, `MTOR_prediction` has one row per target in
# X_predict (VDR, MTOR, GABA), not only the MTOR row.
MTOR_prediction = model.predict(X_predict)
# Bare last expression so the notebook renders the resulting frame.
MTOR_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,p1Kd,p1Ki,p1IC50,p1EC50,is_active,pH
0,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,7.021446,5.650318,7.06411,7.495184,0.504644,7.498044
1,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,2.508047,2.22574,3.294503,3.690487,0.966631,7.484052
2,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,4.714337,2.885322,3.784179,5.531103,0.958375,7.485838


# p1Kd to pKd, μM

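`p1Kd = ln(Kd + 1)`, where `Kd` is expressed in nM and `ln` is the natural logarithm. Hence `Kd [nM] = exp(p1Kd) − 1` and `pKd = −log10(Kd [M])`; for example, `p1Kd = 0.552916` gives `Kd ≈ 0.738 nM = 0.000738 μM` and `pKd ≈ 9.13`.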


In [9]:
# Derive the two human-readable Kd columns from the predicted p1Kd values by
# applying the model's own converters element-wise.
for kd_column, kd_converter in (('Kd [μM]', model.to_uM), ('pKd', model.to_pKd)):
    VDR_prediction[kd_column] = VDR_prediction['p1Kd'].apply(kd_converter)

In [10]:
# Show only the columns whose name contains 'Kd' (p1Kd, Kd [μM], pKd).
VDR_prediction.loc[:, VDR_prediction.columns.str.contains('Kd')]

Unnamed: 0,p1Kd,Kd [μM],pKd
0,0.552916,0.000738,9.131759
1,5.574427,0.262599,6.580708
2,5.304507,0.200242,6.698445
