<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Model" data-toc-modified-id="Model-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Model</a></span></li><li><span><a href="#Test" data-toc-modified-id="Test-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Test</a></span><ul class="toc-item"><li><span><a href="#NB!-All-smiles-should-be-canonicalized-using-RDkit." data-toc-modified-id="NB!-All-smiles-should-be-canonicalized-using-RDkit.-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>NB! All smiles should be canonicalized using RDkit.</a></span><ul class="toc-item"><li><span><a href="#How-to-canonicalize-smiles-with-RDKit-for-MLT-LE" data-toc-modified-id="How-to-canonicalize-smiles-with-RDKit-for-MLT-LE-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>How to canonicalize smiles with RDKit for MLT-LE</a></span></li></ul></li><li><span><a href="#Calcitriol---VDR-ligand" data-toc-modified-id="Calcitriol---VDR-ligand-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Calcitriol - VDR ligand</a></span></li><li><span><a href="#Diazepam---GABA-ligand" data-toc-modified-id="Diazepam---GABA-ligand-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Diazepam - GABA ligand</a></span></li><li><span><a href="#Torin1---mTor-ligand" data-toc-modified-id="Torin1---mTor-ligand-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Torin1 - mTor ligand</a></span></li></ul></li><li><span><a href="#Log1p-to-pKd,-μM" data-toc-modified-id="Log1p-to-pKd,-μM-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Log1p to pKd, μM</a></span></li></ul></div>

# Model

In [1]:
from models.mlt_le import *
import pandas as pd
from data.example_target_sequences import VDR, MTOR, GABA

In [2]:
# Build the MLT-LE model object; its predict(), to_uM() and to_pKd() members
# are what the cells below call. MLTLE presumably comes from the star import
# of models.mlt_le — TODO confirm.
model = MLTLE()

In [3]:
# model.model.summary()

# Test

## NB! All smiles should be canonicalized using RDkit.
### How to canonicalize smiles with RDKit for MLT-LE

```python

# install RDKit
!pip install rdkit-pypi
from rdkit import Chem
def to_non_isomeric_canonical(s):
    """
    Convert a SMILES string to RDKit's canonical, non-isomeric form.

    The molecule is parsed with RDKit and written back with
    ``isomericSmiles=False`` (isomeric features are dropped) and
    ``canonical=True`` (RDKit's canonical atom ordering).

    Parameters
    ----------
    s : str
        SMILES string to normalise.

    Returns
    -------
    str or float
        The canonical non-isomeric SMILES, or NaN when ``s`` cannot be
        parsed or converted. On failure the input and the reason are printed.

    Example:
    pubchem_torin1 = "CCC(=O)N1CCN(CC1)C2=C(C=C(C=C2)N3C(=O)C=CC4=CN=C5C=CC(=CC5=C43)C6=CC7=CC=CC=C7N=C6)C(F)(F)F"
    to_non_isomeric_canonical(pubchem_torin1)
    >>CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7ccccc7c6)cc5c43)cc2C(F)(F)F)CC1
    """
    # Same value as np.nan (which is a plain Python float), but built without
    # NumPy so the failure path works even when `np` was never imported.
    nan = float("nan")
    try:
        mol = Chem.MolFromSmiles(s)
        if mol is None:
            # RDKit signals an unparsable SMILES by returning None, not by raising.
            print(s, "could not be parsed by RDKit")
            return nan
        return Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True)
    except Exception as e:
        # e.g. a non-string input such as a missing value: report it and return NaN
        print(s, e)
        return nan
```

## Calcitriol - VDR ligand

In [4]:
# Pair the same ligand (calcitriol) with each of the three example target
# sequences, one row per ligand/target combination.
calcitriol = "C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O)CC1O"
X_predict = pd.DataFrame(
    {
        "smiles": [calcitriol] * 3,
        "target": [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [5]:
# Run the model on the calcitriol frame built in the previous cell.
# Note: despite its name, VDR_prediction holds one row per target
# (VDR, MTOR, GABA) — X_predict['target'] lists all three.
VDR_prediction = model.predict(X_predict)
# Bare name as the last expression so the result renders as the cell output.
VDR_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,Ki (nM) log1p,IC50 (nM) log1p,Kd (nM) log1p,EC50 (nM) log1p,is_active
0,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,1.646734,1.644245,1.077023,1.596539,0.80849
1,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,5.72745,7.401005,5.536683,7.136034,0.539038
2,C=C1C(=CC=C2CCCC3(C)C2CCC3C(C)CCCC(C)(C)O)CC(O...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,4.972584,7.159822,2.865565,6.029324,0.607581


## Diazepam - GABA ligand

In [6]:
# Pair the same ligand (diazepam) with each of the three example target
# sequences, one row per ligand/target combination.
diazepam = "CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21"
X_predict = pd.DataFrame(
    {
        "smiles": [diazepam] * 3,
        "target": [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [7]:
# Run the model on the diazepam frame built in the previous cell.
# Note: despite its name, GABA_prediction holds one row per target
# (VDR, MTOR, GABA) — X_predict['target'] lists all three.
GABA_prediction = model.predict(X_predict)
# Bare name as the last expression so the result renders as the cell output.
GABA_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,Ki (nM) log1p,IC50 (nM) log1p,Kd (nM) log1p,EC50 (nM) log1p,is_active
0,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,7.800803,10.13839,7.03886,9.023358,0.059901
1,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,5.405934,9.038071,6.264541,9.872234,0.242897
2,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,3.412857,5.453036,4.336466,5.837235,0.778886


## Torin1 - mTor ligand

In [8]:
# Pair the same ligand (Torin1) with each of the three example target
# sequences, one row per ligand/target combination.
torin1 = "CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7ccccc7c6)cc5c43)cc2C(F)(F)F)CC1"
X_predict = pd.DataFrame(
    {
        "smiles": [torin1] * 3,
        "target": [VDR, MTOR, GABA],
    }
)
X_predict.head()

Unnamed: 0,smiles,target
0,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...
1,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...
2,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...


In [9]:
# Run the model on the Torin1 frame built in the previous cell.
# Note: despite its name, MTOR_prediction holds one row per target
# (VDR, MTOR, GABA) — X_predict['target'] lists all three.
MTOR_prediction = model.predict(X_predict)
# Bare name as the last expression so the result renders as the cell output.
MTOR_prediction

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]



Unnamed: 0,smiles,target,Ki (nM) log1p,IC50 (nM) log1p,Kd (nM) log1p,EC50 (nM) log1p,is_active
0,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,3.533484,4.624649,5.141253,5.166994,0.858802
1,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MLGTGPAAATTAATTSSNVSVLQQFASGLKSRNEETRAKAAKELQH...,3.315738,3.691458,5.633108,5.145761,0.904719
2,CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7...,MRKSPGLSDCLWAWILLLSTLTGRSYGQPSLQDELKDNTTVFTRIL...,4.328304,4.940562,6.982586,6.8873,0.835559


# Log1p to pKd, μM


In [10]:
# Derive two extra affinity columns from the predicted 'Kd (nM) log1p'
# column, using the model's own converters element by element.
kd_log1p = VDR_prediction['Kd (nM) log1p']
VDR_prediction['Kd [μM]'] = kd_log1p.apply(model.to_uM)
VDR_prediction['pKd'] = kd_log1p.apply(model.to_pKd)

In [11]:
# Show only the columns whose label contains 'Kd' (the raw log1p value plus
# the two derived columns).
VDR_prediction.filter(like='Kd')

Unnamed: 0,Kd (nM) log1p,Kd [μM],pKd
0,1.077023,0.001936,8.713111
1,5.536683,0.252835,6.597163
2,2.865565,0.016559,7.780967
