# MolMap to CYP450 dataset (dual-path approach)

In [None]:
import os

# Select the GPU before TensorFlow is imported so that the device choice is
# already in the environment when TensorFlow initialises CUDA.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        # Memory growth cannot be changed once the GPU has been initialised
        # (e.g. when this cell is re-run in a live kernel); report and go on.
        print("Could not enable GPU memory growth:", err)
else:
    # No GPU visible: indexing physical_devices[0] would raise IndexError.
    print("No GPU visible to TensorFlow; continuing without GPU memory growth.")

### Load data

In [1]:
import numpy as np
import pandas as pd

# Read the gzip-compressed CYP450 table into a DataFrame.
cyp450_csv = "../../../data/CYP450.csv.gz"
data = pd.read_csv(cyp450_csv, compression='gzip')

### Set up

In [2]:
# Keep only the SMILES and the 2C9 label, discarding molecules whose
# 2C9 activity is missing.
data = data[['smiles', 'label_2c9']].dropna(subset=["label_2c9"])

# Model inputs (SMILES strings) and one-hot encoded targets.
smi = data['smiles']
Y = pd.get_dummies(data['label_2c9']).values

# Class balance, read from the second one-hot column.
second_column = Y[:, 1].tolist()
print("Inactive (0):", second_column.count(0))
print("Active (1):", second_column.count(1))

Inactive (0): 7429
Active (1): 2621


### Do MolMap: from MolDs & FFs to Fmaps

In [3]:
import pickle
from pathlib import Path
from molmap import MolMap
from molmap import feature

# Cache locations for the two feature-map arrays (descriptor- and
# fingerprint-based).
X1_file = Path("../../../files/CYP450/molmap/X1.data")
X2_file = Path("../../../files/CYP450/molmap/X2.data")

if X1_file.is_file() and X2_file.is_file():
    # Both caches exist: load them instead of recomputing.
    # NOTE(review): pickle.load can execute arbitrary code; only load cache
    # files that were produced by this notebook.
    with open(X1_file, "rb") as f:
        X1 = pickle.load(f)
    with open(X2_file, "rb") as f:
        X2 = pickle.load(f)

else:
    # At least one cache is missing: compute both maps and save them to disk.
    # Create the cache directories first so a missing folder fails (or is
    # fixed) before the expensive transforms rather than after them.
    for cache_file in (X1_file, X2_file):
        cache_file.parent.mkdir(parents=True, exist_ok=True)

    # compute MolDs
    mp1 = MolMap(ftype='descriptor', metric='cosine',)
    mp1.fit(verbose=0, method='umap', min_dist=0.1, n_neighbors=15,)

    # compute FFs, restricted to the three selected fingerprint subtypes
    bitsinfo = feature.fingerprint.Extraction().bitsinfo
    flist = bitsinfo[bitsinfo.Subtypes.isin(['PubChemFP', 'MACCSFP', 'PharmacoErGFP'])].IDs.tolist()
    mp2 = MolMap(ftype='fingerprint', fmap_type='scatter', flist=flist)
    mp2.fit(method='umap', min_dist=0.1, n_neighbors=15, verbose=0)

    # get Fmaps
    X1 = mp1.batch_transform(smi)
    X2 = mp2.batch_transform(smi)

    # save to disk
    with open(X1_file, "wb") as f:
        pickle.dump(X1, f, pickle.HIGHEST_PROTOCOL)
    with open(X2_file, "wb") as f:
        pickle.dump(X2, f, pickle.HIGHEST_PROTOCOL)

### Split train, validation and test

In [4]:
import sys
sys.path.append("../../../src")
from utils import Rdsplit

# Fixed seed so the train/validation/test partition is repeatable.
SPLIT_SEED = 888

# Rdsplit yields three index collections: train, validation and test.
splits = Rdsplit(data, random_state=SPLIT_SEED)
train_idx, valid_idx, test_idx = splits

8040 1005 1005


In [5]:
# Each X is a pair (X1 subset, X2 subset) so that both feature maps are
# sliced with the same indices; Y is sliced the same way.
trainX, validX, testX = (
    (X1[idx], X2[idx]) for idx in (train_idx, valid_idx, test_idx)
)
trainY, validY, testY = (
    Y[idx] for idx in (train_idx, valid_idx, test_idx)
)

### Build MolMapNet model (MultiClassEstimator)

In [6]:
from molmap.model import MultiClassEstimator

# Estimator settings gathered in one place; output size and feature-map
# shapes are taken from the arrays themselves.
estimator_params = dict(
    n_outputs=trainY.shape[1],
    fmap_shape1=X1.shape[1:],
    fmap_shape2=X2.shape[1:],
    metric='ROC',
    dense_layers=[128, 64],
    gpuid=0,
    epochs=100,
)

clf = MultiClassEstimator(**estimator_params)

MultiClassEstimator(epochs=100, fmap_shape1=(37, 37, 13),
                    fmap_shape2=(72, 72, 3), gpuid='0', n_outputs=2)


### Train model

In [None]:
# Fit on the training split; the validation split is passed alongside it.
clf.fit(
    trainX, trainY,
    validX, validY,
)



ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 668, in on_start
    yield
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 372, in fit
    prefix='val_')
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/contextlib.py", line 99, in __exit__
    self.gen.throw(type, value, traceback)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 685, in on_epoch
    self.callbacks.on_epoch_end(epoch, epoch_logs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/callbacks.py", line 298, in on_epoch_end
    callback.on_epoch_end(epoch, logs)
  File "/home/nuriacami/Documents/bidd-molmap/molmap/model/cbks2.py", line 286, in on_epoch_end
    y_pred = self.model.predict(self.x)
  Fi

### Plot training history

In [None]:
# Training vs. validation AUC taken from the estimator's recorded history.
auc_history = pd.DataFrame(clf.history)[['auc', 'val_auc']]
auc_history.plot(title="Performance Learning Curve")

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-3fc3d95be89f>", line 1, in <module>
    pd.DataFrame(clf.history)[['auc', 'val_auc']].plot(title="Performance Learning Curve")
AttributeError: 'MultiClassEstimator' object has no attribute 'history'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'AttributeError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 

In [None]:
# Training vs. validation loss taken from the estimator's recorded history.
loss_history = pd.DataFrame(clf.history)[['loss', 'val_loss']]
loss_history.plot(title="Optimization Learning Curve")

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-79630c28df07>", line 1, in <module>
    pd.DataFrame(clf.history)[['loss', 'val_loss']].plot(title="Optimization Learning Curve")
AttributeError: 'MultiClassEstimator' object has no attribute 'history'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'AttributeError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", li

In [None]:
# Report the best epoch and the best value tracked by the estimator.
best_epoch = clf._performance.best_epoch
best_value = clf._performance.best
print('Best epochs: %.2f, Best loss: %.2f' % (best_epoch, best_value))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-e2e31f9dd91e>", line 1, in <module>
    print('Best epochs: %.2f, Best loss: %.2f' % (clf._performance.best_epoch, clf._performance.best))
AttributeError: 'MultiClassEstimator' object has no attribute '_performance'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'AttributeError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/u

### Calibration curve

In [None]:
import matplotlib.pyplot as plt

# Pair the second label column of the test set with the predicted
# probability from the second output column.
label_and_probability = [testY[:, 1], clf.predict_proba(testX)[:, 1]]
df_pred = pd.DataFrame(label_and_probability).T
df_pred.columns = ['y_true', 'y_pred_prob']

# Predicted probabilities grouped by true class.
is_inactive = df_pred['y_true'] == 0
is_active = df_pred['y_true'] == 1
dist0 = df_pred[is_inactive]['y_pred_prob'].tolist()
dist1 = df_pred[is_active]['y_pred_prob'].tolist()

# One step histogram per class on a shared wide figure.
plt.figure(figsize=(16, 4))
plt.title('Predicted probabilities distribution for each class')
for class_probs, class_label, class_color in (
    (dist0, "Inactive (0)", 'r'),
    (dist1, "Active (1)", 'g'),
):
    plt.hist(class_probs, histtype='step', label=class_label, color=class_color)
plt.xlabel('Predicted probability')
plt.ylabel('Counts')
plt.legend()
plt.show()

ERROR! Session/line number was not unique in database. History logging moved to new session 3562


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-657e920005a1>", line 4, in <module>
    df_pred = pd.DataFrame([testY[:, 1], clf.predict_proba(testX)[:,1]]).T
  File "/home/nuriacami/Documents/bidd-molmap/molmap/model/model.py", line 513, in predict_proba
    y_prob = self._model.predict(X)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 909, in predict
    use_multiprocessing=use_multiprocessing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 462, in predict
    steps=steps, callbacks=callbacks, **kwargs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 444, in 

In [None]:
from sklearn.calibration import calibration_curve

# Reliability diagram data: fop is plotted as the fraction of positives,
# mpv as the mean predicted probability.
true_labels = df_pred['y_true']
predicted_probs = df_pred['y_pred_prob']
fop, mpv = calibration_curve(true_labels, predicted_probs, n_bins=10)

plt.title('Calibration curve')

# Dashed diagonal: reference line for a perfectly calibrated model.
plt.plot([0, 1], [0, 1], linestyle='--', c='cadetblue')

# Observed reliability of the model.
plt.plot(mpv, fop, marker='.', c='deeppink')

plt.xlabel('Mean predicted probability (Positive class:1)')
plt.ylabel('Fraction of positives (Positive class:1)')
plt.show()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-0d6d77a3ebf6>", line 4, in <module>
    fop, mpv = calibration_curve(df_pred['y_true'], df_pred['y_pred_prob'], n_bins=10)
NameError: name 'df_pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1169, in get_records
    return _fixed_g

### Evaluation on test set: AUC

In [None]:
# Score the fitted estimator on the held-out test split and show the
# result rounded to three decimals.
auc = clf.score(testX, testY)
auc_rounded = round(auc, 3)
print(auc_rounded)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-1883fbc42122>", line 1, in <module>
    auc = clf.score(testX, testY)
  File "/home/nuriacami/Documents/bidd-molmap/molmap/model/model.py", line 544, in score
    metrics = self._performance.evaluate(X, y)
AttributeError: 'MultiClassEstimator' object has no attribute '_performance'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'AttributeError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hod

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 316, in wrapped
    return f(*args, **kwargs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 350, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/inspect.py", line 1490, in getinnerframes
    frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/inspect.py", line 1448, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/inspect.py", line 696, in getsourcefile
    if getattr(getmodule(object, filename), '__loader__', None) is not None:
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/inspect.py", line 733, in getmodule
    if ismodule(module) and hasattr(module, '__file__'):
  File "/home

In [None]:
from sklearn import metrics

# ROC points and the area under them, from the test-set predictions.
true_labels = df_pred['y_true']
predicted_probs = df_pred['y_pred_prob']
fpr, tpr, threshold = metrics.roc_curve(true_labels, predicted_probs)
roc_auc = metrics.auc(fpr, tpr)

# Model ROC in blue with the AUC in the legend; dashed red diagonal as the
# chance-level reference.
auc_label = 'AUC = %0.3f' % roc_auc
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label=auc_label)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')

# Both axes span the full [0, 1] rate range.
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-03764d4df39a>", line 4, in <module>
    fpr, tpr, threshold = metrics.roc_curve(df_pred['y_true'], df_pred['y_pred_prob'])
NameError: name 'df_pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1169, in get_records
    return _fixed_g

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3263, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3360, in run_code
    self.showtraceback(running_compiled_code=True)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2047, in showtraceback
    value, tb, tb_offset=tb_offset)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1436, in structured_traceback
    self, etype, value, tb, tb_offset, number_of_lines_of_context)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ult