In [1]:
from src.waterfall_plot import waterfall, MapSHAP
from matplotlib import pyplot as plt
from src.cupido import Cupido, load_csfp
from rdkit import Chem

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from compchemkit import fingerprints
import numpy as np
import pandas as pd
import os

# Fingerprint object, loaded once and shared by smi2array() and MapSHAP() below.
CSFP = load_csfp()

def smi2array(smi):
    """Encode a single SMILES string with the shared ``CSFP`` fingerprinter.

    Parameters
    ----------
    smi : str
        SMILES string of one molecule.

    Returns
    -------
    numpy.ndarray
        Dense array obtained by densifying the output of
        ``CSFP.transform_smiles([smi])``. Only one SMILES is passed in, so
        the result presumably holds a single row -- TODO confirm against
        the fingerprint implementation.
    """
    sparse_fp = CSFP.transform_smiles([smi])
    # Densify by round-tripping through pandas' sparse accessor.
    return pd.DataFrame.sparse.from_spmatrix(sparse_fp).values

## Load the Cupido models

In [3]:
# One Cupido wrapper per dataset key, each built with the classifier name
# paired with it below. Insertion order (cav, nav, erg) is the row order used
# later when the manuscript figures are assembled.
# NOTE(review): the keys presumably stand for Cav / Nav / hERG ion-channel
# endpoints -- confirm against src.cupido.
CUPIDOs = {
    dataset_key: Cupido(dataset_key, cls_name=classifier)
    for dataset_key, classifier in (
        ('cav', 'XGB'),
        ('nav', 'RF'),
        ('erg', 'RF'),
    )
}

# Loop over the case-study SMILES

In [4]:
# Case-study compounds: name -> SMILES.
# The keys (Italian drug names for astemizole, clomipramine and atorvastatin)
# are used verbatim in the output file names, so they must not be altered.
case_studies = dict(
    astemizolo='COc1ccc(CCN2CCC(Nc3nc4ccccc4n3Cc3ccc(F)cc3)CC2)cc1',
    clomipramina='CN(C)CCCN1C2=CC=CC=C2CCC3=C1C=C(C=C3)Cl',
    atorvastatina='CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4',
)

In [5]:
# For every case-study compound and every Cupido model: compute SHAP values,
# the prediction and its score, render three images (SHAP map, waterfall plot,
# reliability figure) and write them to case_studies/.

# savefig does not create missing directories, so make sure the output folder
# exists; otherwise a fresh "Restart & Run All" fails on the first save.
os.makedirs('case_studies', exist_ok=True)

for smi_name, smi in case_studies.items():
    print(smi_name)

    # The fingerprint depends only on the molecule, so it is computed once per
    # compound rather than once per (compound, model) pair.
    array = smi2array(smi)

    for dataset, cupido in CUPIDOs.items():
        print(dataset)

        shap_values = cupido.compute_shap_values(array)

        # [0, 1]: first row, column 1 of the probability output -- presumably
        # the positive-class probability; confirm against Cupido.predict_proba.
        score = cupido.predict_proba(array)[0, 1]
        pred = cupido.predict(array)[0]

        fig_map = MapSHAP(smi, array, shap_values, FP=CSFP)
        # A max_display=30 argument was previously tried here and left disabled.
        fig_waterfall = waterfall(
            shap_values,
            show=False,
            features_imgPath="imgSMARTS",
            colorNeg='green',
            colorPos="red",
        )

        reliability = cupido.reliability(score)
        fig_ad = cupido.reliability_fig(score, reliability)

        # Common prefix of the three image files written for this pair.
        out_prefix = f'case_studies/{smi_name}_{dataset}'
        fig_map.save(f'{out_prefix}_map.png')
        fig_waterfall.savefig(f'{out_prefix}_waterfall.png', bbox_inches='tight')
        fig_ad.savefig(f'{out_prefix}_ad.png', bbox_inches='tight')

        # Close only after saving, so the figures are neither displayed inline
        # nor kept registered with pyplot across iterations.
        plt.close(fig_waterfall)
        plt.close(fig_ad)

        print('prediction:',pred)
        print('score:',score)

astemizolo
cav
prediction: 1
score: 0.872071870186417
nav
prediction: 0
score: 0.49
erg
prediction: 1
score: 0.97
clomipramina
cav
prediction: 1
score: 0.5939675532863243
nav
prediction: 1
score: 0.65
erg
prediction: 1
score: 0.94
atorvastatina
cav
prediction: 1
score: 0.5232139805679067
nav
prediction: 0
score: 0.27
erg
prediction: 0
score: 0.265


# Figures for Manuscript

In [7]:
from matplotlib import pyplot as plt

In [35]:

# Assemble one summary figure per compound from the PNGs written to
# case_studies/ by the case-study loop: one row per model in CUPIDOs, one
# column per image type.
panel_names = ['waterfall', 'map', 'ad']

for smi_name in case_studies:

    # Size the grid from the data instead of hard-coding 3x3, so that adding or
    # removing a model cannot silently drop rows (zip truncation) or leave
    # blank axes behind. squeeze=False keeps `axes` two-dimensional even for a
    # single row or column.
    fig, axes = plt.subplots(
        nrows=len(CUPIDOs),
        ncols=len(panel_names),
        figsize=(20, 15),
        gridspec_kw={'wspace': -.4},  # negative spacing pulls the columns together
        squeeze=False,
    )

    for dataset, axes_row in zip(CUPIDOs, axes):

        for ax, element_name in zip(axes_row, panel_names):
            ax.imshow(plt.imread(f'case_studies/{smi_name}_{dataset}_{element_name}.png'))
            ax.axis('off')

    fig.savefig(f'case_studies/{smi_name}_full.png',bbox_inches='tight')