In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import shap
from sklearn.model_selection import train_test_split

from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw

import joblib

In [None]:
# Directory holding the curated dataset and the trained model. It defaults to the
# original author's local folder; set the SHAP_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "SHAP_DATA_DIR",
    r"D:\OneDrive\Documentos\LabMol\IC-Citotoxicidade\datasets\AID_1345083 HEK\Balanced\SHAP",
)
DATASET_FILE = "curated_reduced(SMILES).csv"
MODEL_FILE = "xgb_classifier_ecfp_fp_2_1024.joblib"

# Curated dataset; later cells read its 'SMILES' and 'Outcome' columns.
df = pd.read_csv(os.path.join(DATA_DIR, DATASET_FILE))

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that come from a trusted source.
model = joblib.load(os.path.join(DATA_DIR, MODEL_FILE))

In [None]:
# Re-create the train/test split of the SMILES and their outcomes.
# Make sure random_state is the same one that was used for training.
smiles_train, smiles_test, y_train, y_test = train_test_split(
    df['SMILES'],
    df['Outcome'],
    test_size=0.2,
    random_state=4,
)

# Turn the test SMILES Series into a one-column DataFrame so that further
# per-molecule columns can be attached to it later.
smiles_test = smiles_test.to_frame()

### Descritores e SHAP

In [None]:
# Make sure the radius and nBits below are the ones you intend to use
# (presumably those the model was trained with — confirm).
mols = [Chem.MolFromSmiles(smi) for smi in smiles_test['SMILES']]

# Chem.MolFromSmiles returns None for a SMILES it cannot parse. Fail early with
# the offending entries instead of an opaque error from the fingerprint call.
# Rows are deliberately not dropped, so X stays aligned row-for-row with y_test.
unparsed_smiles = [smi for smi, mol in zip(smiles_test['SMILES'], mols) if mol is None]
if unparsed_smiles:
    raise ValueError(
        f"{len(unparsed_smiles)} SMILES could not be parsed by RDKit "
        f"(first few: {unparsed_smiles[:5]})"
    )

smiles_test['mol'] = mols

# One Morgan bit-vector fingerprint per test molecule, stacked into an array
# with one row per molecule.
X = np.array([AllChem.GetMorganFingerprintAsBitVect(mol, radius = 2, nBits = 1024, useFeatures=False) for mol in mols])

In [None]:
# Saving bitInfo: one dict per test molecule, in the same order as smiles_test
def _morgan_bit_info(mol):
    """Return the dict that was passed as bitInfo while fingerprinting `mol`.

    The fingerprint itself is discarded; only the dict handed to RDKit through
    the bitInfo argument is kept.
    """
    bit_info = {}
    AllChem.GetMorganFingerprintAsBitVect(
        mol, radius=2, nBits=1024, useFeatures=False, bitInfo=bit_info
    )
    return bit_info


bi_all = [_morgan_bit_info(mol) for mol in smiles_test['mol']]

In [None]:
# Build a SHAP tree explainer for the loaded model and apply it to the test-set
# fingerprint array X. The result is what the plotting cells consume.
explainer = shap.TreeExplainer(model)
shap_values = explainer(X)

### Plotting SHAP values

In [None]:
# Bar plot of the SHAP values computed for the test set.
shap.plots.bar(shap_values)

In [None]:
# Violin plot of the same SHAP values.
shap.plots.violin(shap_values)

In [None]:
# Waterfall plot for a single explanation: the entry at position 3 of shap_values.
# max_display presumably caps the number of rows drawn — confirm against SHAP docs.
shap.plots.waterfall(shap_values[3], max_display= 5)


### Exploração dos bits

In [None]:
# Fingerprint bit to search for.
bit_desejado = 890

# Positions (in bi_all order) of the test molecules whose bitInfo dict has that bit as a key.
compostos_com_bit_desejado = [
    posicao
    for posicao, info_bits in enumerate(bi_all)
    if bit_desejado in info_bits
]

# Show at most the first ten matching positions.
print(compostos_com_bit_desejado[:10])

In [None]:
from ipywidgets import interact,fixed,IntSlider
def renderFpBit(mol,bitIdx,bitInfo,fn):
    """Render one fingerprint bit of a molecule in the notebook output.

    Parameters:
        mol: molecule handed to the drawing function.
        bitIdx: the bit to draw; forwarded unchanged to `fn`.
        bitInfo: bit-information mapping forwarded to `fn`.
        fn: drawing callable, invoked as ``fn(mol, bitIdx, bitInfo)``.

    Returns:
        Whatever ``display`` returns for the drawing produced by `fn`.

    Note: ``display`` is not imported in this cell; the function relies on the
    notebook environment making it available.
    """
    drawing = fn(mol, bitIdx, bitInfo)
    return display(drawing)

In [None]:
index = 3  # position of the test-set molecule to inspect
bit_infos = bi_all[index]  # bitInfo dict recorded for the molecule at that position
mol_all_list = smiles_test['mol'].iloc[index]  # the molecule at that position (a single mol, despite the name)

# Interactive viewer: bitIdx is chosen among the keys of this molecule's bitInfo
# dict, while mol, bitInfo and the drawing function are passed as fixed values.
# The trailing ';' suppresses the cell's output repr.
interact(renderFpBit, bitIdx=bit_infos.keys(),mol=fixed(mol_all_list), 
         bitInfo=fixed(bit_infos),fn=fixed(Draw.DrawMorganBit));

In [None]:
index_predict = 3  # position of the test-set molecule to check

# Predict for the row selected by index_predict. The original hard-coded X[1],
# so the prediction and the true label below came from different molecules.
# reshape(1, -1) turns the single fingerprint row into a one-sample 2-D array.
result = model.predict(X[index_predict].reshape(1, -1))

print('Predicted value: ', result, 'Real value: ', y_test.iloc[index_predict])