# Notebook for loading and processing molecules using RDKit

In [1]:
import pandas as pd
import numpy as np
from glob import glob
import os, pickle

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDLogger 
RDLogger.DisableLog('rdApp.*')



In [2]:
from analyse_db_molecules import *

### Cocrystallized Molecules

In [3]:
# Load the co-crystallized ligands from their SDF files.
sdf_input_path = '../../FILES/CRYSTALS/LIGS_FXA/POCKET_LIGS_PREP_SDF/'


def sort_function(x):
    """Return the part of the file name (last path component) before the first underscore."""
    return x.split('/')[-1].split('_')[0]


list_sdf_files = get_files_list(sdf_input_path, actives_name='LIG', sufix='from_pdb', sort_func=sort_function)
df_pdi_lig = load_cocrys_molecules_from_dir(list_sdf_files)

# Transform the previous dataframe into a (Lig, Activity, mol_rdk, sanitized) dataframe.
# This step applies only to the co-crystallized molecules.
# `.assign` builds a new frame instead of adding columns to a slice of `df_pdi_lig`,
# which avoids pandas' SettingWithCopyWarning and leaves `df_pdi_lig` untouched.
df_COCRYS = (
    df_pdi_lig[['Lig', 'mol_rdk']]
    .assign(
        # Every co-crystallized ligand is labelled as an active.
        Activity='active',
        # False only for rows whose `validation` value is 'v3'.
        # `.to_numpy()` keeps the assignment positional, independent of the index.
        sanitized=(df_pdi_lig['validation'] != 'v3').to_numpy(),
    )
    [['Lig', 'Activity', 'mol_rdk', 'sanitized']]
    # Keep the first occurrence of each ligand name and use it as the index.
    .drop_duplicates('Lig')
    .set_index('Lig')
)
df_COCRYS

Unnamed: 0_level_0,Activity,mol_rdk,sanitized
Lig,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RPR,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed49120>,False
815,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed49030>,True
PR2,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed490d0>,True
DX9,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed49170>,False
Z34,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed49300>,False
...,...,...,...
987,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed55990>,True
44I,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed559e0>,True
4O4,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed55a30>,True
4O5,active,<rdkit.Chem.rdchem.Mol object at 0x7f48aed55a80>,True


### DEKOIS Molecules

In [4]:
# Collect the DEKOIS SDF files, load them, and tabulate the molecules.
sdf_input_path = '../../FILES/CRYSTALS/LIGS_FXA/DEKOIS2/sdf/'
list_sdf_files = get_files_list(sdf_input_path, actives_name='ligand')

dekois_mols = load_molecules_from_dir(list_sdf_files)
df_DEKOIS = get_mol_dataframe(dekois_mols)

# Preview the first three rows.
df_DEKOIS.head(n=3)

Unnamed: 0,Activity,mol_rdk,sanitized
ligand_1,active,<rdkit.Chem.rdchem.Mol object at 0x7f48ade310d0>,True
ligand_2,active,<rdkit.Chem.rdchem.Mol object at 0x7f48ac5f35d0>,True
ligand_3,active,<rdkit.Chem.rdchem.Mol object at 0x7f48ac5f3210>,True


### DUD Molecules

In [5]:
# Collect the DUD SDF files, load them, and tabulate the molecules.
sdf_input_path = '../../FILES/CRYSTALS/LIGS_FXA/DUD2006/sdf/'
list_sdf_files = get_files_list(sdf_input_path, actives_name='ligand')

dud_mols = load_molecules_from_dir(list_sdf_files)
df_DUD = get_mol_dataframe(dud_mols)

# Preview the first three rows.
df_DUD.head(n=3)

Unnamed: 0,Activity,mol_rdk,sanitized
ligand_1,active,<rdkit.Chem.rdchem.Mol object at 0x7f487d705ad0>,True
ligand_2,active,<rdkit.Chem.rdchem.Mol object at 0x7f487d705e90>,True
ligand_3,active,<rdkit.Chem.rdchem.Mol object at 0x7f487d705350>,True


## Save Molecules Dictionary

In [6]:
# Group the three molecule dataframes under their library names.
lig_datasets = dict(
    COCRYS=df_COCRYS,
    DUD=df_DUD,
    DEKOIS=df_DEKOIS,
)

In [7]:
import pickle

# Pickle file used as an on-disk cache for the dictionary of molecule dataframes.
file_rd_mols = './fxa_rdkit_db_molecules.obj'

if os.path.isfile(file_rd_mols):
    # The cache already exists: reuse it instead of overwriting it.
    # NOTE: pickle.load can execute arbitrary code; only load files produced by this notebook.
    with open(file_rd_mols, 'rb') as f:
        lig_datasets = pickle.load(f)
else:
    # First run: persist the freshly built dictionary.
    with open(file_rd_mols, 'wb') as f:
        pickle.dump(lig_datasets, f)

In [8]:
# List the current working directory (long format, hidden files, human-readable sizes).
!ls -alh

total 24M
drwxrwxr-x 4 joel joel 4,0K may 27 12:44 .
drwxrwxr-x 9 joel joel 4,0K may 26 17:18 ..
-rw-rw-r-- 1 joel joel 6,3K abr 18 13:33 0_Preparing_ligands.ipynb
-rw-rw-r-- 1 joel joel 6,7K may 25 11:42 1_Preprocessing_Docking_results.ipynb
-rw-rw-r-- 1 joel joel 409K may 27 12:44 2_Comparing_Molecules_Among_Molecular_Libraries.ipynb
-rw-rw-r-- 1 joel joel 5,5K may 25 11:43 2_Improving_Ranking_From_Docking_Scores.ipynb
-rw-rw-r-- 1 joel joel 4,2K may 27 12:44 2_Loading_molecules_from_db_with_rdkit.ipynb
-rw-rw-r-- 1 joel joel 2,7M may 25 15:52 3_Calculating_Metrics_to_Evaluate_VS_Performance.ipynb
-rw-rw-r-- 1 joel joel 1,8M may 26 17:16 4_ML_Model_Selection.ipynb
-rw-rw-r-- 1 joel joel 4,4K may 27 11:47 analyse_db_molecules.py
-rw-rw-r-- 1 joel joel 2,4K may 25 12:06 auxiliar_plot_swarm.py
-rw-rw-r-- 1 joel joel  271 abr 18 12:38 chim_addh.py
-rw-rw-r-- 1 joel joel  444 abr 18 12:38 chim_addh.pyc
-rw-rw-r-- 1 joel joel 895K may 25 11:26 DEKOIS2_VINARDO_137_prots_1240_m