<a href="https://colab.research.google.com/github/alessandronascimento/pyLiBELa/blob/main/Colabs/pyLiBELa_ligand_similarity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quick Instructions

## Required files:

To run pyLiBELa in this notebook, you will need:
* A reference ligand in MOL2 format.
* (Optional) A set of ligands to be docked, in SMILES format.

Make sure that the MOL2 files include hydrogen atoms and atomic charges. You can add hydrogens and charges using the DockPrep tool available in [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/) or UCSF [ChimeraX](https://www.cgl.ucsf.edu/chimerax/).

The notebook can download sets of compounds (as SMILES) from different [ZINC](http://zinc.docking.org) catalogs, such as *FDA*, *CHEMBL30*, and *DrugBank*, among others, and screen these catalogs to identify molecules that are similar to your query compound.

In [None]:
#@title Installing pyLiBELa (~3 min) {display-mode: "form"}
#@markdown The instalation process requires the runtime to restart. You can safely ignore the warning message.

#Installing Dependencies

# To avoid issues with python version, openbabel libraries are installed from mamba and not from apt-get. This takes longer, but ensures that python versions match.

%%capture
! apt-get install python-dev-is-python3 zlib1g-dev libeigen3-dev libgsl-dev libnlopt-cxx-dev libgsl-dev #libopenbabel-dev openbabel
!pip install ipython-autotime
!pip install py3Dmol
!pip install shutil
!pip install joblib
%load_ext autotime
!pip3 install condacolab
import condacolab
condacolab.install()
!mamba install openbabel

#Compiling pyLiBELa

! rm -f Makefile*
! rm -rf obj src test
! rmdir obj src
! git clone --branch main https://github.com/alessandronascimento/pyLiBELa.git
! wget https://raw.githubusercontent.com/alessandronascimento/pyLiBELa/main/Colabs/Makefile

! mv pyLiBELa/src src
! mv pyLiBELa/test test
! rm -rf pyLiBELa
! mkdir -p obj
! sed -i 's+-I/usr/include/openbabel3+-I/usr/local/include/openbabel3+g' Makefile
! make -j2


In [None]:
#@title Importing pyLiBELa libraries

import os
import timeit
import numpy as np
import glob
import ipywidgets as widgets
from IPython.display import display
from google.colab import files

# Import every pyLiBELa binding module into the notebook namespace.
# Presumably these py* modules are the libraries produced by `make` in the
# installation cell - TODO confirm.
try:
  from pyPARSER import *
  from pyMol2 import *
  from pyWRITER import *
  from pyGrid import *
  from pyCOORD_MC import *
  from pyFindHB import *
  from pyEnergy2 import *
  from pyGaussian import *
  from pyConformer import *
  from pyRAND import *
  from pyMcEntropy import *
  from pySA import *
  from pyOptimizer import *
  from pyMC import *
  from pyFullSearch import *
  from pyDocker import *
  from pyEngine import *
  print('pyLiBELa is imported!')
except ImportError:
  # Only a message is printed here: modules after the failing import stay
  # unimported, so later cells that use their names will fail until this
  # cell is re-run successfully.
  print('An ImportError occurred, try running this cell again!')

def File_extension_manager(name):
  """Check whether a file name carries a supported MOL2 extension.

  Args:
    name: file name (or path) to inspect.

  Returns:
    A ``(role_flag, name_ext_string)`` tuple: ``(True, '.mol2')`` or
    ``(True, '.mol2.gz')`` when the name ends with that extension, and
    ``(False, '')`` otherwise. A hint is printed in the unsupported case.
  """
  # Match on the real suffix: names without any dot no longer raise
  # IndexError, and 'x.mol2.<other>' is no longer mistaken for '.mol2.gz'.
  for extension in ('.mol2.gz', '.mol2'):
    if name.endswith(extension):
      return True, extension
  print('Check your file extension!\nIt needs to be .mol2 or .mol2.gz.')
  return False, ''

def rec_changed(changes):
  """Observer callback registered for value changes of ``rec_toggle``.

  The toggle's current value is read into a local name only; nothing is
  stored or returned, and ``changes`` is not inspected.
  """
  selected_receptor = rec_toggle.value

def lig_changed(changes):
  """Observer callback registered for value changes of ``lig_toggle``.

  The toggle's current value is read into a local name only; nothing is
  stored or returned, and ``changes`` is not inspected.
  """
  selected_ligand = lig_toggle.value

def mol2_to_pdb(mol2file, pdbfile):
  !obabel -imol2 {mol2file} -opdb -O {pdbfile} > /dev/null

def join_pdb(name_file_rec,name_file_lig,name_file_merge):
  !cp $name_file_rec temp_rec.pdb
  !cp $name_file_lig temp_lig.pdb

  !sed -i '/END\|CONECT/d' temp_rec.pdb
  !sed -i 's/A  /B  /g' temp_lig.pdb
  !sed -i '/CONECT/d' temp_lig.pdb
  !cat temp_rec.pdb temp_lig.pdb > $name_file_merge
  !rm -rf temp_rec.pdb temp_lig.pdb


In [None]:
#@title Settings {display-mode: "form"}
#@markdown Please, set the parameters for the docking simulation.

Input = PARSER()
Input.output = "test" #@param {type:"string"}
search_box = 12.0 #@param {type:"number"}
Input.atom_limit = 100 #@param {type:"number"}

Input.generate_conformers = True;
Input.dock_parallel = True;
Input.parallel_jobs = 2;
Input.write_grids = False;
Input.load_grid_from_file = False;
Input.use_grids = False;
Input.write_mol2 = True

Input.dielectric_model = "r"
#Input.diel = 2.0;
Input.scoring_function = 0
Input.grid_prefix = 'McGrid'
Input.solvation_alpha = 0.1
Input.solvation_beta = -0.005

# Optimization parameter:
Input.overlay_optimizer = "mma" #@param ["mma", "ln_auglag", "subplex", "none"]
Input.energy_optimizer = "none"
Input.min_tol = 1E-5 #@param {type:'number'}
Input.min_delta = 1E-5 #@param {type:'number'}
Input.dock_min_tol = 1E-5
Input.timeout = 20 #@param {type:'integer'}
Input.min_timeout = 20

if (Input.scoring_function < 3):
    delta = 1.5
    Input.deltaij6 = (delta*delta*delta*delta*delta*delta)
    delta_es = 1.75
    Input.deltaij_es6 = pow(delta_es, 6);
    Input.deltaij_es3 = (delta_es*delta_es*delta_es)

Input.conf_search_trials = 10000;
Input.conformers_to_evaluate = 1;
Input.lig_conformers = 10;
Input.sort_by_energy = False;
Input.use_score_optimizer = False
Input.use_overlay_cutoff = True
Input.overlay_cutoff = 0.65

Input.search_box_x, Input.search_box_y, Input.search_box_z = search_box, search_box, search_box;

Input.ligand_energy_model = "MMFF94";
Input.atomic_model_ff = "AMBER";

Input.use_writeMol2_score_cutoff = True #@param {type:'boolean'}
Input.writeMol2_score_cutoff = 0.7 #@param {type:'number'}


%cd '/content/'
!mkdir -p uploads
!mkdir -p results

Input.output = 'results/' + Input.output
Input.grid_prefix = 'results/' + Input.grid_prefix


In [None]:
#@title Uploading files{display-mode: "form"}
#@markdown Upload your reference ligand file in MOL2 format.


%cd 'uploads'
!rm -rf *

uploaded = files.upload() #becomes a dictionary, for some reason
up_list = list(uploaded.keys())

rec_toggle = widgets.ToggleButtons(description='Receptor:', style = {'description_width': 'initial'}, options=up_list)
lig_toggle = widgets.ToggleButtons(description='Reference Ligand:', style = {'description_width': 'initial'}, options=up_list)

display(rec_toggle)
display(lig_toggle)

rec_toggle.observe(rec_changed,'value')
lig_toggle.observe(lig_changed,'value')

In [None]:
#@title Reading search ligands {display-mode: "form"}
#@markdown If you want to use your own dataset, just upload a file with .smi extensions into the **uploads** folder. The file must have one compound per line with a single-word description.
#@markdown Example: OC(=O)CC1=CNC2=C1C=CC=C2 MOL000000.

%cd /content

# Getting smiles from ZINC
use_chembl30 = False # @param {type:"boolean"}
use_chembi = False # @param {type:"boolean"}
use_drugbank = False # @param {type:"boolean"}
use_fda = True # @param {type:"boolean"}
use_hmdbdrug = False # @param {type:"boolean"}
chunck_size = 100 # @param {type: "integer"}


if (use_chembl30):
  !curl -O https://files.docking.org/catalogs/source/chembl30.src.txt
  ! mv chembl30.src.txt uploads/chembl30.smi
if (use_chembi):
  !curl -O https://files.docking.org/catalogs/source/chebi.src.txt
  !mv chebi.src.txt uploads/chebi.smi
if (use_drugbank):
  !curl -O https://files.docking.org/catalogs/source/dbap.src.txt
  !mv dbap.src.txt uploads/dbpap.smi
if (use_fda):
  !curl -O https://files.docking.org/catalogs/source/dbfda.src.txt
  !mv dbfda.src.txt uploads/dbfda.smi
if (use_hmdbdrug):
  !curl -O https://files.docking.org/catalogs/source/hmdbdrug.src.txt
  !mv hmdbdrug.src.txt uploads/hmdbdrug.smi

!rm -f uploads/multimol.smi
!cat uploads/*.smi > uploads/multimol.smi
! grep -v smiles uploads/multimol.smi > tmp
! mv tmp uploads/multimol.smi

!split -l {chunck_size} -d uploads/multimol.smi uploads/multimol.smi.
chuncks = glob.glob('uploads/multimol.smi.*')
print()
print()
print('Input file divided into %d chuncks with %d molecules.' % (len(chuncks), chunck_size))
print()


In [None]:
#@title Similarity Search (might take a while)
# Remove previous results, if any
! rm -f {Input.output}_dock.mol2.gz lig2.pdb

# NOTE(review): Rec and RefLig are both loaded from the file selected in the
# reference-ligand toggle; the receptor toggle's value is not used here -
# confirm this is intended.
Rec = Mol2(Input, 'uploads/' + lig_toggle.value)
RefLig = Mol2(Input, 'uploads/' + lig_toggle.value)
Coord = COORD_MC()
# `center` and `Dock` are not referenced again in this cell.
center = Coord.compute_com(RefLig)
Writer = WRITER(Input)
Dock = Docker(Writer)
Grids = Grid(Input, Writer)
DockEngine = Engine(Writer)

# The ligands to screen are read from SMILES files.
Input.use_smiles = True

print()
print('Starting docking calculation...')
print()
# One Run_docking call per chunk file; the same Input object is reused and only
# its smiles_multifile attribute is updated between calls.
for chunck in chuncks:
  print('Docking chunck %s' % chunck)
  Input.smiles_multifile = chunck
  DockEngine.Run_docking(Input, Rec, RefLig, Grids)
  print()
print()
print('Docking calculation finished!')
print()

In [None]:
#@title Downloading files
import shutil

# Input.output was set to 'results/<job name>' in the settings cell; the second
# path component is used to name the archive.
jobname = Input.output.split('/')
results = 'pyLiBELa_{}'.format(jobname[1])
zip_name = '{}.zip'.format(results)
print('File {}.zip is being prepared'.format(results))
# Zip the contents of the results folder, then hand the archive to the browser.
shutil.make_archive(base_name=results, format='zip', root_dir='results')
files.download(zip_name)