<a href="https://colab.research.google.com/github/alessandronascimento/pyLiBELa/blob/main/Colabs/pyLiBELa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Downloading and Installing pyLiBELa {display-mode: "form"}
%%capture
!pip3 install condacolab
import condacolab
condacolab.install()
! apt-get install openbabel libopenbabel-dev python-dev-is-python3 zlib1g-dev libeigen3-dev libgsl-dev libnlopt-cxx-dev libgsl-dev
! mamba install -c anaconda py-boost
! git clone https://github.com/alessandronascimento/pyLiBELa.git
! mv pyLiBELa/src src
! rm -rf pyLiBELa
! mkdir -p obj
! rm -f Makefile*
! wget https://raw.githubusercontent.com/alessandronascimento/pyLiBELa/main/Colabs/Makefile
! make -j4 

In [1]:
#@title Importing libraries {display-mode: "form"}

from pyPARSER import *
from pyMol2 import *
from pyWRITER import *
from pyGrid import *
from pyCOORD_MC import *
from pyFindHB import * 
from pyEnergy2 import *
from pyGaussian import *
from pyConformer import *
from pyRAND import *
from pyMcEntropy import *
from pySA import *
from pyOptimizer import *
from pyMC import *
from pyFullSearch import *
from pyDocker import *

In [2]:
#@title Getting SB2021 data {display-mode: "form"}


%%capture
from google.colab import drive
drive.mount('/content/drive/')
path = '/content/drive/MyDrive/Projects_Data/pyLiBELa/SB/' #@param {type:"string"}

with open(path+'list.txt') as f:
    pdb_codes_list = f.readlines()

pdb_codes_list = [pdb_code[0:4] for pdb_code in pdb_codes_list]

In [1]:
#@title Parameters for Grid Calculation {display-mode: "form"}

import os
import timeit
import numpy as np

num_pairs = 15

grid_spacing_list = [0.4]
length_input=len(grid_spacing_list)

num_metrics = 5
metrics_matrix=np.zeros((length_input*num_pairs,num_metrics+1))

dim_grid = 30

n_paralell_jobs = 2

i=0


metrics_name_list = ['PDB_code','Grid spacing','Original Energy','Time (s)','Time per atom(ms)','Grid Energy']


for j in range(num_pairs):

  pdb_code = pdb_codes_list[j]
  lig_src =  path + pdb_code +'/' + pdb_code + '.lig.am1bcc.mol2.gz'
  rec_src = path + pdb_code +'/' + pdb_code + '.rec.clean.mol2.gz'

  for grid_spacing in grid_spacing_list:

    print(pdb_code,grid_spacing)

    #Parâmetros do Input
    Input = PARSER()

    scoring_function = "0" #@param ["0", "1", "2", "3"]
    Input.dielectric_model = "r" #@param ["r", "constant"]
    Input.scoring_function = int(scoring_function)
    Input.solvation_alpha = 0.1 #@param {type:"number"}
    Input.solvation_beta = -0.005 #@param {type:"number"}
    if (Input.scoring_function < 3):
      delta = 2.5 #@param {type:"number"}
      Input.deltaij6 = (delta*delta*delta*delta*delta*delta)
      delta_es = 2.5 #@param {type:"number"}
      Input.deltaij_3 = (delta_es*delta_es*delta_es)
      Input.deltaij_6 = Input.deltaij_3*Input.deltaij_3

    Input.search_box_x, Input.search_box_y, Input.search_box_z = 30.0, 30.0, 30.0
    Input.x_dim, Input.y_dim, Input.z_dim = dim_grid, dim_grid, dim_grid
    Input.write_grids = True
    Input.use_grids = True
    
    Input.grid_spacing = grid_spacing
    Input.grid_prefix = "McGrid_"+pdb_code
    Input.parallel_jobs = n_paralell_jobs

    Input.output = "teste %.d"%(i) +pdb_code

    Writer = WRITER(Input)
    Coord = COORD_MC()
    HB = FindHB()

    lig = Mol2(Input, lig_src)
    rec = Mol2(Input, rec_src)
    ref_lig = Mol2(Input, lig_src)

    Energy = Energy2(Input)

    for k in range(len(list(rec.residue_pointer))-1):
      HB.parse_residue(rec.residue_pointer[k]-1, rec.residue_pointer[k+1]-2, rec.resnames[k], rec, lig, 9.0)

    HB.find_ligandHB(lig_src, lig)

    com = Coord.compute_com(lig)

    energy1 = Energy.compute_ene(rec, lig, lig.xyz)
    print("Original energy: %7.3f" % energy1)

    start_time = timeit.default_timer()
    Grids = Grid(Input, Writer, rec, com)
    time = timeit.default_timer() - start_time
    print('Time grids: %.2f' %time)

    time_per_atom = time/rec.N
    print('Time per atom: %.2f'%(time_per_atom))

    print('Points: %d %d %d'%(Grids.npointsx,Grids.npointsy,Grids.npointsz))

    Writer.write_box(com, Grids.xbegin, Grids.ybegin, Grids.zbegin, Grids.xend, Grids.yend, Grids.zend)

    energy2 = Energy.compute_ene(Grids, lig, lig.xyz)
    print("Grid Energy': %7.3f" % energy2)

    print("Energy error: %7.3f%%" % (100.*(abs(energy2-energy1))/abs(energy1)))

    folder = path + pdb_code
    #!mv *.grid McLiBELa.log box.pdb $folder

    print('')


    #Guardando valores
    metrics_list = np.zeros(num_metrics+1)
    metrics_list[0] = j
    metrics_list[1] = grid_spacing
    metrics_list[2] = energy1
    metrics_list[3] = time
    metrics_list[4] = time_per_atom
    metrics_list[5] = energy2




    metrics_matrix[i] = metrics_list
    i+=1


NameError: ignored

In [None]:
print("{:<10} {:<15} {:<17} {:<10} {:<19} {:<10}".format('PDB_code','Grid spacing','Original Energy','Time (s)','Time per atom(ms)','Grid Energy'))
for i in range(num_pairs*length_input):

  metrics_line = metrics_matrix[i]
  pdb_code_print = pdb_codes_list[int(metrics_line[0])]

  print("{:<10} {:<15} {:<17} {:<10} {:<19} {:<10}".format(pdb_code_print,metrics_line[1],'%7.3f'%metrics_line[2],'%.2f'%metrics_line[3],'%.2f'%(1000*metrics_line[4]),'%7.3f'%metrics_line[5])) 



PDB_code   Grid spacing    Original Energy   Time (s)   Time per atom(ms)   Grid Energy
121P       0.5             -237.910          100.90     38.51               -61.214   
121P       0.4             -237.910          197.12     75.24               -223.350  


In [None]:
#@title Escrevendo dados em arquivo .dat {display-mode: "form"}

data_path = '/content/drive/MyDrive/pyLiBELa/'
data_name = 'grid_spacing_test.dat'

data = open(data_name,'w')

variables='#'
#variables+='\t'.join(metrics_name_list)
variables+="{:<10} {:<15} {:<17} {:<10} {:<19} {:<10}".format('PDB_code','Grid spacing','Original Energy','Time (s)','Time per atom(ms)','Grid Energy')
variables+='\n'
data.write(variables)
data_line=''


for i in range(num_pairs*length_input):
  metrics_line = metrics_matrix[i]
  pdb_code_print = pdb_codes_list[int(metrics_line[0])]

  data_line+="{:<10} {:<15} {:<17} {:<10} {:<19} {:<10}".format(pdb_code_print,metrics_line[1],'%7.3f'%metrics_line[2],'%.2f'%metrics_line[3],'%.2f'%(1000*metrics_line[4]),'%7.3f'%metrics_line[5])

  data_line+='\n'


data.write(data_line)
data.close()

!mv $data_name $data_path

In [None]:
data_loc = data_path + data_name
!more $data_loc

#PDB_code   Grid spacing    Original Energy   Time (s)   Time per atom(ms)   Gri
d Energy
121P       0.5             -237.910          100.90     38.51               -61.
214   
121P       0.4             -237.910          197.12     75.24               -223
.350  


In [None]:
#@title Escrevendo dados em arquivo .csv {display-mode: "form"}
import csv

data_path = '/content/drive/MyDrive/pyLiBELa/'
data_name_csv = 'grid_spacing_test.csv'
metrics_name_list = ['PDB_code','Grid spacing','Original Energy','Time','Grid Energy']


  with open(data_name_csv,'w', encoding='UTF8', newline='') as data:

    writer = csv.writer(data)

    writer.writerow(metrics_name_list)

    for i in range(num_pairs*length_input):
      metrics_line = metrics_matrix[i]
      pdb_code_print = pdb_codes_list[int(metrics_line[0])]
      row = [pdb_code_print,metrics_line[1],'%7.3f'%metrics_line[2],'%.2f'%metrics_line[3], '%7.3f'%metrics_line[4]]
      writer.writerow(row)


  !mv $data_name_csv $data_path

In [None]:
#@title Variando trabalhos em paralelo {display-mode: "form"}

import os
import timeit
import numpy as np

num_pairs = 1

dim_grid = 30
grid_spacing = 0.5


paralell_jobs_list = [1,2,3,4]
length_input=len(paralell_jobs_list)


num_metrics = 5
metrics_matrix=np.zeros((length_input*num_pairs,num_metrics+1))


i=0
for j in range(num_pairs):
  pdb_code = pdb_codes_list[j]
  lig_src =  path + pdb_code +'/' + pdb_code + '.lig.am1bcc.mol2.gz'
  rec_src = path + pdb_code +'/' + pdb_code + '.rec.clean.mol2.gz'

  for n_paralell_jobs in paralell_jobs_list:


    print(pdb_code,n_paralell_jobs)

    #Parâmetros do Input
    Input = PARSER()

    Input.search_box_x, Input.search_box_y, Input.search_box_z = 30.0, 30.0, 30.0
    Input.x_dim, Input.y_dim, Input.z_dim = dim_grid, dim_grid, dim_grid
    Input.write_grids = True
    Input.use_grids = True
    
    Input.grid_spacing = grid_spacing
    Input.grid_prefix = "McGrid_"+pdb_code
    Input.parallel_jobs = n_paralell_jobs

    Input.output = "teste %.d"%(i) +pdb_code


    Writer = WRITER(Input)
    Coord = COORD_MC()
    HB = FindHB()

    lig = Mol2(Input, lig_src)
    rec = Mol2(Input, rec_src)
    ref_lig = Mol2(Input, lig_src)

    Energy = Energy2(Input)

    for k in range(len(list(rec.residue_pointer))-1):
      HB.parse_residue(rec.residue_pointer[k]-1, rec.residue_pointer[k+1]-2, rec.resnames[k], rec, lig, 9.0)

    HB.find_ligandHB(lig_src, lig)

    com = Coord.compute_com(lig)

    energy1 = Energy.compute_ene(rec, lig, lig.xyz)
    print("Original energy: %7.3f" % energy1)


    start_time = timeit.default_timer()
    Grids = Grid(Input, Writer, rec, com)
    time = timeit.default_timer() - start_time
    print('Time grids: %.2f' %time)


    time_per_atom = time/rec.N
    print('Time per atoms: %.2f'%(time_per_atom))

    print('Points: %d %d %d'%(Grids.npointsx,Grids.npointsy,Grids.npointsz))

    Writer.write_box(com, Grids.xbegin, Grids.ybegin, Grids.zbegin, Grids.xend, Grids.yend, Grids.zend)

    energy2 = Energy.compute_ene(Grids, lig, lig.xyz)
    print("Grid Energy': %7.3f" % energy2)

    folder = path + pdb_code
    !mv *.grid McLiBELa.log box.pdb teste* $folder

    print('')


    #Guardando valores
    metrics_list = np.zeros(num_metrics+1)
    metrics_list[0] = j
    metrics_list[1] = n_paralell_jobs
    metrics_list[2] = energy1
    metrics_list[3] = time
    metrics_list[4] = time_per_atom
    metrics_list[5] = energy2

    metrics_matrix[i] = metrics_list
    i+=1

121P 1
Original energy: -237.910
Time grids: 102.79
Time per atoms: 0.04
Points: 60 60 60
Grid Energy': -61.214
mv: cannot stat 'McLiBELa.log': No such file or directory

121P 2
Original energy: -237.910
Time grids: 86.68
Time per atoms: 0.03
Points: 60 60 60
Grid Energy': -61.214
mv: cannot stat 'McLiBELa.log': No such file or directory

121P 3
Original energy: -237.910
Time grids: 83.04
Time per atoms: 0.03
Points: 60 60 60
Grid Energy': -61.214
mv: cannot stat 'McLiBELa.log': No such file or directory

121P 4
Original energy: -237.910
Time grids: 85.46
Time per atoms: 0.03
Points: 60 60 60
Grid Energy': -61.214
mv: cannot stat 'McLiBELa.log': No such file or directory



In [None]:
#print("{:<10} {:<25} {:<20} {:<10} {:<19} {:<10}".format('PDB_code','Trabalhos em paralelo','Original Energy','Time (s)','Time per atom(ms)','Grid Energy'))
print("{}\t{}\t{}\t{}\t{}\t{}".format('PDB_code','Trabalhos em paralelo','Original Energy','Time (s)','Time per atom(ms)','Grid Energy'))
for i in range(num_pairs*length_input):

  metrics_line = metrics_matrix[i]
  pdb_code_print = pdb_codes_list[int(metrics_line[0])]

#  print("{:<10} {:<25} {:<20} {:<10} {:<19} {:<10}".format(pdb_code_print,metrics_line[1],'%7.3f'%metrics_line[2],'%.2f'%metrics_line[3],'%.2f'%(1000*metrics_line[4]),'%7.3f'%metrics_line[5])) 
  print("{}\t{}\t{}\t{}\t{}\t{}".format(pdb_code_print,metrics_line[1],'%7.3f'%metrics_line[2],'%.2f'%metrics_line[3],'%.2f'%(1000*metrics_line[4]),'%7.3f'%metrics_line[5])) 


PDB_code	Trabalhos em paralelo	Original Energy	Time (s)	Time per atom(ms)	Grid Energy
121P	1.0	-237.910	102.79	39.23	-61.214
121P	2.0	-237.910	86.68	33.08	-61.214
121P	3.0	-237.910	83.04	31.69	-61.214
121P	4.0	-237.910	85.46	32.62	-61.214


In [None]:
!python --version

Python 3.10.10
