In [2]:
import os
import sys
import io
import pandas as pd
#import pymol
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import seaborn as sns
pd.set_option('display.max_rows', None)

def readFile(filename):
    for line in open(filename):
        yield line  

### Step 1: Docking using Autodock_Vina

In [None]:
# Docking script using directories as reference
config_file = "config.txt"
rec_dir = 'receptores/'
pos_dir = 'controles-positivos/'
neg_dir = 'controles-negativos/'
lig_dir = 'ligantes/'
pdbqt_dir = 'PDBQT/'
pdbqtR_dir = pdbqt_dir + 'Receptores/'
pdbqtL_dir = pdbqt_dir + 'Ligantes/'
results_dir = 'Resultados/'
logs_dir = results_dir + 'Logs/'
resultsprefix = 'out_'
logsprefix = 'result_'
prepare_dir = '/home/eduardo/MGLTools-1.5.7rc1/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_'
pythonsh_dir = '/home/eduardo/MGLTools-1.5.7rc1/bin/pythonsh'

# Step 1 - Create folders if they don't exist
if not os.path.isdir(results_dir):
    os.mkdir(results_dir)
if not os.path.isdir(logs_dir):
    os.mkdir(logs_dir)
if not os.path.isdir(pdbqt_dir):
    os.mkdir(pdbqt_dir)
if not os.path.isdir(pdbqtR_dir):
    os.mkdir(pdbqtR_dir)    
if not os.path.isdir(pdbqtL_dir):
    os.mkdir(pdbqtL_dir)   
    
# Conversion of input files to pdbqt format
for r in os.listdir(rec_dir):
    rec_name = rec_dir + r
    rec_pdbqt = pdbqtR_dir + r[:-4] + '.pdbqt'
    os.system(pythonsh_dir + ' ' + prepare_dir + "receptor4.py -r " + rec_name + " -o " + rec_pdbqt) #converte o arquivo do receptor para formato pdbqt
    for l in os.listdir(lig_dir):
        lig_name = lig_dir + l
        lig_pdbqt = pdbqtL_dir + l[:-5] + '.pdbqt'
        os.system(pythonsh_dir + ' ' + prepare_dir + "ligand4.py -l "+ lig_name + " -o " + lig_pdbqt) #converte o arquivo do ligante para formato pdbqt
        # Actual docking
        print(' '.join(['\n\nStarting docking for',l[:-5], 'on', r[:-4],':\n']))
        command = ''.join(['vina --receptor ', rec_pdbqt, ' --ligand ', lig_pdbqt, ' --out ', results_dir, resultsprefix, l[:-5], "_", r[:-4], '.pdbqt --log ', logs_dir, logsprefix,l[:-5], "_", r[:-4], '.log --config ', config_file])
        os.system(command) 

In [None]:
## Sumarization of Vina Results
logs_dir = 'Resultados/Logs/'
# Optional: Allow user to define how many of the results to use
data = {"Ligand":[], "Receptor":[], "Affinity1":[], "Affinity2":[], "Affinity3":[]}
checagem = []
for filename in os.listdir(logs_dir):
    #filename = "result" + name + "ache1.log"
    full_filename = logs_dir + filename
    ligname = filename.split('_')[1]
    recname = filename.split('_')[2][:-4]
    data["Ligand"].append(ligname)
    data["Receptor"].append(recname)
    for line in readFile(full_filename):
        if len(line)> 3:
            if line[3] == "1":
                checagem.append(ligname)
                l = line.split("     ")
                data["Affinity1"].append(float(l[1]))
                
            elif line[3] == "2":
                l = line.split("     ")
                data["Affinity2"].append(float(l[1]))
                
            elif line[3] == "3":
                l = line.split("     ")
                data["Affinity3"].append(float(l[1]))
            
df_vina = pd.DataFrame(data)
df_vina = df_vina.set_index("Ligand", drop=True)
df_vina.to_csv("Sumario_VINA.csv")
df_vina

### Step 2: Rescoring using Convex_PL Score Function

In [3]:
# Rescoring using CONVEX-PL

# Step 0 - Definir lista de ligantes de entrada
receptor_dir = 'receptores/'
results_dir = 'output/'
rescoring_dir = 'Rescoring/'
rescoring_pdbdir = rescoring_dir +'PDB/'
convexlogs_dir = rescoring_dir + 'ConvexLogs/'
vina_results = [o for o in os.listdir(results_dir) if o != 'Logs']
summary_fileName = rescoring_dir + 'Sumario_CONVEX.csv'

# Step 1 - Create folders if they don't exist
if not os.path.isdir(rescoring_dir):
    os.mkdir(rescoring_dir)
if not os.path.isdir(rescoring_pdbdir):
    os.mkdir(rescoring_pdbdir)
if not os.path.isdir(convexlogs_dir):
    os.mkdir(convexlogs_dir)

# Step 2 - Convert all PDBQT results to PDB
for v in vina_results:
    pymol.cmd.load(results_dir + v)
    pymol.cmd.save(rescoring_pdbdir + v[:-6] + ".pdb", state = 0)
    pymol.cmd.reinitialize()

# Step 3 - Calculate
summary_file = open(summary_fileName, "w")
summary_file.write("Ligand,Receptor,Affinity1,Affinity2,Affinity3,Affinity4,Affinity5,Affinity6,Affinity7,Affinity8,Affinity9,Affinity10\n")
for v in vina_results:
    ligandName = v.split('_')[1]
    receptorName = v.split('_')[2].split('.')[0]
    summary_file.write(ligandName + ',' + receptorName)
    outputName =  ''.join([convexlogs_dir,ligandName, '_', receptorName, '.txt'])
    command = ''.join(['./Convex-PL --receptor ',receptor_dir, receptorName, '.pdb --ligand ', rescoring_pdbdir + v[:-6] + '.pdb >', outputName])
    os.system(command)
    for line in readFile(outputName):
        l = line.split()
        if l[0]=="model":
            summary_file.write("," + l[-1])
    summary_file.write("\n")
summary_file.close()
#Passo 6 - Exibir resultados para verificar
df_convex = pd.read_csv(summary_fileName)
df_convex

FileNotFoundError: [Errno 2] No such file or directory: 'output/'

### Step 3: Rescoring using SMINA AD4_Scoring Function

In [None]:
# Rescoring using SMINA's ad4scoring function

dic = {'Ligand':[], 'Receptor':[], 'Affinity1':[],'Affinity2':[],'Affinity3':[], 'Affinity4':[],'Affinity5':[], 'Affinity6':[], 'Affinity7':[], 'Affinity8':[], 'Affinity9':[], 'Affinity10':[]}
columns = ['Ligand', 'Receptor','Affinity1','Affinity2','Affinity3','Affinity4','Affinity5','Affinity6','Affinity7','Affinity8','Affinity9','Affinity10']
summary = pd.DataFrame(dic)

receptor_dir = 'receptores/'
results_dir = 'Resultados/'
rescoring_dir = 'Rescoring/'
rescoring_pdbdir = rescoring_dir +'PDB/'
rescoring_sdfdir = rescoring_dir + 'SDF/'
ad4scoring_dir = rescoring_dir + 'ad4scoringLogs/'
vina_results = [o for o in os.listdir(results_dir) if o != 'Logs']
summary_fileName = rescoring_dir + 'Sumario_ad4scoring.csv'

if not os.path.isdir(rescoring_sdfdir):
    os.mkdir(rescoring_sdfdir)
if not os.path.isdir(ad4scoring_dir):
    os.mkdir(ad4scoring_dir)

old_stdout = sys.stdout
new_stdout = io.StringIO()
sys.stdout = new_stdout
for v in vina_results:
    ligandName = v.split('_')[1]
    receptorName = v.split('_')[2].split('.')[0]
    source = results_dir + v
    ligand_sdf = rescoring_sdfdir + ligandName + '_' + receptorName + '.sdf'
    dest = ad4scoring_dir + ligandName + '_' + receptorName + '.sdf'
    pymol.cmd.load(source)
    pymol.cmd.save(ligand_sdf, state = 0, format = 'sdf')
    pymol.cmd.reinitialize()
    affinities = [ligandName, receptorName]
    command = ''.join(['./smina.static -r ', receptor_dir, receptorName, '.pdb -l ', ligand_sdf, ' -o ', dest, ' --seed 0 --score_only --scoring ad4_scoring'])
    os.system(command)
    p = False
    for line in readFile(dest):
        if '> <minimizedAffinity>' in line:
            p = True
        elif p == True:
            affinities.append(float(line))
            p = False
        else:
            p = False
    newLine = pd.DataFrame(affinities,index = columns).transpose()
    summary = summary.append(newLine)
sys.stdout = old_stdout
summary.to_csv(summary_fileName)
summary

### Step 4: Rescoring using SMINA Dkoes_Scoring Function

In [None]:
# Rescoring using SMINA's dkoes_scoring function

dic = {'Ligand':[], 'Receptor':[], 'Affinity1':[],'Affinity2':[],'Affinity3':[], 'Affinity4':[],'Affinity5':[], 'Affinity6':[], 'Affinity7':[], 'Affinity8':[], 'Affinity9':[], 'Affinity10':[]}
columns = ['Ligand', 'Receptor','Affinity1','Affinity2','Affinity3','Affinity4','Affinity5','Affinity6','Affinity7','Affinity8','Affinity9','Affinity10']
summary = pd.DataFrame(dic)

receptor_dir = 'receptores/'
results_dir = 'Resultados/'
rescoring_dir = 'Rescoring/'
rescoring_pdbdir = rescoring_dir +'PDB/'
rescoring_sdfdir = rescoring_dir + 'SDF/'
dkoes_dir = rescoring_dir + 'dkoesLogs/'
vina_results = [o for o in os.listdir(results_dir) if o != 'Logs']
summary_fileName = rescoring_dir + 'Sumario_dkoes.csv'

if not os.path.isdir(rescoring_sdfdir):
    os.mkdir(rescoring_sdfdir)
if not os.path.isdir(dkoes_dir):
    os.mkdir(dkoes_dir)

old_stdout = sys.stdout
new_stdout = io.StringIO()
sys.stdout = new_stdout
for v in vina_results:
    ligandName = v.split('_')[1]
    receptorName = v.split('_')[2].split('.')[0]
    source = results_dir + v
    ligand_sdf = rescoring_sdfdir + ligandName + '_' + receptorName + '.sdf'
    dest = dkoes_dir + ligandName + '_' + receptorName + '.sdf'
    pymol.cmd.load(source)
    pymol.cmd.save(ligand_sdf, state = 0, format = 'sdf')
    pymol.cmd.reinitialize()
    affinities = [ligandName, receptorName]
    command = ''.join(['./smina.static -r ', receptor_dir, receptorName, '.pdb -l ', ligand_sdf, ' -o ', dest, ' --seed 0 --score_only --scoring dkoes_scoring'])
    os.system(command)
    p = False
    for line in readFile(dest):
        if '> <minimizedAffinity>' in line:
            p = True
        elif p == True:
            affinities.append(float(line))
            p = False
        else:
            p = False
    newLine = pd.DataFrame(affinities,index = columns).transpose()
    summary = summary.append(newLine)
sys.stdout = old_stdout
summary.to_csv(summary_fileName)
summary

### Step 5: Rescoring using SMINA Vinardo_Scoring Function

In [None]:
# Rescoring using SMINA's Vinardo function

dic = {'Ligand':[], 'Receptor':[], 'Affinity1':[],'Affinity2':[],'Affinity3':[], 'Affinity4':[],'Affinity5':[], 'Affinity6':[], 'Affinity7':[], 'Affinity8':[], 'Affinity9':[], 'Affinity10':[]}
columns = ['Ligand', 'Receptor','Affinity1','Affinity2','Affinity3','Affinity4','Affinity5','Affinity6','Affinity7','Affinity8','Affinity9','Affinity10']
summary = pd.DataFrame(dic)

receptor_dir = 'receptores/'
results_dir = 'Resultados/'
rescoring_dir = 'Rescoring/'
rescoring_pdbdir = rescoring_dir +'PDB/'
rescoring_sdfdir = rescoring_dir + 'SDF/'
vinardo_dir = rescoring_dir + 'vinardoLogs/'
vina_results = [o for o in os.listdir(results_dir) if o != 'Logs']
summary_fileName = rescoring_dir + 'Sumario_vinardo.csv'

if not os.path.isdir(rescoring_sdfdir):
    os.mkdir(rescoring_sdfdir)
if not os.path.isdir(vinardo_dir):
    os.mkdir(vinardo_dir)

old_stdout = sys.stdout
new_stdout = io.StringIO()
sys.stdout = new_stdout
for v in vina_results:
    ligandName = v.split('_')[1]
    receptorName = v.split('_')[2].split('.')[0]
    source = results_dir + v
    ligand_sdf = rescoring_sdfdir + ligandName + '_' + receptorName + '.sdf'
    dest = vinardo_dir + ligandName + '_' + receptorName + '.sdf'
    pymol.cmd.load(source)
    pymol.cmd.save(ligand_sdf, state = 0, format = 'sdf')
    pymol.cmd.reinitialize()
    affinities = [ligandName, receptorName]
    command = ''.join(['./smina.static -r ', receptor_dir, receptorName, '.pdb -l ', ligand_sdf, ' -o ', dest, ' --seed 0 --score_only --scoring vinardo'])
    os.system(command)
    p = False
    for line in readFile(dest):
        if '> <minimizedAffinity>' in line:
            p = True
        elif p == True:
            affinities.append(float(line))
            p = False
        else:
            p = False
    newLine = pd.DataFrame(affinities,index = columns).transpose()
    summary = summary.append(newLine)
sys.stdout = old_stdout
summary.to_csv(summary_fileName)
summary