In [None]:
import numpy as np
import pandas as pd
import os
from openbabel import pybel
import tkinter
from tkinter import *
from tkinter.ttk import *
from tkinter.filedialog import askdirectory, askopenfilename
import re

In [None]:
def popup_request_excel():
    """Ask the user to pick an Excel workbook through a file-open dialog.

    The dialog starts in the current working directory and only lists
    ``*.xlsx`` files.

    Returns
    -------
    str
        The path chosen in the dialog, or the sentinel string
        ``'No File Selected'`` when the dialog is cancelled.
    """
    root = tkinter.Tk()
    try:
        # Hide the empty root window so only the file dialog is shown.
        root.withdraw()
        request_load_file = askopenfilename(initialdir=os.getcwd(),title="Select Excel Spreadsheet with Compound Names & SMILES",filetypes=[("Excel","*.xlsx")])
    finally:
        # Always tear the Tk root down, even if the dialog raised.
        root.destroy()
    # A cancelled dialog yields an empty (falsy) value.
    if request_load_file:
        return request_load_file
    return 'No File Selected'

def popup_request_smi():
    """Ask the user to pick a SMILES text file through a file-open dialog.

    The dialog starts in the current working directory and only lists
    ``*.smi`` files.

    Returns
    -------
    str
        The path chosen in the dialog, or the sentinel string
        ``'No File Selected'`` when the dialog is cancelled.
    """
    root = tkinter.Tk()
    try:
        # Hide the empty root window so only the file dialog is shown.
        root.withdraw()
        request_load_file = askopenfilename(initialdir=os.getcwd(),title="Select Text Document with Compound Names & SMILES",filetypes=[("SMILES","*.smi")])
    finally:
        # Always tear the Tk root down, even if the dialog raised.
        root.destroy()
    # A cancelled dialog yields an empty (falsy) value.
    if request_load_file:
        return request_load_file
    return 'No File Selected'

def popup_request_dir():
    """Ask the user to pick a folder through a directory-chooser dialog.

    Not called anywhere in the current notebook; useful when the working
    files live in a directory other than the current one. The dialog starts
    in the current working directory.

    Returns
    -------
    str
        The directory chosen in the dialog, or the sentinel string
        ``'Directory Not Selected'`` when the dialog is cancelled.
    """
    root = tkinter.Tk()
    try:
        # Hide the empty root window so only the directory dialog is shown.
        root.withdraw()
        request_dir = askdirectory(initialdir=os.getcwd(),title="Select Directory")
    finally:
        # Always tear the Tk root down, even if the dialog raised.
        root.destroy()
    # Treat every empty result ('' or an empty tuple) as "cancelled", not
    # only the empty string.
    if not request_dir:
        return 'Directory Not Selected'
    return request_dir

def interpret_smi():
    """Load a ``.smi`` file chosen by the user into a DataFrame.

    The file path comes from ``popup_request_smi()``. Each non-blank line is
    split on whitespace: the first token is taken as the SMILES string and
    the second token as the compound name. Blank lines are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with columns ``'Compound'`` and ``'SMILES'``.

    Raises
    ------
    IndexError
        If a non-blank line has no second token (no compound name).
    """
    path = popup_request_smi()
    compounds = []
    smiles = []
    # The context manager closes the file even when a line fails to parse.
    with open(path) as smi_file:
        for line_number, line in enumerate(smi_file, start=1):
            tokens = line.split()
            if not tokens:
                # Blank / whitespace-only line (e.g. a trailing newline).
                continue
            if len(tokens) < 2:
                raise IndexError(
                    f'line {line_number} of {path!r} has no compound name after the SMILES string'
                )
            smiles.append(tokens[0])
            compounds.append(tokens[1])
    df = pd.DataFrame({'Compound':compounds,'SMILES':smiles})
    return df

def MakeLigandPDBQT(filename_list,smiles_list):
    """Convert SMILES strings into ``.pdbqt`` ligand files in the working directory.

    Names and SMILES are paired by position; pairs where either value is
    missing are dropped. For every remaining pair the SMILES string is read
    with ``pybel``, hydrogens are added, a 3D structure is generated and
    locally optimised, and the result is written to ``<name>.pdbqt``
    (overwriting an existing file of that name). ``Ligand.txt`` is then
    (re)written with one generated file name per line and no trailing newline.

    Parameters
    ----------
    filename_list : iterable
        Names used for the output files (e.g. ``df_compounds.Compound``).
    smiles_list : iterable
        SMILES strings to convert (e.g. ``df_compounds.SMILES``); must have
        the same length as ``filename_list``.

    Returns
    -------
    None
        Files are written to the current working directory and a
        confirmation message is printed.
    """
    # Build the table from the arguments rather than from a notebook global,
    # so the function works for any name/SMILES pair of sequences.
    df_smiles = pd.DataFrame({'Compound':list(filename_list),'SMILES':list(smiles_list)}).dropna()
    file_list = []
    for compound, smiles in zip(df_smiles['Compound'], df_smiles['SMILES']):
        pdbqt_name = f'{compound}.pdbqt'
        mol = pybel.readstring('smi',smiles)
        mol.addh()
        mol.make3D()
        mol.localopt()
        mol.write('pdbqt',pdbqt_name,overwrite=True)
        file_list.append(pdbqt_name)
    # Written only after every conversion succeeded; joining also copes with
    # an empty list (the file is then simply empty).
    with open("Ligand.txt", "w") as outfile:
        outfile.write('\n'.join(file_list))
    print(f'Check for new .pdbqt files in {os.getcwd()}')

def find_log_files():
    """Collect the ``.log`` files in the current working directory.

    These are the output files of the AutoDock Vina run performed from the
    command line. Every entry whose name ends in ``.log`` is normalised to
    ``<text before the first dot>.log`` (e.g. ``lig.pdbqt.log`` becomes
    ``lig.log``). An entry is left under its original name when the shortened
    name is already taken, so an existing log is never overwritten.

    Returns
    -------
    tuple
        ``(log_files_dir, docking_logs)``: the working directory and the list
        of ``.log`` file names found in it after renaming.
    """
    log_files_dir = os.getcwd()
    docking_logs = []
    # Sorted so the outcome does not depend on directory listing order.
    for file in sorted(os.listdir(log_files_dir)):
        if not file.endswith('.log'):
            continue
        name_parts = file.split('.')
        short_name = '.'.join([name_parts[0],name_parts[-1]])
        if short_name == file:
            # Already in "<name>.log" form; nothing to rename.
            docking_logs.append(file)
            continue
        target = os.path.join(log_files_dir, short_name)
        if os.path.exists(target):
            # Renaming would clobber another log; keep this one as it is.
            docking_logs.append(file)
            continue
        os.rename(os.path.join(log_files_dir, file), target)
        docking_logs.append(short_name)
    return (log_files_dir,docking_logs)

def read_log(log):
    """Extract the docking score from the text of a log file.

    The score is the first whitespace-delimited token that follows the first
    occurrence of ``'REMARK VINA RESULT:'`` in ``log``.

    Parameters
    ----------
    log : str
        Full text of a docking log file.

    Returns
    -------
    float
        The value immediately after the first ``REMARK VINA RESULT:`` marker.

    Raises
    ------
    ValueError
        If the marker is absent, nothing follows it, or the token after it
        is not a number.
    """
    marker = re.search('REMARK VINA RESULT:',log)
    if marker is None:
        raise ValueError("no 'REMARK VINA RESULT:' entry found in the log text")
    # Split on any whitespace so a score followed directly by a newline or
    # tab still parses; maxsplit avoids tokenising the rest of the log.
    tokens = log[marker.end():].split(maxsplit=1)
    if not tokens:
        raise ValueError("no value follows 'REMARK VINA RESULT:' in the log text")
    return float(tokens[0])

def sort_dockingscores(directory,log_files):
    """Build a table of docking scores, sorted by ascending score.

    Each log file is read once and its score extracted with ``read_log``.
    The compound name is the part of the file name before the first dot.
    Do not treat the scores as absolute values; use them only as a relative
    measure between ligands.

    Parameters
    ----------
    directory : str
        Folder that contains the log files.
    log_files : iterable of str
        File names (not full paths) of the logs inside ``directory``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Compound'`` and ``'Bonding Energy'``, sorted by
        ``'Bonding Energy'`` in ascending order.
    """
    records = []
    for file in log_files:
        # os.path.join keeps the path valid on every platform; a hard-coded
        # backslash separator only works on Windows.
        with open(os.path.join(directory, file)) as log_handle:
            log = log_handle.read()
        records.append([file.split('.')[0],read_log(log)])
    # Build the frame in one go instead of appending row by row.
    df_dockingscore = pd.DataFrame(records,columns=['Compound','Bonding Energy'])
    return df_dockingscore.sort_values('Bonding Energy')

In [None]:
# Option A (disabled): build the compound table from a .smi text file
#df_compounds = interpret_smi()

# Option B: build the compound table from the Excel workbook picked in the file dialog
df_compounds = pd.read_excel(io=popup_request_excel())

# Display the loaded table as the cell output
df_compounds

In [None]:
# Write one .pdbqt file per compound (plus Ligand.txt) into the working directory
MakeLigandPDBQT(
    filename_list=df_compounds['Compound'],
    smiles_list=df_compounds['SMILES'],
)

# Next step (outside the notebook): run the docking analysis with AutoDock Vina on the command line

In [None]:
# Locate the docking logs in the working directory, then show the compounds ranked by score
directory, log_files = find_log_files()
sort_dockingscores(directory=directory, log_files=log_files)