## (1) Install Condacolab (< 1min)

In [None]:
# @title (1) Install Condacolab (< 1min)
%%time

# Install the condacolab helper package quietly (-q) with pip, then import it
# and let it perform its installation step.
! pip install -q condacolab
import condacolab
# NOTE(review): condacolab.install() presumably sets up conda under the runtime
# and restarts the kernel when done — confirm; the following cells are written
# as separate steps to be run afterwards.
condacolab.install()

## (2) Install Packages and Data (~ 2min)

In [None]:
# @title (2) Install Packages and Data (~ 2min)
%%time

# Get environment configuration from Git Repository
# The clone lands in ./colab_setup (relative to the current working directory),
# which is the path the two commands below rely on.
setup_repo="https://github.com/rwxayheee/colab_setup"
!git clone {setup_repo}

# Run setup script
# The script is marked executable and then run explicitly through bash.
# NOTE(review): a failing shell command here does not raise in Python, so check
# the printed output before moving on.
!chmod +x colab_setup/basic_setup.sh
!bash colab_setup/basic_setup.sh

## (3) Import Modules & Locate Command Line Scripts (< 1s)

In [None]:
# @title (3) Import Modules & Locate Command Line Scripts (< 1s)
%%time

# Import modules
import sys, platform
from prody import *
from pathlib import Path
from rdkit import Chem
from rdkit.Chem import AllChem
import rdkit, py3Dmol
print("rdkit version:", rdkit.__version__)
print("py3Dmol version:", py3Dmol.__version__)
from ipywidgets import interact, IntSlider
import ipywidgets, copy
from IPython.display import display, Markdown


# Helper functions
def locate_file(from_path = None, query_path = None, query_name = "query file"):
    """Find an entry below ``from_path`` that matches the glob ``query_path``.

    Parameters
    ----------
    from_path : str or pathlib.Path
        Directory the glob pattern is evaluated against.
    query_path : str
        Glob pattern (or plain relative path) to look for under ``from_path``.
    query_name : str
        Human-readable label used in the printed message and in the error.

    Returns
    -------
    pathlib.Path
        The first match in sorted order.

    Raises
    ------
    ValueError
        If ``from_path`` or ``query_path`` is missing/empty.
    FileNotFoundError
        If nothing under ``from_path`` matches ``query_path``.
    """

    if not from_path or not query_path:
        raise ValueError("Must specify from_path and query_path")

    # Accept plain strings as well as Path objects.
    from_path = Path(from_path)

    # glob() yields matches in arbitrary (filesystem) order; sorting makes the
    # "first match" reproducible when the pattern hits more than one entry.
    possible_path = sorted(from_path.glob(query_path))

    if not possible_path:
        raise FileNotFoundError(f"Cannot find {query_name} from {from_path} by {query_path}")

    return_which = (
        f"using {query_name} at:\n"
        f"{possible_path[0]}\n"
    )
    print(return_which)

    return possible_path[0]


# Commandline scripts
# Every script is searched for under /usr/local/bin by its own file name; the
# same name doubles as the label in the printed "using ... at:" message.
scrub = locate_file(Path("/usr/local/bin"), "scrub.py", "scrub.py")
mk_prepare_ligand = locate_file(Path("/usr/local/bin"), "mk_prepare_ligand.py", "mk_prepare_ligand.py")
mk_prepare_receptor = locate_file(Path("/usr/local/bin"), "mk_prepare_receptor.py", "mk_prepare_receptor.py")
mk_export = locate_file(Path("/usr/local/bin"), "mk_export.py", "mk_export.py")


# Locate reduce2 in conda install prefix
# The site-packages directory name carries only "major.minor" of the running
# interpreter, so the patch level is dropped from the full version string.
full_py_version = platform.python_version()
major_and_minor = "{}.{}".format(*full_py_version.split(".")[:2])
env_path = Path("/usr/local") # default conda install prefix on Colab
reduce2_path = "lib/python" + major_and_minor + "/site-packages/mmtbx/command_line/reduce2.py"
reduce2 = locate_file(env_path, reduce2_path, "reduce2.py")


# Locate geostd in current path
geostd_path = locate_file(Path.cwd(), "geostd", "geostd")

## 1.2 Receptor Preparation (< 30s)

In [None]:
# @title # 1.2 Receptor Preparation (< 30s)
%%time
#@markdown The preparation of a rigid receptor consists of two steps. The receptor structure is first sourced from a PDB file and sent to **reduce2.py** for hydrogen addition and optimization, and then, the conversion to a tangible receptor PDBQT file is done by **mk_prepare_receptor.py**.

#@markdown ## (1) Add Hydrogens to Receptor with Reduce2

# @markdown > Enter the PDB ID here to source the starting structure of receptor. This PDB file will be pulled from RCSB PDB server.
# Download PDB file
# The download is saved as "<pdb_token>.pdb" in the current working directory.
pdb_token = "3CL0" #@param {type:"string"}
! curl "http://files.rcsb.org/view/{pdb_token}.pdb" -o "{pdb_token}.pdb"


# Export receptor atoms
# parsePDB / writePDB are not defined in this notebook; presumably they come
# from the `from prody import *` in the import cell.
atoms_from_pdb = parsePDB(pdb_token)
# @markdown > Use ProDy selection language to specify atoms to retain in receptor.
receptor_selection = "chain A and not water and not hetero" #@param {type:"string"}
# NOTE(review): if the selection string matches no atoms, select() presumably
# returns None and the writePDB call below will fail — confirm against ProDy.
receptor_atoms = atoms_from_pdb.select(receptor_selection)
prody_receptorPDB = f"{pdb_token}_receptor_atoms.pdb"
writePDB(prody_receptorPDB, receptor_atoms)


# Add CRYST1 card (temporarily required for reduce2)
# `<( ... )` is shell process substitution: the CRYST1 line(s) grepped from the
# downloaded file are written first, followed by the selected receptor atoms.
# This needs a shell that supports process substitution (e.g. bash).
reduce_inputPDB = f"{pdb_token}_receptor.pdb"
! cat <(grep "CRYST1" "{pdb_token}.pdb") {prody_receptorPDB} > {reduce_inputPDB}


# Run reduce2
# @markdown > Use PHIL arguments to specify the tasks for reduce2. In this example, we use `approach=add add_flip_movers=True` which is equivalent to `-build` or `-flip` in the formerly standalone reduce. These arguments ask ***reduce*** to allow NQH flips and build histidines to neutral tautomers while adding hydrogens.
reduce_opts = "approach=add add_flip_movers=True" #@param {type:"string"}
# MMTBX_CCP4_MONOMER_LIB is exported only for this one shell invocation and
# points at the geostd directory located in the import/locate cell.
! export MMTBX_CCP4_MONOMER_LIB="{geostd_path}"; python {reduce2} {reduce_inputPDB} {reduce_opts}
# Default name of reduce output...
# No output name is passed to reduce2 above; the next step assumes it wrote
# "<pdb_token>_receptorFH.pdb" (input stem + "FH").
prepare_inPDB = f"{pdb_token}_receptorFH.pdb"

#@markdown ---
#@markdown ## (2) Receptor Preparation with Meeko

# Specify Box
# Center at ligand
# The structure is parsed again from the original PDB (not from receptor_atoms),
# so the ligand selection below is made on the full set of atoms.
atoms_from_pdb = parsePDB(pdb_token)
#@markdown > Use ProDy selection language to specify (ligand, residue, or any) atoms for box setup. In this example, we choose the ligand and will use a constant padding in each dimension around the ligand to define the box.
ligand_selection = "chain A and resname G39" #@param {type:"string"}
ligand_atoms = atoms_from_pdb.select(ligand_selection)


#@markdown > Export ligand's original position to a PDB file. This is totally optional, but we will do it for visualization.
# This file is also what --box_enveloping receives in the command below.
prody_ligandPDB = "LIG.pdb" #@param {type:"string"}
writePDB(prody_ligandPDB, ligand_atoms)


# Padding in each dimension
#@markdown > Set the box by the padding range (Å) around the centered object
padding = 8.0 #@param {type:"raw"}


# @markdown With the above specifications, atom selection will be made on the original PDB file in ***ProDy***. Then, a protonated structure is generated by **reduce2.py**.
# @markdown
# @markdown The PDB file is then fed to **mk_prepare_receptor.py** to generate the receptor PDBQT file.

# @markdown > Specify the flexible residue(s).
# Flexible (nonreactive) residue
flexible_residues = "A:276" #@param {type:"string"}

# @markdown > In this example, we will ask Meeko to ignore the few partially resolved residues. When this option is toggled on, residues that do not match with the internal chemical templates will be deleted from the receptor structure.
# `args` collects optional command-line flags; each one is appended with a
# trailing space so further flags can be concatenated.
args = ""
allow_bad_res = True #@param {type:"boolean"}
if allow_bad_res:
  args += "--allow_bad_res "

# Prepare Receptor
# @markdown > A basename for the receptor PDBQT file is required.
# NOTE(review): the basename is a literal containing "3CL0"; it does not follow
# pdb_token automatically if that form field is changed.
prepare_output = "3CL0_receptorFH" #@param {type:"string"}
# Later code reads "<prepare_output>.box.pdb" (visualization) and the docking
# cell expects "<basename>_rigid.pdbqt", "<basename>_flex.pdbqt" and
# "<basename>.box.txt" — presumably all written by this command; confirm.
! python {mk_prepare_receptor} -i {prepare_inPDB} -f {flexible_residues} -o {prepare_output} -p -v -j --box_enveloping {prody_ligandPDB} --padding {padding} {args}


# Visualization with py3Dmol
def Receptor3DView(receptorPDB = None, boxPDB = None, ligPDB = None):
    """Build a py3Dmol view showing the docking box, the receptor and a ligand.

    Parameters
    ----------
    receptorPDB : str
        Path to the receptor PDB file (drawn as a semi-transparent cartoon).
    boxPDB : str
        Path to the box PDB file (drawn as sticks; the camera zooms to it).
    ligPDB : str, optional
        Path to a ligand PDB file. When given, the model is added and atoms
        flagged as hetero are drawn as sticks.

    Returns
    -------
    py3Dmol.view
        The configured view; call ``.show()`` on it to render.
    """

    view = py3Dmol.view()
    view.setBackgroundColor('white')

    # Each file is read inside a `with` block so its handle is closed right
    # away instead of being left to the garbage collector.
    with open(boxPDB, 'r') as box_file:
        view.addModel(box_file.read(), 'pdb')
    view.addStyle({'stick': {}})
    # Zoom while only the box is loaded, so the camera frames the box.
    view.zoomTo()

    with open(receptorPDB, 'r') as receptor_file:
        view.addModel(receptor_file.read(), 'pdb')
    view.addStyle({'cartoon': {'color':'spectrum', 'opacity': 0.5}})

    if ligPDB is not None:
        with open(ligPDB, 'r') as ligand_file:
            view.addModel(ligand_file.read(), 'pdb')
        view.addStyle({'hetflag': True}, {'stick': {}})

    return view

# Render the prepared receptor together with the generated box and the ligand
# exported earlier. The box file name is the output basename plus ".box.pdb".
Receptor3DView(
    receptorPDB=prepare_inPDB,
    boxPDB=f"{prepare_output}.box.pdb",
    ligPDB=prody_ligandPDB,
).show()

In [None]:
# 计时代码，统计整个运行时间
%%time

import pandas as pd
import os


# 读取 CSV 文件
csv_file = "./test.csv"  # 请确保路径正确
if not os.path.exists(csv_file):
    raise FileNotFoundError(f"CSV 文件 {csv_file} 未找到，请检查路径！")

df = pd.read_csv(csv_file, header=None)  # 读取第一列（SMILES）
num_molecules = len(df)  # 计算分子总数

# 设定受体文件和对接参数
rigid_receptorPDBQT = "3CL0_receptorFH_rigid.pdbqt"
flex_receptorPDBQT = "3CL0_receptorFH_flex.pdbqt"
configTXT = "3CL0_receptorFH.box.txt"
exhaustiveness = 8

# 检查受体文件是否存在
for file in [rigid_receptorPDBQT, flex_receptorPDBQT, configTXT]:
    if not os.path.exists(file):
        raise FileNotFoundError(f"文件 {file} 未找到，请检查路径！")

print(f"总共有 {num_molecules} 个分子，开始批量对接...")

# 存储对接结果
docking_results = []

# 处理每个分子
for index, row in df.iterrows():
    smiles = row[0]  # 获取 SMILES 字符串
    ligandName = f"ligand{index + 1}"  # 生成文件名
    ligandSDF = f"{ligandName}_scrubbed.sdf"
    ligandPDBQT = f"{ligandName}.pdbqt"
    outputPDBQT = f"3CL0_{ligandName}_vina_out.pdbqt"  # 对接输出文件

    print(f"正在处理第 {index + 1}/{num_molecules} 个分子：{ligandName}")

    # 运行 scrub.py 生成 SDF 文件
    ! python {scrub} "{smiles}" -o {ligandSDF} --ph 6.5 --skip_tautomer

    # 运行 mk_prepare_ligand.py 生成 PDBQT 文件
    ! python {mk_prepare_ligand} -i {ligandSDF} -o {ligandPDBQT}

    # 运行 AutoDock Vina 进行对接
    ! vina --receptor {rigid_receptorPDBQT} --flex {flex_receptorPDBQT} \
         --ligand {ligandPDBQT} --config {configTXT} \
         --exhaustiveness {exhaustiveness} --out {outputPDBQT}

    # 解析 output.pdbqt 提取最低结合能
    binding_energy = None
    if os.path.exists(outputPDBQT):
        with open(outputPDBQT, "r") as file:
            for line in file:
                if line.startswith("REMARK VINA RESULT"):
                    try:
                        binding_energy = float(line.split()[3])  # 提取第 4 个字段（结合能）
                    except ValueError:
                        binding_energy = None
                    break  # 只读取第一条结果
    else:
        print(f"警告：未找到 {outputPDBQT}，可能对接失败！")

    # 存储结果
    docking_results.append([smiles, binding_energy])



print("所有配体已处理完毕！")
# ✅ **保存对接结果到 CSV**
output_csv = "1_docking_results.csv"
df_results = pd.DataFrame(docking_results, columns=["Ligand (SMILES)", "Binding Energy (kcal/mol)"])
df_results.to_csv(output_csv, index=False)

print(f"✅ 结果已保存到 {output_csv}")