# # Protein-Ligand Docking with SMILES
# 
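# In this notebook, we dock a ligand given as a SMILES string into a protein structure loaded from a local PDB file. The structure is read with `MDAnalysis`, the protein and the ligand are converted to PDBQT with Open Babel (`pybel`), the docking itself is run with the `smina` command-line tool, and the resulting poses are visualized with `nglview`.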
# 
# First, let's import the necessary packages.

In [1]:
# 修改后的代码
import warnings  # 导入警告模块
from pathlib import Path  # 导入路径处理模块
import subprocess  # 导入子进程模块
import numpy as np  # 导入NumPy模块
from MDAnalysis import Universe  # 从MDAnalysis导入Universe类
from openbabel import pybel  # 从Open Babel导入pybel模块
import nglview as nv  # 导入nglview用于可视化

import openmm  # 导入OpenMM模块
import openmm.app  # 导入OpenMM.app模块

# Filter warnings
warnings.filterwarnings("ignore")  # 忽略警告信息



In [2]:
# 设置工作目录
from pathlib import Path
import os

# Resolve the data directory relative to the current working directory.
HERE = Path.cwd()
DATA = HERE / 'data'
# mkdir(exist_ok=True) already tolerates an existing directory, so a separate
# exists() check is unnecessary. If 'data' exists but is not a directory this
# now raises FileExistsError instead of continuing with an unusable path.
DATA.mkdir(parents=True, exist_ok=True)
print(DATA)

/Users/wangyang/Desktop/Breast_cancer_brain_metastasis/wang_Gene/AIDD/15_Protein_ligand_docking/data


In [5]:
class Structure(Universe):
    """Thin ``Universe`` subclass used as the entry point for loading structures."""

    @classmethod
    def from_string(cls, pdb_path):
        """Build a structure from a PDB file stored on disk.

        Parameters
        ----------
        pdb_path : str or pathlib.Path
            Location of the local PDB file; handed unchanged to the
            class constructor.

        Returns
        -------
        Structure
            The instance created from ``pdb_path``.
        """
        loaded = cls(pdb_path)
        return loaded


# Load local structure
pdb_path = DATA / "6Q4G.pdb"  # must point to a PDB file that exists locally
# SMILES of the ligand to dock (converted to PDBQT further below)
smiles = "C[C@@H](Nc1ncnc2c(C(N)=O)cccc12)c1cccc(NC(=O)c2ccc(F)c(O)c2)c1"  # ligand SMILES
structure = Structure.from_string(pdb_path)  # load the local structure

# Prepare protein file
protein_path = DATA / "protein.pdb"  # output path of the protein-only PDB file
protein = structure.select_atoms("protein")  # keep only the protein atoms
protein.write(str(protein_path))  # write the protein selection to disk

def pdb_to_pdbqt(pdb_path, pdbqt_path, pH=7.4):
    """Convert a PDB file to a PDBQT file.

    Only the first molecule found in the PDB file is converted.

    Parameters
    ----------
    pdb_path : str or pathlib.Path
        PDB file to read.
    pdbqt_path : str or pathlib.Path
        Destination PDBQT file; overwritten if it already exists.
    pH : float
        pH handed to ``CorrectForPH`` before hydrogens are added.

    Raises
    ------
    ValueError
        If no molecule can be read from ``pdb_path``.
    """
    # Take just the first record instead of parsing every molecule in the
    # file only to discard all but one.
    molecule = next(iter(pybel.readfile("pdb", str(pdb_path))), None)
    if molecule is None:
        raise ValueError(f"No molecule could be read from {pdb_path}")

    # Adjust for the requested pH, then add hydrogens.
    molecule.OBMol.CorrectForPH(pH)
    molecule.addh()

    # Query the partial charge of every atom; presumably this makes
    # Open Babel assign the charges before the file is written — confirm.
    for atom in molecule.atoms:
        atom.OBAtom.GetPartialCharge()

    molecule.write("pdbqt", str(pdbqt_path), overwrite=True)

# Convert protein to PDBQT format
protein_pdbqt_path = DATA / "protein.pdbqt"  # output path of the converted protein
pdb_to_pdbqt(protein_path, protein_pdbqt_path)  # write the protein as PDBQT

# Automatically detect the ligand residue name
def find_ligand_resname(structure):
    """Detect the ligand in ``structure`` and return its residue name.

    Every atom that is neither protein nor water (``HOH``) counts as a
    ligand candidate. When several residue names qualify (for example ions
    or buffer molecules next to the actual ligand), the name contributing
    the most atoms is returned; ties are broken alphabetically so the
    result is reproducible between runs.

    Parameters
    ----------
    structure
        Object exposing ``select_atoms(selection)`` whose result has a
        ``resnames`` sequence.

    Returns
    -------
    str
        Residue name of the selected ligand.

    Raises
    ------
    ValueError
        If the structure contains no non-protein, non-water atoms.
    """
    from collections import Counter

    # Select every atom that is neither protein nor water.
    ligand_atoms = structure.select_atoms("not protein and not resname HOH")

    # Assumes ``resnames`` holds one entry per selected atom (as MDAnalysis
    # atom groups do), so the counts are atoms per residue name.
    atom_counts = Counter(ligand_atoms.resnames)
    if not atom_counts:
        raise ValueError("No ligand found in the structure.")

    # Taking an arbitrary element of a set would let the choice vary between
    # runs; rank by atom count (largest first), then by name.
    return min(atom_counts, key=lambda name: (-atom_counts[name], name))

# Pick the ligand residue name automatically
ligand_resname = find_ligand_resname(structure)
print(f"Using ligand resname: {ligand_resname}")
# NOTE(review): this selects every residue with that name; if the structure
# holds several copies of the ligand the box below spans all of them — confirm.
ligand = structure.select_atoms(f"resname {ligand_resname}")  # select the ligand atoms


# Calculate pocket center and size
pocket_center = (ligand.positions.max(axis=0) + ligand.positions.min(axis=0)) / 2  # midpoint of the ligand's bounding box
pocket_size = ligand.positions.max(axis=0) - ligand.positions.min(axis=0) + 5  # bounding-box extent plus a margin of 5 (presumably Å — confirm)


ligand_path = DATA / "ligand.pdbqt"  # output path of the ligand PDBQT file

def smiles_to_pdbqt(smiles, pdbqt_path, pH=7.4):
    """Write a PDBQT file with 3D coordinates for a molecule given as SMILES.

    Parameters
    ----------
    smiles : str
        SMILES string describing the molecule.
    pdbqt_path : str or pathlib.Path
        Destination PDBQT file; overwritten if it already exists.
    pH : float
        pH handed to ``CorrectForPH`` before hydrogens are added.
    """
    mol = pybel.readstring("smi", smiles)

    # Protonation: adjust for the requested pH, then add hydrogens.
    mol.OBMol.CorrectForPH(pH)
    mol.addh()

    # Build 3D coordinates with the MMFF94s force field.
    mol.make3D(forcefield="mmff94s", steps=10000)

    # Query the partial charge of each atom before exporting.
    for ob_atom in (atom.OBAtom for atom in mol.atoms):
        ob_atom.GetPartialCharge()

    target = str(pdbqt_path)
    mol.write("pdbqt", target, overwrite=True)

# Convert ligand SMILES to PDBQT format
smiles_to_pdbqt(smiles, ligand_path)  # write the ligand as PDBQT

# Run Smina docking
def run_smina(ligand_path, protein_path, out_path, pocket_center, pocket_size):
    """Dock a ligand into a receptor by calling the ``smina`` executable.

    Parameters
    ----------
    ligand_path : str or pathlib.Path
        Ligand file passed to ``--ligand``.
    protein_path : str or pathlib.Path
        Receptor file passed to ``--receptor``.
    out_path : str or pathlib.Path
        File passed to ``--out``.
    pocket_center : sequence of 3 numbers
        Values for ``--center_x``, ``--center_y`` and ``--center_z``.
    pocket_size : sequence of 3 numbers
        Values for ``--size_x``, ``--size_y`` and ``--size_z``.

    Returns
    -------
    str
        Text captured from the standard output of ``smina``. The same text
        is also saved to ``output_text.txt`` inside ``DATA``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``smina`` exits with a non-zero status.
    """
    command = ["smina"]
    command += ["--receptor", str(protein_path)]
    command += ["--ligand", str(ligand_path)]
    command += ["--out", str(out_path)]
    # Box options follow the pattern --center_{x,y,z} then --size_{x,y,z}.
    for option, triple in (("center", pocket_center), ("size", pocket_size)):
        for index, axis in enumerate("xyz"):
            command += [f"--{option}_{axis}", str(triple[index])]

    raw_output = subprocess.check_output(command)

    # Keep a copy of the docking log as a text file in the DATA directory.
    with open(DATA / "output_text.txt", "w") as log_file:
        log_text = raw_output.decode()
        log_file.write(log_text)
    return log_text

# Run the docking
docking_out_path = DATA / "docking_poses.sdf"  # output file for the docking poses
run_smina(ligand_path, protein_pdbqt_path, docking_out_path, pocket_center, pocket_size)  # the PDBQT protein is used as the receptor

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /Users/wangyang/Desktop/Breast_cancer_brain_metastasis/wang_Gene/AIDD/15_Protein_ligand_docking/data/protein.pdb)



Using ligand resname: HJK
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using random seed: 337003265

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+----------

'   _______  _______ _________ _        _______ \n  (  ____ \\(       )\\__   __/( (    /|(  ___  )\n  | (    \\/| () () |   ) (   |  \\  ( || (   ) |\n  | (_____ | || || |   | |   |   \\ | || (___) |\n  (_____  )| |(_)| |   | |   | (\\ \\) ||  ___  |\n        ) || |   | |   | |   | | \\   || (   ) |\n  /\\____) || )   ( |___) (___| )  \\  || )   ( |\n  \\_______)|/     \\|\\_______/|/    )_)|/     \\|\n\n\nsmina is based off AutoDock Vina. Please cite appropriately.\n\nWeights      Terms\n-0.035579    gauss(o=0,_w=0.5,_c=8)\n-0.005156    gauss(o=3,_w=2,_c=8)\n0.840245     repulsion(o=0,_c=8)\n-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)\n-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)\n1.923        num_tors_div\n\nUsing random seed: 337003265\n\n0%   10   20   30   40   50   60   70   80   90   100%\n|----|----|----|----|----|----|----|----|----|----|\n***************************************************\n\nmode |   affinity | dist from best mode\n     | (kcal/mol) | rmsd l.b.| rmsd u.b.\

In [6]:
def split_sdf_file(sdf_path):
    """
    Write every molecule of an SDF file to its own SDF file.

    The outputs are placed next to the input and named ``<stem>_<i>.sdf``,
    where ``<stem>`` is the input file name without extension and ``i``
    counts the molecules starting at 1. Existing files are overwritten.

    Parameters
    ----------
    sdf_path: str or pathlib.Path
        Path to SDF file that should be split.
    """
    source = Path(sdf_path)
    numbered_poses = enumerate(pybel.readfile("sdf", str(source)), start=1)
    for index, pose in numbered_poses:
        target = source.parent / f"{source.stem}_{index}.sdf"
        pose.write("sdf", str(target), overwrite=True)

# Split the docking output SDF file into one file per pose
split_sdf_file(DATA / "docking_poses.sdf")  # writes docking_poses_<i>.sdf next to the input

In [7]:
# Visualize the docking result
docking_pose_id = 1  # ID of the docking pose to display
view = nv.show_structure_file(  # create the viewer from the pose file
    str(DATA / f"docking_poses_{docking_pose_id}.sdf"),  # SDF file of the selected pose
    representations=[{"params": {}, "type": "licorice"}],  # draw the pose as licorice
)
# NOTE(review): add_pdbid presumably fetches the entry by its ID over the
# network rather than reusing the local PDB file — confirm.
view.add_pdbid(pdb_path.stem)  # add the structure named by the PDB file stem
view  # display the view



NGLWidget()