In [4]:
import warnings  # 忽略警告信息
from pathlib import Path  # 路径处理
import subprocess  # 运行外部程序
import numpy as np  # 数值计算
from MDAnalysis import Universe  # 处理分子结构
from openbabel import pybel  # 分子文件格式转换
import nglview as nv  # 分子可视化
import openmm 

# 忽略警告信息
warnings.filterwarnings("ignore")

# 定义工作目录
HERE = Path.cwd()
DATA = HERE / "data"
DATA.mkdir(exist_ok=True)

# 加载蛋白质结构
pdb_path = DATA / "5i50.pdb"  # 请确保PDB文件位于data目录下
structure = Universe(str(pdb_path))

# 提取蛋白质部分并保存为新的PDB文件
protein_path = DATA / "protein.pdb"
protein = structure.select_atoms("protein")
protein.write(str(protein_path))

def pdb_to_pdbqt(pdb_file, pdbqt_file, pH=7.4):
    """将PDB文件转换为PDBQT文件"""
    molecule = next(pybel.readfile("pdb", str(pdb_file)))
    molecule.OBMol.CorrectForPH(pH)
    molecule.addh()
    molecule.make3D()
    molecule.write("pdbqt", str(pdbqt_file), overwrite=True)

# 将蛋白质PDB文件转换为PDBQT格式
protein_pdbqt_path = DATA / "protein.pdbqt"
pdb_to_pdbqt(protein_path, protein_pdbqt_path)

# 自动检测配体并计算对接盒参数
# 定义标准残基名称
standard_resnames = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
    'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
    'HOH', 'WAT', 'SOL', 'NA', 'CL', 'K', 'CA', 'MG', 'ZN', 'FE', 'SO4',
    'PO4', 'CO', 'NAG', 'MAN', 'BMA'
]

# 选择非标准残基（可能是配体）
non_standard = structure.select_atoms('not resname ' + ' '.join(standard_resnames))

# 获取配体残基名称
ligand_resnames = set(non_standard.residues.resnames)

if not ligand_resnames:
    raise ValueError("在PDB文件中未找到配体残基。")

# 选择原子数最多的残基作为配体
ligand_residue_sizes = {resname: len(structure.select_atoms(f'resname {resname}')) for resname in ligand_resnames}
ligand_resname = max(ligand_residue_sizes, key=ligand_residue_sizes.get)
print(f"检测到的配体残基名称：{ligand_resname}")

# 获取配体坐标，计算对接盒中心和大小
ligand = structure.select_atoms(f"resname {ligand_resname}")
ligand_positions = ligand.positions
pocket_center = ligand_positions.mean(axis=0)
pocket_size = ligand_positions.ptp(axis=0) + 5  # 增加5埃的边界

# 读取MOL000098.mol2并转换为PDBQT格式
mol2_path = DATA / "MOL000098.mol2"  # 请确保mol2文件位于data目录下
ligand_pdbqt_path = DATA / "ligand.pdbqt"

# 将mol2文件转换为pdbqt
mol = next(pybel.readfile("mol2", str(mol2_path)))
mol.addh()
mol.make3D()
mol.write("pdbqt", str(ligand_pdbqt_path), overwrite=True)

# 定义对接输出文件
docking_output_path = DATA / "docking_output.sdf"

# 打开out_text.txt文件以保存Smina的输出
out_text_path = DATA / "out_text.txt"
with open(out_text_path, "w", encoding="utf-8") as out_file:
    # 运行Smina进行分子对接
    print("开始分子对接...")
    process = subprocess.Popen(
        [
            "smina",
            "--ligand", str(ligand_pdbqt_path),
            "--receptor", str(protein_pdbqt_path),
            "--center_x", str(pocket_center[0]),
            "--center_y", str(pocket_center[1]),
            "--center_z", str(pocket_center[2]),
            "--size_x", str(pocket_size[0]),
            "--size_y", str(pocket_size[1]),
            "--size_z", str(pocket_size[2]),
            "--out", str(docking_output_path),
            "--exhaustiveness", "8"
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True
    )

    # 实时输出对接进度并保存到文件
    smina_output = ''
    for line in process.stdout:
        print(line, end='')  # 显示进度
        out_file.write(line)  # 将输出写入文件
        smina_output += line
    process.wait()
    print("分子对接完成。")
    out_file.write("\n" + "="*80 + "\n\n")

    # 记录配体与对接结果的对应关系
    out_file.write("配体与对接结果对应关系:\n")
    out_file.write(f"{ligand_pdbqt_path.name} 对接到 {docking_output_path.name}\n")







  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /Users/wangyang/Desktop/AIDD/15_Mol2_to_Smiles/data/protein.pdb)



检测到的配体残基名称：DG
开始分子对接...
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using random seed: -1178334148

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+----------

In [5]:
# 定义分割SDF文件的函数
def split_sdf_file(sdf_path):
    """
    将SDF文件拆分为每个分子单独的文件。
    每个文件以连续的数字命名。

    参数
    ----------
    sdf_path: str 或 pathlib.Path
        要拆分的SDF文件路径。
    """
    sdf_path = Path(sdf_path)
    stem = sdf_path.stem
    parent = sdf_path.parent
    molecules = pybel.readfile("sdf", str(sdf_path))
    for i, molecule in enumerate(molecules, 1):
        molecule.write("sdf", str(parent / f"{stem}_{i}.sdf"), overwrite=True)
    return

# 分割对接结果的SDF文件
split_sdf_file(docking_output_path)



In [6]:
# 可视化（可选）
# 选择要查看的对接结果编号
docking_pose_id = 1  # 修改为您想查看的构象编号

# 创建nglview视图
view = nv.NGLWidget()
# 加载蛋白质
view.add_component(str(protein_path))
# 加载对接结果
view.add_component(str(DATA / f"{docking_output_path.stem}_{docking_pose_id}.sdf"))
# 设置表示方式
view.add_representation('cartoon', selection='protein', color='spectrum')
view.add_representation('ball+stick', selection='not protein', color='blue')


In [7]:
# 显示视图
view

NGLWidget()