In [14]:
import warnings  # 导入警告模块
from pathlib import Path  # 导入路径处理模块
import subprocess  # 导入子进程模块
import numpy as np  # 导入NumPy模块
from MDAnalysis import Universe  # 从MDAnalysis导入Universe类
from openbabel import pybel  # 从Open Babel导入pybel模块
import nglview as nv  # 导入nglview用于可视化

import openmm  # 导入OpenMM模块
import openmm.app  # 导入OpenMM.app模块

# Silence warnings for the rest of the session (applies to every warning category)
warnings.filterwarnings("ignore")  # install a blanket "ignore" filter

AttributeError: 'super' object has no attribute '_ipython_display_'

In [5]:
from pathlib import Path
import os

# Anchor every file path on the directory the kernel is currently running in
HERE = Path.cwd()
DATA = HERE.joinpath('data_1J1L')

# Create the data directory only when nothing exists at that path yet
if not DATA.exists():
    DATA.mkdir(parents=True, exist_ok=True)

# Show the resolved location of the data directory
print(DATA)


/Users/wangyang/Desktop/Breast_cancer_brain_metastasis/wang_Gene/AIDD/15_Protein_ligand_docking/data_1J1L


In [6]:
class Structure(Universe):
    """Universe subclass that serves as the entry point for loading structures."""

    @classmethod
    def from_string(cls, pdb_path):
        """
        Create an instance from a PDB file stored on disk.

        Parameters
        ----------
        pdb_path:
            Location of the PDB file; it is handed unchanged to the class
            constructor.

        Returns
        -------
        Structure
            The object obtained by calling the class with ``pdb_path``.
        """
        loaded_structure = cls(pdb_path)
        return loaded_structure

# Input structure and the protein-only file derived from it
pdb_path = DATA.joinpath("1J1L.pdb")  # this file must already be present in the data directory
protein_path = DATA.joinpath("protein.pdb")

# Read the complete structure from the local PDB file
structure = Structure.from_string(pdb_path)

# Keep only the atoms matched by the "protein" selection and write them out
protein = structure.select_atoms("protein")
protein.write(str(protein_path))

def pdb_to_pdbqt(pdb_path, pdbqt_path, pH=7.4):
    """
    Convert a PDB file to a PDBQT file.

    Only the first molecule found in the PDB file is converted.

    Parameters
    ----------
    pdb_path: str or pathlib.Path
        Path to the input PDB file.
    pdbqt_path: str or pathlib.Path
        Path of the PDBQT file to write; an existing file is overwritten.
    pH: float
        Value passed to Open Babel's ``CorrectForPH`` before hydrogens are added.

    Raises
    ------
    ValueError
        If no molecule can be read from ``pdb_path``.
    """
    # readfile() yields molecules one by one: take the first without parsing
    # (and keeping in memory) every other entry of the file
    molecule = next(pybel.readfile("pdb", str(pdb_path)), None)
    if molecule is None:
        raise ValueError(f"No molecule could be read from {pdb_path}")

    molecule.OBMol.CorrectForPH(pH)
    molecule.addh()
    # Query the partial charge of every atom before writing. The returned
    # values are not used here; presumably the call makes Open Babel assign
    # the charges that end up in the PDBQT file (TODO confirm).
    for atom in molecule.atoms:
        atom.OBAtom.GetPartialCharge()
    molecule.write("pdbqt", str(pdbqt_path), overwrite=True)

# Receptor in PDBQT format, stored in the data directory
protein_pdbqt_path = DATA.joinpath("protein.pdbqt")
pdb_to_pdbqt(pdb_path=protein_path, pdbqt_path=protein_pdbqt_path)

def extract_ligands_from_pdb(structure, exclude_resnames=None):
    """
    Collect the residue names of everything in the structure that is not protein.

    The "not protein" selection also matches waters and ions (the recorded run
    of this notebook reports ``{'HOH', 'FE2'}``). Pass ``exclude_resnames`` to
    drop such entries from the result.

    Parameters
    ----------
    structure:
        Object providing ``select_atoms``, e.g. the loaded structure.
    exclude_resnames: str or iterable of str, optional
        Residue name(s) to leave out of the result, for example ``["HOH"]``.
        Nothing is excluded by default.

    Returns
    -------
    set
        Unique residue names of the selected atoms, minus the excluded ones.
    """
    # A bare string would otherwise be split into single characters by set()
    if isinstance(exclude_resnames, str):
        exclude_resnames = [exclude_resnames]
    excluded = set(exclude_resnames or ())

    ligands = structure.select_atoms("not protein")
    ligand_resnames = {name for name in ligands.resnames if name not in excluded}
    print(f"Detected ligand resnames: {ligand_resnames}")
    return ligand_resnames

# Residue names of all non-protein components found in the structure
ligand_resnames = extract_ligands_from_pdb(structure)

# For every detected residue name, derive a docking box from the bounding
# box of its atoms and report it
for ligand_resname in ligand_resnames:
    ligand = structure.select_atoms(f"resname {ligand_resname}")
    print(f"Processing ligand: {ligand_resname}")

    # Per-axis extremes of the selected atom coordinates
    upper_corner = ligand.positions.max(axis=0)
    lower_corner = ligand.positions.min(axis=0)

    pocket_center = (upper_corner + lower_corner) / 2  # midpoint of the bounding box
    pocket_size = upper_corner - lower_corner + 5  # bounding-box extent plus 5 on every axis

    # Report the box that was derived for this residue name
    print(f"Pocket center: {pocket_center}, Pocket size: {pocket_size}")

# Convert MOL2 to PDBQT using OpenBabel
def mol2_to_pdbqt(mol2_path, pdbqt_path, pH=7.4):
    """
    Convert a MOL2 file to a PDBQT file using OpenBabel.

    Only the first molecule found in the MOL2 file is converted.

    Parameters
    ----------
    mol2_path: str or pathlib.Path
        Path to the input MOL2 file.
    pdbqt_path: str or pathlib.Path
        Path of the PDBQT file to write; an existing file is overwritten.
    pH: float
        Value passed to Open Babel's ``CorrectForPH`` before hydrogens are added.

    Raises
    ------
    ValueError
        If no molecule can be read from ``mol2_path``.
    """
    # readfile() yields molecules one by one: take the first without parsing
    # (and keeping in memory) every other entry of the file
    molecule = next(pybel.readfile("mol2", str(mol2_path)), None)
    if molecule is None:
        raise ValueError(f"No molecule could be read from {mol2_path}")

    molecule.OBMol.CorrectForPH(pH)
    molecule.addh()
    # NOTE(review): unlike pdb_to_pdbqt, no per-atom GetPartialCharge() call is
    # made here - confirm that this difference is intended.
    molecule.write("pdbqt", str(pdbqt_path), overwrite=True)

# Ligand input (MOL2) and its PDBQT counterpart, both inside the data directory
mol2_ligand_path = DATA.joinpath("MOL000098.mol2")
ligand_pdbqt_path = DATA.joinpath("ligand.pdbqt")
mol2_to_pdbqt(mol2_path=mol2_ligand_path, pdbqt_path=ligand_pdbqt_path)

# File that receives the docking poses of the Smina run
docking_out_path = DATA.joinpath("docking_poses.sdf")


def run_smina_with_log(ligand_path, protein_path, out_path, pocket_center, pocket_size, ligand_name, log_dir=None):
    """
    Dock a ligand with Smina and store the program output as a log file.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Ligand file passed to ``--ligand``.
    protein_path: str or pathlib.Path
        Receptor file passed to ``--receptor``.
    out_path: str or pathlib.Path
        File passed to ``--out``.
    pocket_center: sequence of three numbers
        Passed as ``--center_x``, ``--center_y`` and ``--center_z``.
    pocket_size: sequence of three numbers
        Passed as ``--size_x``, ``--size_y`` and ``--size_z``.
    ligand_name: str
        Label used in the printed messages and in the log file name.
    log_dir: str or pathlib.Path, optional
        Directory that receives ``<ligand_name>_docking_log.txt``. When omitted,
        the notebook-level ``DATA`` directory is used.

    Returns
    -------
    str or None
        The log text (header line plus Smina output), or ``None`` when Smina
        could not be started or exited with a non-zero status.
    """
    command = [
        "smina",
        "--ligand", str(ligand_path),
        "--receptor", str(protein_path),
        "--center_x", str(pocket_center[0]),
        "--center_y", str(pocket_center[1]),
        "--center_z", str(pocket_center[2]),
        "--size_x", str(pocket_size[0]),
        "--size_y", str(pocket_size[1]),
        "--size_z", str(pocket_size[2]),
        "--out", str(out_path),
    ]

    try:
        # Merge stderr into the captured stream so that Smina's diagnostics
        # are available in the log and in the failure report below
        raw_output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except FileNotFoundError as e:
        # Raised when the smina executable itself cannot be found
        print(f"Error in docking with ligand {ligand_name}: smina executable not found ({e})")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error in docking with ligand {ligand_name}: {e}")
        if e.output:
            # Show what Smina printed before it failed
            print(e.output.decode(errors="replace"))
        return None

    # Print and keep the docking result
    docking_log = f"Docking result for ligand {ligand_name} with receptor:\n"
    docking_log += raw_output.decode(errors="replace")
    print(docking_log)

    # Persist the log; DATA is only looked up when no directory was given
    target_dir = Path(DATA if log_dir is None else log_dir)
    with open(target_dir / f"{ligand_name}_docking_log.txt", "w", encoding="utf-8") as log_file:
        log_file.write(docking_log)

    return docking_log

# Run one docking (with log file) per detected residue name. The residue only
# defines the docking box; the molecule that is docked is the same MOL2 file
# every time.
for ligand_resname in ligand_resnames:
    ligand = structure.select_atoms(f"resname {ligand_resname}")
    print(f"Processing ligand: {ligand_resname}")

    # Docking box: midpoint and extent (plus 5 on every axis) of the bounding
    # box around the selected atoms
    upper_corner = ligand.positions.max(axis=0)
    lower_corner = ligand.positions.min(axis=0)
    pocket_center = (upper_corner + lower_corner) / 2
    pocket_size = upper_corner - lower_corner + 5

    # Convert the MOL2 input to PDBQT. A per-residue file,
    # DATA / f"{ligand_resname}.mol2", is the alternative that was left
    # commented out by the author.
    ligand_mol2_path = DATA.joinpath("MOL000098.mol2")
    ligand_pdbqt_path = DATA.joinpath(f"{ligand_resname}.pdbqt")
    mol2_to_pdbqt(mol2_path=ligand_mol2_path, pdbqt_path=ligand_pdbqt_path)

    # Dock into the box derived above; poses go to a per-residue SDF file
    docking_out_path = DATA.joinpath(f"{ligand_resname}_docking_poses.sdf")
    run_smina_with_log(
        ligand_path=ligand_pdbqt_path,
        protein_path=protein_pdbqt_path,
        out_path=docking_out_path,
        pocket_center=pocket_center,
        pocket_size=pocket_size,
        ligand_name=ligand_resname,
    )



  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is /Users/wangyang/Desktop/Breast_cancer_brain_metastasis/wang_Gene/AIDD/15_Protein_ligand_docking/data_1J1L/protein.pdb)



Detected ligand resnames: {'HOH', 'FE2'}
Processing ligand: HOH
Pocket center: [29.197  59.663  14.9525], Pocket size: [51.232    61.18     45.009003]
Processing ligand: FE2
Pocket center: [26.93  49.403 11.893], Pocket size: [5. 5. 5.]
Processing ligand: HOH
Docking result for ligand HOH with receptor:
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using ra

In [8]:
# def split_sdf_file(sdf_path):
#     """
#     Split an SDF file into separate files for each molecule.
#     Each file is named with consecutive numbers.

#     Parameters
#     ----------
#     sdf_path: str or pathlib.Path
#         Path to SDF file that should be split.
#     """
#     sdf_path = Path(sdf_path)  # 转换为Path对象
#     stem = sdf_path.stem  # 获取文件名（不带扩展名）
#     parent = sdf_path.parent  # 获取父目录
#     molecules = pybel.readfile("sdf", str(sdf_path))  # 读取SDF文件中的分子
#     for i, molecule in enumerate(molecules, 1):  # 遍历每个分子
#         molecule.write("sdf", str(parent / f"{stem}_{i}.sdf"), overwrite=True)  # 写入单独的SDF文件
#     return

# # 分割对接输出的SDF文件
# split_sdf_file(DATA / f"{ligand_resname}_docking_poses.sdf")  # 调用函数分割SDF文件

import subprocess
import re

def _parse_best_affinity(output_text):
    """Return the affinity listed for mode 1 in Smina's result table, or None."""
    # Search below the table separator when it is present, so that nothing
    # printed before the table can be mistaken for a result row
    _, separator, table = output_text.partition("-----+")
    if not separator:
        table = output_text
    # Accept any sign: the best score is not guaranteed to be negative
    match = re.search(r"^\s*1\s+([-+]?\d+(?:\.\d+)?)\s", table, re.MULTILINE)
    return float(match.group(1)) if match else None


def run_smina_with_best_affinity(ligand_path, protein_path, out_path, pocket_center, pocket_size, ligand_name):
    """
    Perform docking with Smina and extract the best affinity result.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to the ligand PDBQT file.
    protein_path: str or pathlib.Path
        Path to the protein PDBQT file.
    out_path: str or pathlib.Path
        Path to save the output docking poses.
    pocket_center: list
        The center coordinates of the docking pocket.
    pocket_size: list
        The size of the docking pocket.
    ligand_name: str
        The name of the ligand for logging purposes.

    Returns
    -------
    float or None
        Affinity reported for mode 1, or ``None`` when Smina could not be
        started, exited with a non-zero status, or printed no result row.
    """
    command = [
        "smina",
        "--ligand", str(ligand_path),
        "--receptor", str(protein_path),
        "--center_x", str(pocket_center[0]),
        "--center_y", str(pocket_center[1]),
        "--center_z", str(pocket_center[2]),
        "--size_x", str(pocket_size[0]),
        "--size_y", str(pocket_size[1]),
        "--size_z", str(pocket_size[2]),
        "--out", str(out_path),
    ]

    try:
        output_text = subprocess.check_output(
            command,
            universal_newlines=True,  # return the output as str instead of bytes
            stderr=subprocess.STDOUT,  # keep Smina's diagnostics with the output
        )
    except FileNotFoundError as e:
        # Raised when the smina executable itself cannot be found
        print(f"Error in docking with ligand {ligand_name}: smina executable not found ({e})")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error in docking with ligand {ligand_name}: {e}")
        if e.output:
            # Show what Smina printed before it failed
            print(e.output)
        return None

    # Print the complete program output
    print(f"Docking result for ligand {ligand_name}:\n{output_text}")

    best_affinity = _parse_best_affinity(output_text)
    if best_affinity is None:
        print(f"No valid affinity found for {ligand_name}.")
        return None

    print(f"Best affinity for {ligand_name}: {best_affinity} kcal/mol")
    return best_affinity

# Files for this docking run, all located in the data directory
ligand_path = DATA.joinpath("ligand.pdbqt")  # ligand in PDBQT format
protein_path = DATA.joinpath("protein.pdbqt")  # receptor in PDBQT format (rebinds protein_path)
docking_out_path = DATA.joinpath("docking_poses.sdf")  # receives the docking poses

# Hard-coded docking box.
# NOTE(review): these numbers do not match the pocket center/size printed by
# the per-residue loop in the recorded output - confirm where they come from.
pocket_center = [-36.1035, 35.811, 8.556]
pocket_size = [63.345, 56.038002, 47.008]

# Dock and keep the best affinity. "HOH" is only the label used in the printed
# messages; the docked molecule is whatever ligand.pdbqt contains.
best_affinity = run_smina_with_best_affinity(
    ligand_path,
    protein_path,
    docking_out_path,
    pocket_center,
    pocket_size,
    "HOH",
)






Docking result for ligand HOH:
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using random seed: -1291628050

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+---

In [9]:
# import nglview as nv

# def visualize_best_docking_pose(docking_pose_path, protein_path, best_affinity_pose_id=1):
#     """
#     可视化最佳对接模式的蛋白质丝带形状和配体的结合位点。
    
#     Parameters
#     ----------
#     docking_pose_path: str or pathlib.Path
#         The path to the docking poses SDF file.
#     protein_path: str or pathlib.Path
#         The path to the protein PDB file.
#     best_affinity_pose_id: int
#         The ID of the docking pose with the best affinity (default is 1).
#     """
#     # 加载SDF文件中的对接结果 (配体)
#     view = nv.show_structure_file(str(docking_pose_path))

#     # 添加受体（蛋白质）结构
#     view.add_component(str(protein_path))  # 添加蛋白质（受体）

#     # 设置为最佳对接模式 (注意 NGLView 的索引从 0 开始)
#     view.frame = best_affinity_pose_id - 1
    
#     # 清除之前的可视化设置
#     view.clear_representations()

#     # 添加蛋白质以丝带或cartoon形式展示
#     view.add_cartoon(selection="protein", color="blue")  # 蓝色丝带/卡通形式显示蛋白质

#     # 添加配体的可视化
#     view.add_ball_and_stick(selection="ligand", color="red")  # 配体以红色的简单棒球样式显示

#     # 调整视图，居中到配体和蛋白质
#     view.center()

#     return view

# # 使用最佳affinity对应的pose ID可视化
# docking_pose_path = DATA / f"{ligand_resname}_docking_poses.sdf"
# protein_path = DATA / "protein.pdb"




In [13]:
import nglview as nv

# Pose number of interest (assigned here, not referenced again in this cell)
docking_pose_id = 1

# SDF file holding the docking poses written for this residue name
ligand_resname = "HOH"
docking_pose_path = DATA.joinpath(f"{ligand_resname}_docking_poses.sdf")

# Build the viewer from the docking poses
view = nv.show_structure_file(str(docking_pose_path))

# Add the structure identified by the stem of pdb_path (the PDB ID)
view.add_pdbid(pdb_path.stem)

# Render the widget as the cell output.
# NOTE(review): the recorded output is "AttributeError: 'super' object has no
# attribute '_ipython_display_'" - presumably an nglview/ipywidgets version
# mismatch in the environment; verify.
view


AttributeError: 'super' object has no attribute '_ipython_display_'