**AF3 Pocket Finding**

In [None]:
import numpy as np
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB import Selection

def find_binding_site_residues(cif_file, distance_cutoff=5.0):
    """
    Find the protein residues that lie within a distance cutoff of a ligand.

    The structure is read from an mmCIF file. Standard amino-acid residues
    (blank hetero flag and a name in ``standard_amino_acids``) are treated as
    protein; every hetero residue that is not named ``HOH``/``WAT`` is treated
    as ligand. A protein residue belongs to the binding site when the smallest
    atom-atom distance between it and any ligand atom is ``<= distance_cutoff``.

    Each model in the file is evaluated on its own, so atoms from different
    models are never compared with each other. A residue that qualifies in
    several models is reported only once, in order of first appearance.

    Args:
        cif_file: Path to the mmCIF file.
        distance_cutoff: Distance threshold, in the coordinate units of the
            file (Å for standard mmCIF). Defaults to 5.0.

    Returns:
        A list of dicts with the keys ``"chain"`` (chain ID),
        ``"residue_number"`` (sequence number taken from the residue ID) and
        ``"residue_name"`` (three-letter residue name). The list is empty when
        no ligand atoms are found or when no residue is within the cutoff.
    """
    # QUIET=True suppresses parser warnings.
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure("complex", cif_file)

    binding_site_residues = []
    reported = set()  # (chain ID, full residue ID, name) already emitted
    ligand_found = False

    for model in structure:
        # Split this model's residues into protein and ligand.
        protein_residues = []
        ligand_atoms = []
        for chain in model:
            for residue in chain:
                hetero_flag = residue.id[0]
                res_name = residue.get_resname()
                if hetero_flag == " " and res_name in standard_amino_acids:
                    protein_residues.append(residue)
                elif hetero_flag != " " and res_name not in ["HOH", "WAT"]:
                    ligand_atoms.extend(residue.get_atoms())

        if not ligand_atoms:
            continue
        ligand_found = True
        ligand_coords = np.array([atom.get_coord() for atom in ligand_atoms])

        for residue in protein_residues:
            res_coords = np.array(
                [atom.get_coord() for atom in residue.get_atoms()]
            )
            # A residue without atoms has no distance to measure; indexing
            # the empty array below would raise, so skip it.
            if res_coords.size == 0:
                continue

            # Pairwise residue-atom x ligand-atom distances via broadcasting.
            pairwise = np.linalg.norm(
                res_coords[:, np.newaxis] - ligand_coords, axis=2
            )
            if np.min(pairwise) > distance_cutoff:
                continue

            chain_id = residue.parent.id
            res_name = residue.get_resname()
            # The full residue ID (incl. insertion code) keeps residues that
            # share a sequence number distinct when de-duplicating.
            key = (chain_id, residue.id, res_name)
            if key in reported:
                continue
            reported.add(key)
            binding_site_residues.append({
                "chain": chain_id,
                "residue_number": residue.id[1],
                "residue_name": res_name
            })

    # No ligand in any model: tell the user and return an empty result.
    if not ligand_found:
        print("未找到配体分子")
        return []

    return binding_site_residues

# Three-letter codes of the 20 standard amino acids.
standard_amino_acids = set(
    "ALA ARG ASN ASP CYS GLN GLU GLY HIS ILE "
    "LEU LYS MET PHE PRO SER THR TRP TYR VAL".split()
)

if __name__ == "__main__":
    # Replace with the path to your own CIF file.
    cif_file_path = "/data/qingyuyang/af3/outputs/example1/example1_model.cif"

    # Look up the binding-site residues with a 5.0 cutoff.
    binding_residues = find_binding_site_residues(
        cif_file_path,
        distance_cutoff=5.0,
    )

    if not binding_residues:
        print("未找到结合位点残基")
    else:
        # Header, separator, then one row per binding-site residue.
        print("结合位点残基在氨基酸序列中的位置：")
        print("链ID | 残基编号(序列位置) | 氨基酸类型")
        print("-" * 40)
        for entry in binding_residues:
            chain_id = entry['chain']
            number = entry['residue_number']
            name = entry['residue_name']
            print(f"  {chain_id:2} | {number:18} | {name}")