In [1]:
# Switch the kernel's working directory to the REVIVAL2 checkout (the recorded
# output shows it resolving to /disk2/fli/REVIVAL2). Presumably required so the
# `REVIVAL` package imports in the following cells resolve — TODO confirm.
%cd ~/REVIVAL2

/disk2/fli/REVIVAL2


In [6]:
from REVIVAL.util import calculate_chain_centroid, get_protein_structure

import numpy as np

In [24]:
def get_ligand_centroid(pdb_file, ligand_info):
    """
    Compute the mean position of a hand-picked set of atoms in a structure file.

    Each entry of ``ligand_info`` selects atoms by chain, residue name and atom
    name. Matching is intentionally loose:

    * residue names match when the requested name is a case-insensitive
      substring of the residue's ``resname``;
    * atom names match case-insensitively after underscores are stripped from
      both sides.

    Every model in the structure is searched, and every matching atom
    contributes one coordinate to the average.

    Args:
        pdb_file (str): Path to the structure file, handed to
            ``get_protein_structure``.
        ligand_info (list of tuples): Atom selectors given as
            ``(chain_id, residue_name, atom_name)``.

    Returns:
        np.ndarray: Element-wise mean of the matched atom coordinates (x, y, z).

    Raises:
        ValueError: If no atom in the structure matches any selector.
    """

    def _canonical(name):
        # Underscores and letter case are not significant when comparing atom names.
        return name.replace("_", "").lower()

    structure = get_protein_structure(pdb_file)
    matched_coords = []

    for chain_id, residue_name, atom_name in ligand_info:
        for model in structure:
            try:
                for residue in model[chain_id]:
                    # Loose residue match: requested name contained in resname.
                    if residue_name.lower() not in residue.resname.lower():
                        continue
                    matched_coords.extend(
                        atom.coord
                        for atom in residue
                        if _canonical(atom_name) == _canonical(atom.name)
                    )
            except KeyError:
                # The chain is absent from this model: report it and keep going.
                print(f"Chain {chain_id} not found in the structure.")

    if len(matched_coords) == 0:
        raise ValueError("No matching atoms found in the structure.")

    # Centroid = per-axis mean over all collected coordinates.
    return np.mean(matched_coords, axis=0)

In [34]:
def extract_active_site_by_radius(pdb_file, target_coord, target_chain="A", distance_threshold=10.0):
    """
    List the residues of one chain that lie close to a point in space.

    A residue's position is the centroid of its non-backbone atoms. A glycine
    with no non-backbone atoms is represented by its alpha carbon (CA). Any
    other residue with no non-backbone atoms is skipped. A residue is reported
    when that position is within ``distance_threshold`` of ``target_coord``.

    Every model in the structure is searched. A residue that qualifies in more
    than one model is reported only once, at its first occurrence, so
    multi-model files do not produce repeated entries.

    Args:
        pdb_file (str): Path to the structure file, handed to
            ``get_protein_structure``.
        target_coord (array-like): Target (x, y, z) coordinate.
        target_chain (str): Chain ID to search within (default is "A").
        distance_threshold (float): Inclusive distance cut-off, in the same
            units as the structure's coordinates (Ångströms for PDB/mmCIF).

    Returns:
        list: ``(residue_name, residue_number)`` tuples in chain order,
              e.g. ``[("GLY", 12), ("ALA", 25)]``.

    Raises:
        KeyError: If ``target_chain`` is missing from a model.
    """
    structure = get_protein_structure(pdb_file)
    target = np.asarray(target_coord)

    # OXT is the extra carboxyl oxygen of the C-terminal residue. It is a
    # backbone atom and must not drag the "side chain" centroid around.
    backbone_atom_names = {"N", "CA", "C", "O", "OXT"}

    nearby_residues = []
    # Keyed on the residue name plus the full residue id, so residues that share
    # a number but differ in insertion code stay distinct, while the same
    # residue seen again in a later model is recognised as a repeat.
    already_reported = set()

    for model in structure:
        chain = model[target_chain]
        for residue in chain:
            side_chain_coords = [
                atom.coord for atom in residue if atom.name not in backbone_atom_names
            ]

            if side_chain_coords:
                centroid = np.mean(np.array(side_chain_coords), axis=0)
            elif residue.resname == "GLY" and "CA" in residue:
                # Glycine has no side chain: fall back to the alpha carbon.
                centroid = np.array(residue["CA"].coord)
            else:
                # Nothing to measure from (e.g. a water, or a residue missing atoms).
                continue

            if np.linalg.norm(centroid - target) > distance_threshold:
                continue

            residue_key = (residue.resname, residue.id)
            if residue_key in already_reported:
                continue
            already_reported.add(residue_key)
            nearby_residues.append((residue.resname, residue.id[1]))

    return nearby_residues


In [31]:
from REVIVAL.global_param import LIB_INFO_DICT

In [22]:
# Display the (chain_id, residue_name, atom_name) selectors registered for the
# 4-bromo ligand of the PfTrpB-4bromo library; this is the list later passed
# to get_ligand_centroid as `ligand_info`.
LIB_INFO_DICT["PfTrpB-4bromo"]["4bromo-info"]

[('B', 'LIG', 'C1'),
 ('B', 'LIG', 'C2'),
 ('B', 'LIG', 'C3'),
 ('B', 'LIG', 'C4'),
 ('B', 'LIG', 'C5'),
 ('B', 'LIG', 'C6'),
 ('B', 'LIG', 'C7'),
 ('B', 'LIG', 'C8'),
 ('B', 'LIG', 'N1'),
 ('B', 'LIG', 'BR1')]

In [25]:
# Structure file used by the cells below (an mmCIF model, despite the variable name).
pdb_file = (
    "/disk2/fli/REVIVAL2/zs/af3/struct_joint/"
    "PfTrpB-4bromo/i165a_i183a_y301v/"
    "seed-1_sample-0/model.cif"
)

In [26]:
# Centroid of the selected 4-bromo ligand atoms in the structure above.
get_ligand_centroid(
    pdb_file=pdb_file,
    ligand_info=LIB_INFO_DICT["PfTrpB-4bromo"]["4bromo-info"],
)

array([-1.9962   , -0.4137   , -6.8669004], dtype=float32)

In [35]:

# Chain-A residues whose side-chain centroid lies within 12 Å of the ligand centroid.
extract_active_site_by_radius(
    pdb_file,
    get_ligand_centroid(pdb_file, LIB_INFO_DICT["PfTrpB-4bromo"]["4bromo-info"]),
    target_chain="A",
    distance_threshold=12.0,
)

[('LYS', 82),
 ('ASN', 85),
 ('ALA', 103),
 ('GLU', 104),
 ('THR', 105),
 ('GLY', 106),
 ('ALA', 107),
 ('HIS', 110),
 ('ASP', 133),
 ('THR', 160),
 ('LEU', 161),
 ('LYS', 162),
 ('ASP', 163),
 ('ALA', 164),
 ('ALA', 165),
 ('ASP', 166),
 ('ALA', 168),
 ('LEU', 169),
 ('TYR', 181),
 ('ALA', 183),
 ('GLY', 184),
 ('SER', 185),
 ('VAL', 186),
 ('VAL', 187),
 ('GLY', 188),
 ('PRO', 189),
 ('TYR', 192),
 ('PRO', 193),
 ('VAL', 196),
 ('VAL', 226),
 ('GLY', 227),
 ('GLY', 228),
 ('GLY', 229),
 ('SER', 230),
 ('SER', 263),
 ('SER', 265),
 ('SER', 274),
 ('HIS', 275),
 ('GLY', 276),
 ('MET', 277),
 ('SER', 279),
 ('PHE', 281),
 ('ILE', 289),
 ('SER', 292),
 ('GLY', 298),
 ('LEU', 299),
 ('ASP', 300),
 ('VAL', 301),
 ('PRO', 302),
 ('GLY', 303),
 ('VAL', 304),
 ('GLY', 305),
 ('PRO', 306),
 ('HIS', 308)]

In [11]:
# Display what REVIVAL.util.calculate_chain_centroid returns for chain "B" of
# the same structure. Presumably this is the centroid over every atom in the
# chain (the helper's source is not shown here — TODO confirm); it serves as a
# comparison against the centroid of the hand-picked ligand atoms computed by
# get_ligand_centroid.
calculate_chain_centroid(pdb_file, chain_ids = ["B"])

array([-3.4316127 , -0.73019344, -1.117032  ], dtype=float32)