In [7]:
%cd ~/REVIVAL2

/disk2/fli/REVIVAL2


In [8]:
import numpy as np
from Bio.PDB import PDBParser
from scipy.spatial import ConvexHull, Delaunay

def parse_active_site_coordinates(pdb_file, chain_id, residue_ids):
    """
    Extracts atomic coordinates for specified active site residues in a given chain.

    Args:
        pdb_file (str): Path to the PDB file.
        chain_id (str): Chain identifier (e.g., "A").
        residue_ids (list): List of residue IDs (e.g., [123, 124, 125]).

    Returns:
        np.ndarray: Array of atomic coordinates for the active site.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("enzyme", pdb_file)
    coords = []

    # Iterate over chains to find the matching chain
    for chain in structure.get_chains():
        if chain.id == chain_id:
            # Iterate over residues in the chain
            for res in chain.get_residues():
                if res.id[1] in residue_ids:
                    # Collect atomic coordinates for the residue
                    for atom in res.get_atoms():
                        coords.append(atom.coord)
    
    return np.array(coords)


def compute_convex_hull_volume(coords):
    """Compute the volume of a convex hull for the active site."""
    hull = ConvexHull(coords)
    return hull.volume

def compute_alpha_shape_volume(coords, alpha=1.5):
    """Compute the volume of an alpha shape (approximation of the pocket)."""
    tri = Delaunay(coords)
    tetrahedra = coords[tri.simplices]
    volumes = []
    for tetra in tetrahedra:
        vol = np.abs(np.linalg.det(np.c_[tetra - tetra[0], [1, 1, 1, 1]]) / 6.0)
        if vol <= alpha:
            volumes.append(vol)
    return sum(volumes)

In [8]:
from REVIVAL.zs.plip import get_plip_active_site_dict



In [10]:



# Input PDB and active site residues
enzyme_pdb = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/F89A_0.pdb"  # Replace with your enzyme PDB file

chain_id = "A"  # Specify the chain of interest
residue_ids = list(get_plip_active_site_dict("/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/report.xml").keys())  # Specify the active site residue IDs

# Parse coordinates for active site residues
active_site_coords = parse_active_site_coordinates(enzyme_pdb, chain_id, residue_ids)

# Compute volumes (reuse previous volume functions)
convex_hull_volume = compute_convex_hull_volume(active_site_coords)
alpha_shape_volume = compute_alpha_shape_volume(active_site_coords)

# Output results
print(f"Active Site Volume (Convex Hull): {convex_hull_volume:.2f} Å³")
print(f"Active Site Volume (Alpha Shape): {alpha_shape_volume:.2f} Å³")



Active Site Volume (Convex Hull): 12512.56 Å³
Active Site Volume (Alpha Shape): 1378.41 Å³


In [21]:
import freesasa

def calculate_pocket_volume_freesasa(pdb_file, chain_id, resid_list):
    """
    Calculate the active site volume using FreeSASA by specifying chain and residue IDs.

    Args:
        pdb_file (str): Path to the PDB file.
        chain_id (str): Chain ID to specify the protein chain (e.g., "A").
        resid_list (list): List of residue IDs (e.g., [123, 124, 125]).

    Returns:
        float: Total solvent-accessible volume of the active site in Å³.
    """
    # Load the structure from the PDB file
    structure = freesasa.Structure(pdb_file)
    
    # Perform FreeSASA calculation for the entire structure
    result = freesasa.calc(structure)

    # Initialize total volume for the active site
    total_volume = 0.0

    # Iterate over atoms and check if they belong to the specified active site residues
    for atom in range(structure.nAtoms()):
        atom_residue = structure.residueNumber(atom)
        atom_chain = structure.chainLabel(atom)
        if atom_chain == chain_id and atom_residue in resid_list:
            total_volume += result.atomArea(atom)  # Add volume contribution of the atom
    
    return total_volume


In [22]:
# Input PDB and active site residues
enzyme_pdb = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/F89A_0.pdb"  # Replace with your enzyme PDB file

chain_id = "A"  # Specify the chain of interest
resid_list = list(get_plip_active_site_dict("/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/report.xml").keys())  # Specify the active site residue IDs
calculate_pocket_volume_freesasa(enzyme_pdb, chain_id, resid_list)



0.0

14' unknown, guessing element is ' C', and radius 1.700 A


In [10]:
def calculate_pocket_volume(pdb_file, chain_id, resid_list):
    """
    Calculate the active site solvent-accessible surface area (SASA) using PyMOL.

    Args:
        pdb_file (str): Path to the PDB file.
        chain_id (str): Chain ID to specify the protein chain (e.g., "A").
        resid_list (list): List of residue IDs (e.g., [123, 124, 125]).

    Returns:
        float: Solvent-accessible surface area (SASA) of the active site in Å².
    """
    import pymol2

    # Construct the selection string dynamically
    selection = " or ".join([f"(chain {chain_id} and resi {resid})" for resid in resid_list])

    with pymol2.PyMOL() as pymol:
        pymol.cmd.load(pdb_file, "enzyme")
        pymol.cmd.select("pocket", selection)
        pymol.cmd.set("dot_solvent", 1)  # Enable solvent-accessible surface calculation
        pymol.cmd.set("dot_density", 4)  # Increase dot density for better accuracy
        area = pymol.cmd.get_area("pocket")  # Get the SASA for the selection
        return area

In [11]:
# Input PDB and active site residues
enzyme_pdb = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/F89A_0.pdb"  # Replace with your enzyme PDB file

chain_id = "A"  # Specify the chain of interest
resid_list = list(get_plip_active_site_dict("/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/ParLQ/F89A_0/report.xml").keys())  # Specify the active site residue IDs
calculate_pocket_volume(enzyme_pdb, chain_id, resid_list)

2553.607421875