In [3]:
import plip

In [6]:
import lxml

In [5]:
from rdkit import Chem
from openbabel import pybel

In [7]:
from lxml import etree

def xml_file_to_dict(file_path):
    """
    Parses an XML file and converts it to a dictionary.
    :param file_path: Path to the XML file.
    :return: Dictionary representation of the XML.
    """
    def xml_to_dict(element):
        """
        Recursively converts an lxml element to a dictionary.
        """
        # Base case: If the element has no children, return its text or attributes
        if not len(element) and not element.attrib:
            return element.text

        # Start the dictionary with attributes if present
        element_dict = {}
        if element.attrib:
            element_dict["@attributes"] = element.attrib

        # Process child elements
        for child in element:
            child_dict = xml_to_dict(child)
            if child.tag not in element_dict:
                # First occurrence of the tag
                element_dict[child.tag] = child_dict
            else:
                # Multiple occurrences of the same tag
                if not isinstance(element_dict[child.tag], list):
                    element_dict[child.tag] = [element_dict[child.tag]]
                element_dict[child.tag].append(child_dict)

        # Add element text if present and not just whitespace
        if element.text and element.text.strip():
            element_dict["#text"] = element.text.strip()

        return element_dict

    # Parse the XML file
    tree = etree.parse(file_path)
    root = tree.getroot()

    # Convert the root element to a dictionary
    return xml_to_dict(root)

In [8]:

# Example usage
file_path = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/PfTrpB-4bromo/I165A:I183A:Y301V_0/report.xml"  # Replace with the path to your XML file
result = xml_file_to_dict(file_path)
print(result)


{'plipversion': '2.4.0', 'bindingsite': {'@attributes': {'id': '1', 'has_interactions': 'True'}, 'identifiers': {'longname': 'LIG', 'ligtype': 'SMALLMOLECULE', 'hetid': 'LIG', 'chain': 'B', 'position': '0', 'composite': 'False', 'members': {'member': {'@attributes': {'id': '1'}, '#text': 'LIG:B:0'}}, 'smiles': 'OC(=O)[C@H](/N=C/c1c(cnc(c1O)C)CP(=O)(O)O)C.Brc1cccc2c1cc[nH]2.[Na]', 'inchikey': 'JDIJGIUQUOEZTL-CXALCSDVSA-N\n'}, 'lig_properties': {'num_heavy_atoms': '31', 'num_hbd': '5', 'num_unpaired_hbd': '1', 'num_hba': '8', 'num_unpaired_hba': '2', 'num_hal': '1', 'num_unpaired_hal': '1', 'num_aromatic_rings': '3', 'num_rotatable_bonds': '5', 'molweight': '521.2540709999998', 'logp': '3.595700000000003'}, 'interacting_chains': {'interacting_chain': {'@attributes': {'id': '1'}, '#text': 'A'}}, 'bs_residues': {'bs_residue': [{'@attributes': {'id': '1', 'contact': 'False', 'min_dist': '4.7', 'aa': 'LYS'}, '#text': '376A'}, {'@attributes': {'id': '2', 'contact': 'False', 'min_dist': '4.7',

In [9]:
len(result["bindingsite"])

7

In [10]:
result["bindingsite"].keys()

dict_keys(['@attributes', 'identifiers', 'lig_properties', 'interacting_chains', 'bs_residues', 'interactions', 'mappings'])

In [30]:
active_site_list = []

for i in result["bindingsite"]["bs_residues"]["bs_residue"]:
    active_site_list.append((i["@attributes"]["aa"], int(i["#text"][:-2])))

In [31]:
active_site_list

[('LYS', 37),
 ('ALA', 23),
 ('MET', 12),
 ('LEU', 26),
 ('GLY', 37),
 ('GLY', 22),
 ('HIS', 11),
 ('GLN', 13),
 ('VAL', 11),
 ('ALA', 11),
 ('PRO', 30),
 ('GLU', 25),
 ('GLN', 20),
 ('GLN', 10),
 ('SER', 26),
 ('HIS', 27),
 ('GLY', 11),
 ('GLY', 7),
 ('HIS', 26),
 ('GLY', 18),
 ('ASN', 23),
 ('TYR', 12),
 ('GLY', 22),
 ('GLY', 30),
 ('LYS', 8),
 ('SER', 34),
 ('SER', 26),
 ('ASP', 16),
 ('ASN', 14),
 ('SER', 27),
 ('MET', 23),
 ('LEU', 28),
 ('GLY', 22),
 ('GLU', 34),
 ('LEU', 34),
 ('ALA', 8),
 ('ASN', 36),
 ('VAL', 19),
 ('ALA', 34),
 ('GLY', 18),
 ('VAL', 30),
 ('CYS', 22),
 ('MET', 27),
 ('SER', 18),
 ('GLU', 10),
 ('ASP', 37),
 ('TYR', 18),
 ('GLU', 7),
 ('GLY', 10),
 ('SER', 23),
 ('LEU', 37),
 ('ALA', 25),
 ('GLY', 29),
 ('VAL', 18),
 ('GLY', 30),
 ('THR', 10),
 ('HIS', 30),
 ('ASP', 13),
 ('GLY', 10),
 ('ASP', 30),
 ('VAL', 18),
 ('VAL', 22),
 ('ALA', 16),
 ('ALA', 16),
 ('ALA', 18),
 ('ARG', 37),
 ('LEU', 29),
 ('ALA', 26),
 ('ALA', 10),
 ('PHE', 28),
 ('LEU', 16),
 ('HIS', 8

In [14]:
result["bindingsite"]["interactions"].keys()

dict_keys(['hydrophobic_interactions', 'hydrogen_bonds', 'water_bridges', 'salt_bridges', 'pi_stacks', 'pi_cation_interactions', 'halogen_bonds', 'metal_complexes'])

In [19]:
result["bindingsite"]["interactions"]["metal_complexes"]

{'metal_complex': [{'@attributes': {'id': '1'},
   'resnr': '263',
   'restype': 'SER',
   'reschain': 'A',
   'resnr_lig': '0',
   'restype_lig': 'LIG',
   'reschain_lig': 'B',
   'metal_idx': '2996',
   'metal_type': 'Na',
   'target_idx': '2021',
   'target_type': 'O',
   'coordination': '4',
   'dist': '2.70',
   'location': 'protein.mainchain',
   'rms': '42.99',
   'geometry': 'square.planar',
   'complexnum': '1',
   'metalcoo': {'x': '-1.675', 'y': '-8.521', 'z': '-4.796'},
   'targetcoo': {'x': '-2.691', 'y': '-10.890', 'z': '-4.001'}},
  {'@attributes': {'id': '2'},
   'resnr': '265',
   'restype': 'SER',
   'reschain': 'A',
   'resnr_lig': '0',
   'restype_lig': 'LIG',
   'reschain_lig': 'B',
   'metal_idx': '2996',
   'metal_type': 'Na',
   'target_idx': '2034',
   'target_type': 'O',
   'coordination': '4',
   'dist': '2.62',
   'location': 'protein.sidechain',
   'rms': '42.99',
   'geometry': 'square.planar',
   'complexnum': '1',
   'metalcoo': {'x': '-1.675', 'y': '-8.

In [2]:
import os
from plip.structure.preparation import PDBComplex
from rdkit import Chem
from rdkit.Chem import AllChem
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *

def analyze_interactions(pdb_file):
    # Step 1: Use PLIP to identify protein-ligand interactions
    pl_complex = PDBComplex()
    pl_complex.load_pdb(pdb_file)
    interactions_summary = {}

    # Parse each binding site detected by PLIP
    for bs_id, site in pl_complex.interaction_sets.items():
        interactions_summary[bs_id] = {
            "hydrophobic": len(site.hydrophobic_contacts),
            "h_bonds": len(site.hydrogen_bonds),
            "salt_bridges": len(site.saltbridges),
            "pi_stacking": len(site.pi_stacking),
            "metal_complexes": len(site.metal_complexes),
        }
    
    return interactions_summary

def calculate_rdkit_energy(smiles):
    # Step 2: Use RDKit to compute molecular energy
    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol)
    energy = AllChem.UFFGetMoleculeForceField(mol).CalcEnergy()
    return energy

def calculate_openmm_energy(pdb_file):
    # Step 3: Use OpenMM to calculate interaction energy
    pdb = PDBFile(pdb_file)
    forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff, constraints=HBonds)
    integrator = VerletIntegrator(0.002*picoseconds)
    simulation = Simulation(pdb.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True)
    potential_energy = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
    return potential_energy

# def main():
#     # Input files
#     pdb_file = "protein_ligand.pdb"  # Replace with your PDB file path
#     ligand_smiles = "CC(=O)NC1=CC=C(O)C=C1"  # Replace with your ligand's SMILES
    
#     # Step 1: Analyze interactions with PLIP
#     interactions = analyze_interactions(pdb_file)
#     print("PLIP Interaction Analysis:")
#     for site_id, data in interactions.items():
#         print(f"Binding Site {site_id}: {data}")
    
#     # Step 2: Calculate RDKit energy
#     rdkit_energy = calculate_rdkit_energy(ligand_smiles)
#     print(f"RDKit Ligand Energy: {rdkit_energy:.2f} kcal/mol")
    
#     # Step 3: Calculate OpenMM interaction energy
#     openmm_energy = calculate_openmm_energy(pdb_file)
#     print(f"OpenMM Interaction Energy: {openmm_energy:.2f} kcal/mol")



In [9]:
pdb_file = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/PfTrpB-4bromo/I165A:I183A:Y301V_0/I165A:I183A:Y301V_0_protonated.pdb"  # Replace with the path to your XML file
interactions = analyze_interactions(pdb_file)
print("PLIP Interaction Analysis:")
for site_id, data in interactions.items():
    print(f"Binding Site {site_id}: {data}")

PLIP Interaction Analysis:


In [1]:
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

def fix_pdb(pdb_file, fixed_pdb_file):
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH=7.4)  # Add hydrogens at neutral pH
    with open(fixed_pdb_file, 'w') as output:
        PDBFile.writeFile(fixer.topology, fixer.positions, output)
    print(f"Fixed PDB file saved to {fixed_pdb_file}")

# Input and output PDB file paths
original_pdb = "protein.pdb"
fixed_pdb = "protein_fixed.pdb"

fix_pdb(original_pdb, fixed_pdb)




Fixed PDB file saved to protein_fixed.pdb


In [19]:
from rdkit import Chem
from rdkit.Chem import AllChem
import subprocess

ligand_smiles = "C1=CC2=C(C=CN2)C(=C1)Br"  # Replace with your ligand SMILES
ligand = Chem.MolFromSmiles(ligand_smiles)
ligand = Chem.AddHs(ligand)
AllChem.EmbedMolecule(ligand)

# Save as SDF
Chem.MolToMolFile(ligand, "ligand.sdf")

# Convert SDF to MOL2 using Open Babel
subprocess.run(["obabel", "ligand.sdf", "-O", "ligand.mol2"])


1 molecule converted


CompletedProcess(args=['obabel', 'ligand.sdf', '-O', 'ligand.mol2'], returncode=0)

In [15]:
import parmed

def generate_openmm_params(mol2_file, frcmod_file, xml_output):
    """
    Generate OpenMM XML parameters from Amber files (MOL2 and FRCMOD).

    Args:
        mol2_file (str): Path to the MOL2 file (ligand structure with charges).
        frcmod_file (str): Path to the FRCMOD file (ligand force field parameters).
        xml_output (str): Path to save the OpenMM-compatible XML file.
    """
    # Load the ligand as a ResidueTemplate
    print(f"Loading MOL2 file: {mol2_file}")
    residue_template = parmed.load_file(mol2_file)

    # Create a new Structure object
    print("Converting ResidueTemplate to Structure...")
    structure = parmed.Structure()

    # Add atoms to the Structure
    print("Adding atoms...")
    atom_mapping = {}
    for atom in residue_template.atoms:
        new_atom = parmed.Atom(
            name=atom.name,
            type=atom.type,
            atomic_number=atom.atomic_number,
            mass=atom.mass,
            charge=atom.charge
        )
        structure.add_atom(new_atom, residue_template.name, 1)
        atom_mapping[atom] = new_atom

    # Add bonds to the Structure
    print("Adding bonds...")
    for bond in residue_template.bonds:
        structure.bonds.append(parmed.Bond(atom_mapping[bond.atom1], atom_mapping[bond.atom2]))

    # Load the Amber parameters (FRCMOD)
    print(f"Loading FRCMOD file: {frcmod_file}")
    frcmod_params = parmed.amber.AmberParameterSet(frcmod_file)

    # Assign parameters to the Structure
    print("Applying FRCMOD parameters to the structure...")
    structure.parameters = frcmod_params

    # Save as OpenMM XML
    print(f"Saving OpenMM-compatible XML file: {xml_output}")
    parmed.openmm.OpenMMParameterSet.from_structure(structure).write(xml_output)
    print(f"OpenMM parameter file saved as {xml_output}")


In [16]:
%cd ~/REVIVAL2/sandbox/testopenmm/

/disk2/fli/REVIVAL2/sandbox/testopenmm


In [17]:
# Input files
mol2_file = "ligand_charged.mol2"  # Replace with your MOL2 file
frcmod_file = "ligand.frcmod"      # Replace with your FRCMOD file
xml_output = "ligand.xml"          # Desired output XML file

# Generate OpenMM parameters
try:
    generate_openmm_params(mol2_file, frcmod_file, xml_output)
except Exception as e:
    print(f"Error: {e}")


Loading MOL2 file: ligand_charged.mol2
Converting ResidueTemplate to Structure...
Adding atoms...
Adding bonds...
Loading FRCMOD file: ligand.frcmod
Applying FRCMOD parameters to the structure...
Saving OpenMM-compatible XML file: ligand.xml
OpenMM parameter file saved as ligand.xml


In [18]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *

def calculate_openmm_energy_with_ligand(pdb_file, ligand_params_file):
    pdb = PDBFile(pdb_file)
    forcefield = ForceField('amber99sb.xml', 'tip3p.xml', ligand_params_file)
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff, constraints=HBonds)
    integrator = VerletIntegrator(0.002 * picoseconds)
    simulation = Simulation(pdb.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True)
    potential_energy = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
    return potential_energy




In [None]:
calculate_openmm_energy_with_ligand(
    pdb_file="/disk2/fli/REVIVAL2/sandbox/testopenmm/protein_fixed.pdb",
    ligand_params_file="/disk2/fli/REVIVAL2/sandbox/testopenmm/ligand.xml"
    )

In [17]:
import xml.etree.ElementTree as ET

def extract_interactions_from_plip(xml_file):
    """
    Extract interaction data from a PLIP report XML file.

    Args:
        xml_file (str): Path to the PLIP report XML file.

    Returns:
        list: A list of dictionaries, where each dictionary represents an interaction with the following keys:
              - type (str): Interaction type (e.g., "hydrophobic", "hydrogen_bond").
              - distance (float): Distance between interacting atoms (Å).
              - atoms (tuple): Tuple of interacting atom types (e.g., ("C", "C")).
    """
    interactions = []
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Define interaction types and their XML paths
    interaction_paths = {
        "hydrophobic": "hydrophobic_interactions/hydrophobic_interaction",
        "hydrogen_bond": "hydrogen_bonds/hydrogen_bond",
        "salt_bridge": "salt_bridges/salt_bridge",
        "pi_stacking": "pi_stacks/pi_stack",
        "metal_complex": "metal_complexes/metal_complex",
    }

    # Iterate through interaction types
    for interaction_type, path in interaction_paths.items():
        for interaction in root.findall(f".//{path}"):
            interaction_data = {"type": interaction_type}
            
            # Extract common data
            distance = interaction.find("dist") or interaction.find("dist_d-a")  # Use dist_d-a for hydrogen bonds
            interaction_data["distance"] = float(distance.text) if distance is not None else None

            # Extract atom types
            if interaction_type == "hydrophobic":
                lig_carbon = interaction.find("ligcarbonidx")
                prot_carbon = interaction.find("protcarbonidx")
                interaction_data["atoms"] = ("C", "C")  # Always C for hydrophobic interactions
            elif interaction_type == "hydrogen_bond":
                donor_type = interaction.find("donortype").text if interaction.find("donortype") is not None else "Unknown"
                acceptor_type = interaction.find("acceptortype").text if interaction.find("acceptortype") is not None else "Unknown"
                interaction_data["atoms"] = (donor_type, acceptor_type)
            elif interaction_type == "salt_bridge":
                interaction_data["atoms"] = ("Charged", "Charged")
            elif interaction_type == "pi_stacking":
                interaction_data["atoms"] = ("Aromatic", "Aromatic")
            elif interaction_type == "metal_complex":
                metal_type = interaction.find("metal_type").text if interaction.find("metal_type") is not None else "Unknown"
                target_type = interaction.find("target_type").text if interaction.find("target_type") is not None else "Unknown"
                interaction_data["atoms"] = (metal_type, target_type)

            # Add to interactions list
            interactions.append(interaction_data)

    return interactions

import math

# Empirical energy values for interaction types
ENERGY_VALUES = {
    "hydrogen_bond": (-1.0, -3.0),        # Min, Max (kcal/mol)
    "hydrophobic": (-0.5, -1.5),
    "salt_bridge": (-1.0, -5.0),
    "pi_stacking_parallel": (-1.0, -3.0),
    "pi_stacking_perpendicular": (-0.5, -2.0),
    "pi_cation": (-3.0, -5.0),
    "metal_coordination": (-2.0, -6.0),
    "halogen_bond": (-1.0, -3.0)
}

# Distance-dependent scaling function
def distance_scaling(distance, min_distance=2.0, max_distance=6.0):
    """
    Scales energy contribution based on distance.
    Args:
        distance (float): Distance between interacting atoms (Å).
        min_distance (float): Minimum effective distance (Å).
        max_distance (float): Maximum effective distance (Å).

    Returns:
        float: Scaling factor between 0 and 1.
    """
    if distance < min_distance:
        return 1.0
    elif distance > max_distance:
        return 0.0
    else:
        return 1.0 - (distance - min_distance) / (max_distance - min_distance)

# Estimate interaction energy
def calculate_interaction_energy(interactions):
    """
    Estimate the total interaction energy for a protein-ligand complex.

    Args:
        interactions (list of dict): List of interactions with the following keys:
            - type (str): Interaction type (e.g., "hydrogen_bond").
            - distance (float): Distance between interacting atoms (Å).
            - atoms (tuple): Tuple of interacting atom types (e.g., ("O", "N")).

    Returns:
        float: Estimated total interaction energy (kcal/mol).
    """
    total_energy = 0.0
    for interaction in interactions:
        interaction_type = interaction["type"]
        distance = interaction["distance"]

        # Get the energy range for the interaction type
        if interaction_type in ENERGY_VALUES:
            min_energy, max_energy = ENERGY_VALUES[interaction_type]
            # Scale energy by distance
            scaling_factor = distance_scaling(distance)
            energy = scaling_factor * (min_energy + max_energy) / 2
            total_energy += energy
    return total_energy


In [4]:

# Example Usage
file_path = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/PfTrpB-4bromo/I165A:I183A:Y301V_0/report.xml"
interactions = extract_interactions_from_plip(file_path)
print(interactions)

[{'type': 'hydrophobic', 'distance': None, 'atoms': ('C', 'C')}, {'type': 'hydrophobic', 'distance': None, 'atoms': ('C', 'C')}, {'type': 'hydrophobic', 'distance': None, 'atoms': ('C', 'C')}, {'type': 'hydrophobic', 'distance': None, 'atoms': ('C', 'C')}, {'type': 'hydrogen_bond', 'distance': 3.03, 'atoms': ('O3', 'Nar')}, {'type': 'hydrogen_bond', 'distance': 2.76, 'atoms': ('N3', 'O2')}, {'type': 'hydrogen_bond', 'distance': 2.98, 'atoms': ('Nar', 'O3')}, {'type': 'hydrogen_bond', 'distance': 2.68, 'atoms': ('O3', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 2.7, 'atoms': ('Nam', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 3.64, 'atoms': ('Nam', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 3.93, 'atoms': ('Nam', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 3.01, 'atoms': ('Nam', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 2.85, 'atoms': ('Nam', 'O.co2')}, {'type': 'hydrogen_bond', 'distance': 4.07, 'atoms': ('O3', 'O3')}, {'type': 'hydrogen_bond', 'distance': 2

In [5]:
calculate_interaction_energy(interactions)

TypeError: '<' not supported between instances of 'NoneType' and 'float'