In [1]:
import plip

In [2]:
import lxml

In [3]:
from rdkit import Chem
from openbabel import pybel

In [4]:
from lxml import etree

def xml_file_to_dict(file_path):
    """
    Parses an XML file and converts it to a nested dictionary.

    Conversion rules:

    * An element with no child elements and no attributes becomes its text,
      stripped of surrounding whitespace (``None`` when it has no text).
    * Any other element becomes a dict holding, when present:

      - ``"@attributes"``: a plain ``dict`` copy of the XML attributes,
      - one key per child tag; the value is a list (document order) when
        the same tag occurs more than once under the parent,
      - ``"#text"``: the element's own stripped text, omitted when blank.

    * Comments and processing instructions are skipped.
    * Tail text (text that follows a child element) is not captured.

    :param file_path: Path to the XML file (anything ``etree.parse`` accepts).
    :return: Dictionary representation of the root element, or the stripped
        text / ``None`` if the root itself is a bare leaf.
    """
    def xml_to_dict(element):
        """
        Recursively converts a single element (and its subtree) to a
        dictionary, or to a stripped string / None for bare leaves.
        """
        # Only real elements have a string tag. lxml also yields comments and
        # processing instructions during iteration; their ``tag`` is a factory
        # function, which must not end up as a dictionary key.
        children = [child for child in element if isinstance(child.tag, str)]
        text = element.text

        # Base case: a bare leaf collapses to its (stripped) text.
        if not children and not element.attrib:
            return text.strip() if text is not None else None

        element_dict = {}

        # Copy the attributes so the result is a plain, serialisable dict that
        # does not keep a live reference into the parsed tree.
        if element.attrib:
            element_dict["@attributes"] = dict(element.attrib)

        # Process child elements, promoting repeated tags to a list.
        for child in children:
            child_dict = xml_to_dict(child)
            if child.tag not in element_dict:
                # First occurrence of the tag
                element_dict[child.tag] = child_dict
            else:
                # Multiple occurrences of the same tag
                if not isinstance(element_dict[child.tag], list):
                    element_dict[child.tag] = [element_dict[child.tag]]
                element_dict[child.tag].append(child_dict)

        # Add element text if present and not just whitespace
        if text and text.strip():
            element_dict["#text"] = text.strip()

        return element_dict

    # Parse the XML file
    tree = etree.parse(file_path)
    root = tree.getroot()

    # Convert the root element to a dictionary
    return xml_to_dict(root)

In [5]:

# Example usage: parse one PLIP report.xml into a nested dict and dump it.
# NOTE(review): the path below is absolute and machine-specific, so this cell
# only runs where that file exists. The full printed dict is very large.
file_path = "/disk2/fli/REVIVAL2/zs/plip/af3/struct_joint/PfTrpB-4bromo/I165A:I183A:Y301V_0/report.xml"  # Replace with the path to your own report.xml
result = xml_file_to_dict(file_path)
print(result)


{'plipversion': '2.4.0', 'bindingsite': {'@attributes': {'id': '1', 'has_interactions': 'True'}, 'identifiers': {'longname': 'LIG', 'ligtype': 'SMALLMOLECULE', 'hetid': 'LIG', 'chain': 'B', 'position': '0', 'composite': 'False', 'members': {'member': {'@attributes': {'id': '1'}, '#text': 'LIG:B:0'}}, 'smiles': 'OC(=O)[C@H](/N=C/c1c(cnc(c1O)C)CP(=O)(O)O)C.Brc1cccc2c1cc[nH]2.[Na]', 'inchikey': 'JDIJGIUQUOEZTL-CXALCSDVSA-N\n'}, 'lig_properties': {'num_heavy_atoms': '31', 'num_hbd': '5', 'num_unpaired_hbd': '1', 'num_hba': '8', 'num_unpaired_hba': '2', 'num_hal': '1', 'num_unpaired_hal': '1', 'num_aromatic_rings': '3', 'num_rotatable_bonds': '5', 'molweight': '521.2540709999998', 'logp': '3.595700000000003'}, 'interacting_chains': {'interacting_chain': {'@attributes': {'id': '1'}, '#text': 'A'}}, 'bs_residues': {'bs_residue': [{'@attributes': {'id': '1', 'contact': 'False', 'min_dist': '4.7', 'aa': 'LYS'}, '#text': '376A'}, {'@attributes': {'id': '2', 'contact': 'False', 'min_dist': '4.7',

In [6]:
# Number of top-level entries under "bindingsite" ("@attributes" plus its child sections).
# This report has a single <bindingsite>, so the value is a dict; with several
# binding sites xml_file_to_dict returns a list and len() would count sites instead.
len(result["bindingsite"])

7

In [9]:
# List the sections available for the binding site.
# Relies on "bindingsite" being a dict (one site in the report); a list has no .keys().
result["bindingsite"].keys()

dict_keys(['@attributes', 'identifiers', 'lig_properties', 'interacting_chains', 'bs_residues', 'interactions', 'mappings'])

In [16]:
# Per-residue records of the binding site: each entry carries "@attributes"
# (id, contact, min_dist, aa — all as strings) and "#text".
# NOTE(review): "#text" looks like residue number + chain (e.g. '376A') — confirm against the PLIP schema.
# A report with exactly one <bs_residue> would yield a dict here rather than a list.
result["bindingsite"]["bs_residues"]["bs_residue"]

[{'@attributes': {'id': '1', 'contact': 'False', 'min_dist': '4.7', 'aa': 'LYS'},
  '#text': '376A'},
 {'@attributes': {'id': '2', 'contact': 'False', 'min_dist': '4.7', 'aa': 'ALA'},
  '#text': '232A'},
 {'@attributes': {'id': '3', 'contact': 'False', 'min_dist': '6.9', 'aa': 'MET'},
  '#text': '129A'},
 {'@attributes': {'id': '4', 'contact': 'False', 'min_dist': '5.8', 'aa': 'LEU'},
  '#text': '266A'},
 {'@attributes': {'id': '5', 'contact': 'False', 'min_dist': '3.9', 'aa': 'GLY'},
  '#text': '372A'},
 {'@attributes': {'id': '6', 'contact': 'True', 'min_dist': '2.6', 'aa': 'GLY'},
  '#text': '229A'},
 {'@attributes': {'id': '7', 'contact': 'True', 'min_dist': '2.9', 'aa': 'HIS'},
  '#text': '110A'},
 {'@attributes': {'id': '8', 'contact': 'False', 'min_dist': '7.1', 'aa': 'GLN'},
  '#text': '137A'},
 {'@attributes': {'id': '9', 'contact': 'False', 'min_dist': '6.6', 'aa': 'VAL'},
  '#text': '112A'},
 {'@attributes': {'id': '10', 'contact': 'False', 'min_dist': '7.5', 'aa': 'ALA'},
 