In [1]:
import plip

In [7]:
import lxml

In [5]:
from rdkit import Chem
from openbabel import pybel

In [8]:
from lxml import etree

def xml_file_to_dict(file_path):
    """
    Parse an XML file and convert it to a nested, plain-Python structure.

    Conversion rules:
      * An element with no child elements and no attributes becomes its text,
        stripped of surrounding whitespace (``None`` when it has no text).
      * Any other element becomes a dict holding:
          - ``"@attributes"``: a plain ``dict`` copy of the element's attributes
            (only when the element has attributes);
          - one key per child tag; a tag that occurs more than once maps to a
            list of converted children in document order;
          - ``"#text"``: the element's own stripped text, when it is non-blank.
      * Comment and processing-instruction nodes are ignored.

    :param file_path: Path to the XML file (anything ``etree.parse`` accepts).
    :return: Converted root element: a dict, or a string / ``None`` when the
        root is a bare leaf.
    """
    def xml_to_dict(element):
        """
        Recursively convert a single element according to the rules above.
        """
        # lxml also yields comments / processing instructions when iterating an
        # element; their ``tag`` is a factory function rather than a string,
        # so keep only real child elements.
        children = [child for child in element if isinstance(child.tag, str)]

        # Base case: a leaf without attributes is represented by its text.
        # Strip it so leaves are treated like "#text" values below (otherwise
        # pretty-printed files leak trailing newlines into the values).
        if not children and not element.attrib:
            text = element.text
            return text.strip() if text is not None else None

        element_dict = {}

        # Copy the attributes into a plain dict so the result does not carry
        # parser-specific proxy objects tied to the parsed tree.
        if element.attrib:
            element_dict["@attributes"] = dict(element.attrib)

        # Process child elements, grouping repeated tags into a list.
        for child in children:
            child_dict = xml_to_dict(child)
            if child.tag not in element_dict:
                # First occurrence of the tag
                element_dict[child.tag] = child_dict
            else:
                # Multiple occurrences of the same tag
                if not isinstance(element_dict[child.tag], list):
                    element_dict[child.tag] = [element_dict[child.tag]]
                element_dict[child.tag].append(child_dict)

        # Add element text if present and not just whitespace
        if element.text and element.text.strip():
            element_dict["#text"] = element.text.strip()

        return element_dict

    # Parse the XML file and convert it starting from the root element.
    tree = etree.parse(file_path)
    root = tree.getroot()
    return xml_to_dict(root)

In [9]:

# Example usage: convert an XML report on disk to a nested dictionary and print it.
# NOTE(review): the path below is a machine-specific absolute path (it looks like a
# PLIP report.xml); it will not exist on other machines, so point it at your own file.
file_path = "/disk2/fli/REVIVAL2/zs/plip/test/report.xml"  # Replace with the path to your XML file
result = xml_file_to_dict(file_path)
# Prints the whole converted structure as one line; the output can be very long.
print(result)


{'plipversion': '2.4.0', 'bindingsite': [{'@attributes': {'id': '1', 'has_interactions': 'True'}, 'identifiers': {'longname': 'NA', 'ligtype': 'ION', 'hetid': 'NA', 'chain': 'A', 'position': '402', 'composite': 'False', 'members': {'member': {'@attributes': {'id': '1'}, '#text': 'NA:A:402'}}, 'smiles': 'N', 'inchikey': 'QGZKDVFQNNGYKY-UHFFFAOYSA-N\n'}, 'lig_properties': {'num_heavy_atoms': '1', 'num_hbd': '3', 'num_unpaired_hbd': '0', 'num_hba': '1', 'num_unpaired_hba': '0', 'num_hal': '0', 'num_unpaired_hal': '0', 'num_aromatic_rings': '0', 'num_rotatable_bonds': '0', 'molweight': '17.03052', 'logp': '0.3239000000000001'}, 'interacting_chains': {'interacting_chain': {'@attributes': {'id': '1'}, '#text': 'A'}}, 'bs_residues': {'bs_residue': [{'@attributes': {'id': '1', 'contact': 'False', 'min_dist': '2.9', 'aa': 'SER'}, '#text': '265A'}, {'@attributes': {'id': '2', 'contact': 'False', 'min_dist': '5.5', 'aa': 'GLY'}, '#text': '228A'}, {'@attributes': {'id': '3', 'contact': 'False', 'm