In [33]:
# Import Dependencies
import re
import xml.etree.ElementTree as ET
import xmlschema
import pandas as pd
import yaml

#### Convert an input file into an XML file

#### .yaml file

In [34]:
def parse_yaml_to_dict(file_path):
    """
    Load a YAML document from disk and hand back its parsed content.

    Args:
    - file_path (str): Location of the YAML file; it is opened as UTF-8 text.

    Returns:
    - dict: Whatever ``yaml.safe_load`` produces for the document, which is a
      dictionary when the document's top level is a mapping.
    """
    # safe_load builds only plain Python objects, never arbitrary ones.
    with open(file_path, mode='r', encoding='utf-8') as yaml_stream:
        return yaml.safe_load(yaml_stream)

def add_data_to_element(element, data):
    """
    Recursively adds data to an XML element from a dictionary, list, or scalar.

    Mapping rules:
    - dict: every key becomes a child element of ``element``. A list value
      yields one child per item, all tagged with the key.
    - list: dict items are merged directly into ``element``; any other item
      becomes a child that reuses ``element``'s own tag.
    - None: nothing is written, so the element is left without text.
    - any other value: ``str(data)`` becomes the text of ``element``.

    Args:
    - element (xml.etree.ElementTree.Element): The XML element to which data will be added.
    - data (dict or list or str or None): The data to be added to the element.
    """
    if data is None:
        # A YAML key with no value is loaded as None. Writing str(None) would
        # store the literal text "None", which an empty-element cleanup pass
        # (such as remove_empty_elements) can no longer recognise as empty.
        return

    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, list):
                # Repeated elements: one child per list item, same tag.
                for item in value:
                    child = ET.SubElement(element, key)
                    add_data_to_element(child, item)
            else:
                child = ET.SubElement(element, key)
                add_data_to_element(child, value)
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, dict):
                # Dict items contribute their keys as children of this element.
                add_data_to_element(element, item)
            else:
                sub_element = ET.SubElement(element, element.tag)
                # Same None rule as above: keep the child empty instead of
                # giving it the text "None".
                if item is not None:
                    sub_element.text = str(item)
    else:
        element.text = str(data)

def remove_empty_elements(element):
    """
    Prune descendants of an XML element that carry no content.

    A descendant is dropped when it has no child elements and its text is
    missing or whitespace-only. Children are processed before their parent is
    judged, so a parent left childless by the pruning is dropped as well.
    ``element`` itself is never removed.

    Args:
    - element (xml.etree.ElementTree.Element): The XML element whose subtree is cleaned in place.
    """
    # Iterate over a snapshot because children are removed during the walk.
    for node in tuple(element):
        remove_empty_elements(node)
        text = node.text
        has_text = bool(text and text.strip())
        if has_text or len(node) > 0:
            continue
        element.remove(node)

# Driver: convert one YAML input file into an XML document rooted at
# <PolymerNanocomposite> and write it to disk.

# File paths
# NOTE(review): both paths are absolute and specific to one machine; edit them
# before running this cell anywhere else.
yaml_file_path = r'C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/data/input_files/L183_S4_Potschke_2003.yaml'  # Replace with your YAML file path
xml_output_path = r'C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/data/output_files/L183_S4_Potschke_2003_output.xml'  # Replace with your output XML file path

# Read and parse YAML file data
data_dict = parse_yaml_to_dict(yaml_file_path)

# Create XML root element
root = ET.Element('PolymerNanocomposite')

# Populate XML structure with data (keys become child elements of the root)
add_data_to_element(root, data_dict)

# Remove empty elements (no text and no children) so they are not written out
remove_empty_elements(root)

# Wrap the root in an ElementTree and write it to the output path as UTF-8,
# with an XML declaration at the top of the file
tree = ET.ElementTree(root)
tree.write(xml_output_path, encoding='utf-8', xml_declaration=True)

# Report where the file was written
print(f'XML file has been generated and saved as {xml_output_path}')


XML file has been generated and saved as C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/data/output_files/L183_S4_Potschke_2003_output.xml
