In [4]:
# Import Dependencies
import re
import xml.etree.ElementTree as ET
import xmlschema
import pandas as pd

### Create an XML file from an input file

In [13]:
# Read the TXT file and parse its "key: value" lines
def parse_txt_to_dict(file_path):
    """Parse a text file of ``key: value`` lines into a dictionary.

    Each non-blank line is split at its first ``:``; the key and the value
    are stripped of surrounding whitespace. A key that appears once maps to
    its value as a string. A key that appears several times maps to a list
    of its values in file order.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        dict mapping each key to a ``str`` or a ``list`` of ``str``.

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator. The
            message names the file, the line number and the line content.
        OSError: If the file cannot be opened.
    """
    data_dict = {}
    # 'utf-8-sig' reads plain UTF-8 as well, but also drops a leading
    # byte-order mark so it cannot end up glued to the first key.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line_number, line in enumerate(file, start=1):
            if not line.strip():
                continue  # blank lines carry no data

            key, separator, value = line.partition(':')
            if not separator:
                raise ValueError(
                    f"{file_path}: line {line_number} has no ':' separator: "
                    f"{line.strip()!r}"
                )

            key = key.strip()
            value = value.strip()
            if key not in data_dict:
                data_dict[key] = value
            elif isinstance(data_dict[key], list):
                data_dict[key].append(value)
            else:
                # Second occurrence of the key: promote the entry to a list.
                data_dict[key] = [data_dict[key], value]
    return data_dict

# Pick the parent node that corresponds to a prefix
def get_parent_element(root, prefix):
    """Return the first element whose tag ends with ``prefix``.

    The search walks ``root.iter()`` (``root`` itself included) and compares
    tags case-insensitively.

    Args:
        root: Element whose subtree is searched.
        prefix: Text the element's tag must end with.

    Returns:
        The first matching element, or ``None`` when nothing matches.
    """
    wanted_suffix = prefix.lower()
    candidates = (
        node for node in root.iter()
        if node.tag.lower().endswith(wanted_suffix)
    )
    return next(candidates, None)

# Fill the XML structure
def fill_xml_structure(root, data_dict):
    """Write every entry of ``data_dict`` into the XML tree under ``root``.

    Keys starting with ``Matrix_`` or ``Filler_`` are written below the
    element returned by ``get_parent_element`` for ``MatrixComponent`` or
    ``FillerComponent`` respectively, using the key without that leading
    prefix as the child tag. All other keys are written directly below
    ``root`` using the key itself as the tag.

    The child is looked up with ``parent.find`` and created when missing.
    List values are joined with ``', '``; other values are assigned to the
    element text as they are. Entries whose parent element cannot be found
    are skipped.

    Args:
        root: Root element of the tree to fill (modified in place).
        data_dict: Mapping of field key to a string or a list of strings.
    """
    # Key prefix -> tag suffix used to locate the parent element.
    prefix_to_parent = (
        ('Matrix_', 'MatrixComponent'),
        ('Filler_', 'FillerComponent'),
    )

    for key, value in data_dict.items():
        parent = root
        field_name = key
        for key_prefix, parent_tag in prefix_to_parent:
            if key.startswith(key_prefix):
                # Strip only the leading prefix. str.replace would also
                # delete later occurrences inside the field name
                # (e.g. 'Matrix_Sub_Matrix_Name' -> 'Sub_Name').
                field_name = key[len(key_prefix):]
                parent = get_parent_element(root, parent_tag)
                break

        if parent is None:
            # No matching parent element in the tree: skip this entry.
            continue

        elem = parent.find(field_name)
        if elem is None:
            # Create the element when it is not present yet.
            elem = ET.SubElement(parent, field_name)
        elem.text = ', '.join(value) if isinstance(value, list) else value

# Remove empty nodes
def remove_empty_nodes(element):
    """Prune descendants of ``element`` that have no children and no text.

    Children are processed depth-first, so a node whose children are all
    removed is itself removed when its own text is missing or whitespace-only.
    ``element`` itself is never removed; the tree is modified in place.

    Args:
        element: Element whose subtree is pruned.
    """
    def _has_blank_text(node):
        text = node.text
        if text is None:
            return True
        return isinstance(text, str) and text.strip() == ''

    # Iterate over a snapshot because children are removed during the walk.
    for node in tuple(element):
        remove_empty_nodes(node)
        if len(node) != 0 or not _has_blank_text(node):
            continue
        element.remove(node)

# Main program
def main(
    txt_file_path=r'C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/input_files/L183_S2_Potschke_2003.txt',
    xml_file_path=r'C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/XML-Schema.xml',
    xml_output_path=r'C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/output-L183_S2_Potschke_2003.xml',
):
    """Convert one ``key: value`` text file into a filled XML document.

    The text file is parsed with ``parse_txt_to_dict``, its entries are
    written into the XML tree loaded from ``xml_file_path`` with
    ``fill_xml_structure``, empty nodes are pruned with
    ``remove_empty_nodes``, and the result is written to ``xml_output_path``
    as UTF-8 text. A confirmation line is printed at the end.

    The defaults reproduce the paths this notebook was originally run with;
    pass other paths to convert a different file.

    Args:
        txt_file_path: Input text file of ``key: value`` lines.
        xml_file_path: XML file whose tree is used as the structure to fill.
        xml_output_path: Destination of the generated XML file.
    """
    # Read and parse the TXT file data
    data_dict = parse_txt_to_dict(txt_file_path)

    # Read the XML file
    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    # Fill the XML structure
    fill_xml_structure(root, data_dict)

    # Remove empty nodes
    remove_empty_nodes(root)

    # Serialize the element tree to a string
    xml_string = ET.tostring(root, encoding='utf-8', method='xml').decode('utf-8')

    # Write the XML file
    with open(xml_output_path, 'w', encoding='utf-8') as f:
        f.write(xml_string)

    print(f'XML file has been generated and saved as {xml_output_path}')

# Run the conversion when this code is executed directly (including as a
# notebook cell, where __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()

XML file has been generated and saved as C:/Users/jhyan/Documents/all/duke_research/MaterialsMine_XMLconvert/output-L183_S2_Potschke_2003.xml
