Script to automate extracting the actions and solutions of every DdfActionPlan, for a given DDF UUID, from the XML files on a shared drive into CSV files

In [None]:
import os
import xml.etree.ElementTree as ET
import csv

# Configuration: where to look for XML files, which DDF UUID to select,
# and where the generated CSV files are written.
shared_drive_path = r"path_to_shared_drive"
ddf_uuid = "9aba6d6a38c6423da035ea1be76a3cd9"
output_actions_dir = "path_to_output/actions"
output_solutions_dir = "path_to_output/solutions"

# Create both output directories up front; an already existing directory is fine.
for _output_dir in (output_actions_dir, output_solutions_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Function to flatten XML elements
def flatten_xml(element, parent_key="", sep="."):
    """Recursively flatten an XML element and its descendants into one dict.

    Each attribute becomes ``<parent_key><sep><attribute name>``, non-blank
    element text becomes ``<parent_key><sep>text`` and every child is
    flattened under ``<parent_key><sep><child tag>``. When ``parent_key`` is
    empty the name is used on its own, without a leading separator.

    Args:
        element: The ``xml.etree.ElementTree.Element`` to flatten.
        parent_key: Key prefix for everything found under ``element``.
        sep: Separator placed between the path components of a key.

    Returns:
        A flat ``dict`` mapping key paths to attribute values / stripped text.
        Siblings that share a tag produce the same keys, so later siblings
        overwrite earlier ones.
    """
    def join_key(name):
        # Only prefix when there is a parent; this avoids stripping characters
        # off the key afterwards, which could eat legitimate name characters.
        return f"{parent_key}{sep}{name}" if parent_key else name

    items = {}
    for key, value in element.attrib.items():
        items[join_key(key)] = value

    # element.text is None for empty elements; whitespace-only text is ignored.
    text = (element.text or "").strip()
    if text:
        # Use the caller's separator here too (it used to be a hard-coded ".").
        items[join_key("text")] = text

    for child in element:
        items.update(flatten_xml(child, join_key(child.tag), sep=sep))
    return items

# Function to process a single XML file
def _write_rows_to_csv(rows, csv_path):
    """Write flat dict rows to ``csv_path`` using the sorted union of all keys as header."""
    headers = sorted({key for row in rows for key in row})
    with open(csv_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(rows)


def process_xml_file(xml_file):
    """Extract the actions and solutions of every DdfActionPlan in one XML file.

    The file is only processed when at least one ``DDF_GUID`` element contains
    the module-level ``ddf_uuid``. For each ``DdfActionPlan`` the flattened
    plan is used as the base row; one row per nested ``DdfAction`` and one per
    nested ``DdfSolution`` is built on top of it. Non-empty row lists are
    written to ``output_actions_dir`` / ``output_solutions_dir``.

    Args:
        xml_file: Path of the XML file to parse.

    Returns:
        True when the file contains the target DDF UUID (whether or not any
        actions or solutions were found); False when the UUID is absent or
        when any exception occurred (the error is printed, not raised).
    """
    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # elem.text is None for an empty <DDF_GUID/>; guard it so one empty
        # element does not raise TypeError and reject the whole file.
        has_target_uuid = any(
            ddf_uuid in (elem.text or "") for elem in root.findall(".//DDF_GUID")
        )
        if not has_target_uuid:
            return False  # Skip files without the target DDF UUID

        actions = []
        solutions = []

        for plan in root.findall(".//DdfActionPlan"):
            # Flattened plan (flatten_xml recurses into all children) shared
            # by every action/solution row of this plan.
            base_row = flatten_xml(plan)

            for action in plan.findall(".//DdfActionList/DdfAction"):
                action_row = base_row.copy()
                action_row.update(flatten_xml(action, "DdfAction"))
                actions.append(action_row)

            for solution in plan.findall(".//DdfSolutionList/DdfSolution"):
                solution_row = base_row.copy()
                solution_row.update(flatten_xml(solution, "DdfSolution"))
                solutions.append(solution_row)

        # NOTE(review): output names use only the XML file's base name, so
        # same-named files from different directories overwrite each other.
        xml_name = os.path.basename(xml_file)

        if actions:
            action_csv_file = os.path.join(output_actions_dir, f"actions-{ddf_uuid}-{xml_name}.csv")
            _write_rows_to_csv(actions, action_csv_file)
            print(f"Actions data written to {action_csv_file}")

        if solutions:
            solution_csv_file = os.path.join(output_solutions_dir, f"solutions-{ddf_uuid}-{xml_name}.csv")
            _write_rows_to_csv(solutions, solution_csv_file)
            print(f"Solutions data written to {solution_csv_file}")

        return True
    except Exception as e:
        # Best-effort batch processing: report the problem and let the caller
        # continue with the next file.
        print(f"Error processing {xml_file}: {e}")
        return False

# Main script: walk the shared drive and process every XML file found.
for root_dir, dirs, files in os.walk(shared_drive_path):
    for file in files:
        # Match the extension case-insensitively so files such as
        # "REPORT.XML" are not silently ignored.
        if not file.lower().endswith(".xml"):
            continue
        file_path = os.path.join(root_dir, file)
        print(f"Processing file: {file_path}")
        if process_xml_file(file_path):
            print(f"File {file_path} processed successfully.")
        else:
            # process_xml_file returns False both when the DDF UUID is absent
            # and when it hit an error, so the message must cover both cases.
            print(f"File {file_path} skipped (DDF UUID not found or processing error).")
