# XSD to CSV Conversion Testing

In [1]:
import os
import zipfile
import xmlschema
from lxml import etree
import pandas as pd

In [2]:
# Build both paths with os.path.join so they resolve on any operating
# system, not only where the backslash is the path separator.
# Zip archive that contains the XSD files
input_file_path = os.path.join('..', 'tests', 'data', 'JVF_DTM_143_XSD.zip')
# Directory the archive is extracted into
output_file_path = os.path.join('..', 'tests', 'output', 'JVF_DTM_143_XSD')

In [3]:
# Create the extraction directory. exist_ok=True avoids the separate
# existence check (and the race between check and creation) and makes
# re-running the cell harmless when the folder is already present.
os.makedirs(output_file_path, exist_ok=True)

# Extract every member of the zip archive into the extraction directory
with zipfile.ZipFile(input_file_path, 'r') as zip_ref:
    zip_ref.extractall(output_file_path)

# Keep the top-level entries of the extraction directory for inspection
extracted_files = os.listdir(output_file_path)

# Report where the archive was extracted (the listing itself is not printed)
print(output_file_path)

..\tests\output\JVF_DTM_143_XSD


In [4]:
# Folder inside the extraction directory that is searched for XSD files
# (assumes the archive unpacks into an 'xsd' subfolder — TODO confirm)
xsd_dir = os.path.join(output_file_path, 'xsd')

def load_xsd_files(directory):
    """
    Collect every ``.xsd`` file below a directory and parse it as XML.

    The directory is traversed recursively with ``os.walk``. Each file whose
    name ends with ``.xsd`` is read as bytes and passed to ``etree.XML``.

    Args:
        directory (str): Root folder that is searched for XSD files.

    Returns:
        list: ``(file_name, parsed_xsd_tree)`` tuples in traversal order.
            ``file_name`` is the bare file name without its folder, and
            ``parsed_xsd_tree`` is whatever ``etree.XML`` returned for it.
    """
    parsed = []

    for folder, _subfolders, names in os.walk(directory):
        for name in names:
            # Anything that is not an .xsd file is ignored
            if not name.endswith('.xsd'):
                continue

            # Read the raw bytes of the schema file
            with open(os.path.join(folder, name), 'rb') as handle:
                raw = handle.read()

            # Parse the bytes as XML and remember the result with its name
            parsed.append((name, etree.XML(raw)))

    return parsed

# Load and parse every XSD file found under xsd_dir; the result is a list of
# (file_name, parsed_xsd_tree) tuples
xsd_files = load_xsd_files(xsd_dir)

In [7]:
# Accumulate one summary row per parsed XSD file
data = []

for file_name, xsd_tree in xsd_files:
    # Tag of the root node and the number of its direct children
    root_tag, num_elements = xsd_tree.tag, len(xsd_tree)

    data += [{
        "File Name": file_name,
        "Root Tag": root_tag,
        "Number of Elements": num_elements,
    }]

# Turn the collected rows into a pandas DataFrame
df = pd.DataFrame(data)

# Last expression of the cell, so the notebook renders the table
df

Unnamed: 0,File Name,Root Tag,Number of Elements
0,atributy.xsd,{http://www.w3.org/2001/XMLSchema}schema,185
1,common.xsd,{http://www.w3.org/2001/XMLSchema}schema,4
2,doprovodne_informace.xsd,{http://www.w3.org/2001/XMLSchema}schema,29
3,extenze.xsd,{http://www.w3.org/2001/XMLSchema}schema,1
4,servis.xsd,{http://www.w3.org/2001/XMLSchema}schema,1
...,...,...,...
442,zed-linie.xsd,{http://www.w3.org/2001/XMLSchema}schema,5
443,zed-plocha.xsd,{http://www.w3.org/2001/XMLSchema}schema,5
444,zeleznicni_prejezd-plocha.xsd,{http://www.w3.org/2001/XMLSchema}schema,5
445,zemedelska_plocha-defbod.xsd,{http://www.w3.org/2001/XMLSchema}schema,5
