In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Function to parse TEI files and extract title and print date
def parse_print_events(folder_path):
    """Collect the title and print date of every print event in a folder of TEI files.

    Every ``*.xml`` file directly inside ``folder_path`` is parsed. For each
    ``<event type="print">`` element in the TEI namespace, one record is
    produced using the event's ``when`` attribute, or ``notBefore`` when
    ``when`` is absent or empty. Events carrying neither attribute are skipped.

    Args:
        folder_path: Directory containing the TEI XML files.

    Returns:
        A list of ``{'Title': str, 'Print Date': str}`` dicts sorted by the
        print date string. The title is the stripped text of the first
        ``<title>`` element in the file, or an empty string when the file has
        no title element or the element has no text.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be read.
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is not
            well-formed.
    """
    ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    print_data = []

    # os.listdir returns entries in arbitrary order; the final sort is stable,
    # so iterating in sorted order makes the order of same-date rows
    # reproducible across runs and platforms.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.xml'):
            continue

        root = ET.parse(os.path.join(folder_path, filename)).getroot()

        # A missing <title> (find() returns None) or an empty one (text is
        # None) must not abort the whole run; fall back to an empty title.
        title_element = root.find('.//tei:title', namespaces=ns)
        if title_element is None or title_element.text is None:
            title = ''
        else:
            title = title_element.text.strip()

        for event in root.findall('.//tei:event[@type="print"]', namespaces=ns):
            print_date = event.get('when') or event.get('notBefore')
            if print_date:
                print_data.append({'Title': title, 'Print Date': print_date})

    # Dates are compared as strings, exactly as they appear in the attributes.
    print_data.sort(key=lambda record: record['Print Date'])

    return print_data

# Path to the folder containing TEI files
folder_path = '../results'

# Parse TEI files and extract title and print date
print_data = parse_print_events(folder_path)

# Convert data to DataFrame. The columns are named explicitly so that an
# empty result still yields a frame (and a CSV) with the expected header
# instead of a column-less one; for non-empty data the result is unchanged.
print_df = pd.DataFrame(print_data, columns=['Title', 'Print Date'])

# Save the DataFrame to CSV, creating the output folder if it does not exist
output_folder = 'output'
output_file = os.path.join(output_folder, 'play_print_dates.csv')
os.makedirs(output_folder, exist_ok=True)
print_df.to_csv(output_file, index=False)

# Display the DataFrame
print(print_df)


                            Title Print Date
0                La selva confusa       1623
1    Un castigo en tres venganzas       1634
2           La puente de Mantible       1636
3                La vida es sueño       1636
4           El príncipe constante       1636
..                            ...        ...
198    Loa a El año santo de Roma       1759
199        La devoción de la misa       1760
200            El socorro general       1760
201             La piel de Gedeón       1760
202           El acaso y el error       1849

[203 rows x 2 columns]
