In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Function to parse TEI files and extract title and written date
def parse_tei_files(folder_path):
    """Collect the title and "written" date of every TEI file in a folder.

    Each regular file in ``folder_path`` whose name ends in ``.xml``
    (case-insensitive) is parsed. For every ``<event type="written">``
    element in the TEI namespace one record is produced, using the event's
    ``when`` attribute or, when that is absent or empty, its ``notBefore``
    attribute. Events carrying neither attribute are skipped.

    Args:
        folder_path: Directory to scan (not searched recursively).

    Returns:
        A list of ``{'Title': str, 'Written Date': str}`` dicts sorted by
        the date string. Records with equal dates are ordered by file name.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be read.
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    # Initialize a list to store the data
    play_data = []

    # Namespace dictionary
    ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

    # os.listdir() returns entries in arbitrary order; sorting the names keeps
    # the relative order of records with equal dates reproducible, because the
    # final sort is stable.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith('.xml'):
            continue
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            # A directory that merely happens to be named "*.xml"
            continue

        root = ET.parse(file_path).getroot()

        # Extract title: the first <title> in document order. Its direct text
        # is preferred; if that is empty (e.g. the text sits inside a child
        # element) the text of all descendants is used instead.
        title = ''
        title_element = root.find('.//tei:title', namespaces=ns)
        if title_element is not None:
            title = (title_element.text or '').strip()
            if not title:
                title = ''.join(title_element.itertext()).strip()
        if not title:
            # No usable title in the file: fall back to the file name (without
            # extension) so the record stays identifiable instead of crashing.
            title = os.path.splitext(filename)[0]

        # Extract written date(s)
        events = root.findall('.//tei:event[@type="written"]', namespaces=ns)
        for event in events:
            written_date = event.get('when') or event.get('notBefore')
            if written_date:
                play_data.append({'Title': title, 'Written Date': written_date})

    # Sort the data by written date (plain string comparison)
    play_data.sort(key=lambda record: record['Written Date'])

    return play_data

# Path to the folder containing TEI files (relative to the working directory)
folder_path = '../results'

# Parse TEI files: one {'Title', 'Written Date'} record per "written" event
play_data = parse_tei_files(folder_path)

# Convert data to DataFrame. The columns are named explicitly so that an empty
# result still produces a frame (and a CSV) with the expected header instead
# of a column-less one; for non-empty data the frame is unchanged.
play_df = pd.DataFrame(play_data, columns=['Title', 'Written Date'])

# Save the DataFrame to CSV, creating the output folder on first use
output_folder = 'output'
output_file = os.path.join(output_folder, 'play_written_dates.csv')
os.makedirs(output_folder, exist_ok=True)
play_df.to_csv(output_file, index=False)

# Display the DataFrame
print(play_df)


                                                 Title Written Date
0    Origen, pérdida y restauración de la Virgen de...         1616
1                             La exaltación de la cruz         1620
2                                      El divino Jasón         1620
3                                   El convite general         1620
4                               La universal redención         1620
..                                                 ...          ...
131                          Las armas de la hermosura         1678
132                                El tesoro escondido         1679
133                 Hado y divisa de Leonido y Marfisa         1680
134                                  La divina Filotea         1681
135                                 El segundo Scipión         1683

[136 rows x 2 columns]
