# In [1]:
import os
import csv
from xml.etree import ElementTree as ET

def count_persons(tei_file):
    """Count the persons marked FEMALE and MALE in one TEI document.

    Every ``person`` element in the TEI namespace below the document root is
    inspected; only the exact ``sex`` attribute values ``'FEMALE'`` and
    ``'MALE'`` are counted. Persons with any other value, or with no ``sex``
    attribute, are ignored and are not part of the total.

    Args:
        tei_file: Anything ``ElementTree.parse`` accepts as its source.

    Returns:
        A ``(female_count, male_count, total_count)`` tuple, where the total
        is the sum of the two counts.
    """
    document_root = ET.parse(tei_file).getroot()
    persons = document_root.findall('.//{http://www.tei-c.org/ns/1.0}person')

    # Collect the raw attribute values once, then tally the two of interest.
    sexes = [entry.get('sex') for entry in persons]
    females = sexes.count('FEMALE')
    males = sexes.count('MALE')

    return females, males, females + males

def process_tei_files(input_folder, output_file):
    """Summarise person counts for every TEI file in a folder as a CSV.

    Each ``.xml`` file directly inside ``input_folder`` contributes one row
    with the play's main title and its FEMALE, MALE and combined person
    counts (as returned by ``count_persons``). Rows are written to
    ``output_file`` sorted by ``FemaleCount``, highest first; rows with equal
    counts keep file-name order.

    Args:
        input_folder: Directory to scan for ``.xml`` files (not recursive).
        output_file: Path of the CSV file to write. Missing parent
            directories are created.
    """
    title_path = './/{http://www.tei-c.org/ns/1.0}titleStmt/{http://www.tei-c.org/ns/1.0}title[@type="main"]'
    summary_data = []

    # os.listdir returns entries in arbitrary order; sorting the names makes
    # the order of rows with equal FemaleCount reproducible between runs.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith('.xml'):
            continue

        tei_file = os.path.join(input_folder, file_name)
        female_count, male_count, total_count = count_persons(tei_file)

        # Extracting play title from TEI header. find() returns None when the
        # header has no main title; use the file name so the row stays
        # identifiable instead of aborting the whole run.
        title_element = ET.parse(tei_file).find(title_path)
        title = title_element.text if title_element is not None else file_name

        summary_data.append({
            'Title': title,
            'FemaleCount': female_count,
            'MaleCount': male_count,
            'TotalCount': total_count
        })

    # Sort by FemaleCount in descending order (stable, so ties keep the
    # file-name order established above).
    summary_data.sort(key=lambda row: row['FemaleCount'], reverse=True)

    # open() does not create directories; make sure the target folder exists.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Title', 'FemaleCount', 'MaleCount', 'TotalCount']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(summary_data)

if __name__ == "__main__":
    # Script entry point: summarise the TEI files found in "results" into a
    # single CSV sorted by female person count.
    process_tei_files(
        input_folder="results",
        output_file="output/csv/person_summary_output_sorted.csv",
    )
