# In [None]:
import os
import csv
from xml.etree import ElementTree as ET

def count_sp_by_character(tei_file):
    """Count the TEI ``<sp>`` elements attributed to each speaker.

    Every ``<sp>`` element in the TEI namespace that lies below the document
    root is inspected. Its ``who`` attribute is treated as a
    whitespace-separated list of pointers, so a speech shared by several
    speakers (``who="#a #b"``) is counted once for each of them.

    Args:
        tei_file: File name or file object, passed to ``ElementTree.parse``.

    Returns:
        A dict mapping each speaker identifier (without its leading ``#``)
        to the number of ``<sp>`` elements that reference it. Keys appear in
        the order in which the speakers are first encountered.
    """
    root = ET.parse(tei_file).getroot()

    character_sp_count = {}

    for sp in root.findall('.//{http://www.tei-c.org/ns/1.0}sp'):
        who = sp.get('who')
        if who is None:
            # Unattributed speech: nothing to count.
            continue

        # split() with no argument also discards empty/whitespace-only values.
        for pointer in who.split():
            # Strip the '#' only when it is really there, so an identifier
            # written without the prefix does not lose its first character.
            speaker_id = pointer[1:] if pointer.startswith('#') else pointer
            if not speaker_id:
                continue
            character_sp_count[speaker_id] = character_sp_count.get(speaker_id, 0) + 1

    return character_sp_count

def process_tei_files(input_folder, output_folder):
    """Write one per-speaker ``<sp>`` count CSV for each XML file in a folder.

    For every entry in ``input_folder`` whose name ends with ``.xml``, the
    speaker counts returned by ``count_sp_by_character`` are written to
    ``<name>_sp_count_output.csv`` in ``output_folder``, with the columns
    ``Character`` and ``SPCount`` and rows sorted by count, highest first.

    Args:
        input_folder: Directory that is scanned (non-recursively) for
            ``.xml`` files.
        output_folder: Directory that receives the CSV files; it is created
            if it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir() order is arbitrary; sort for a reproducible processing order.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith('.xml'):
            continue

        tei_file = os.path.join(input_folder, file_name)
        character_sp_count = count_sp_by_character(tei_file)

        # Sort characters by the number of <sp> elements in descending order.
        # The sort is stable, so ties keep the order of first appearance.
        sorted_characters = sorted(
            character_sp_count.items(), key=lambda item: item[1], reverse=True
        )

        # Replace only the trailing extension; str.replace() would also
        # rewrite any '.xml' that occurs earlier in the file name.
        stem = os.path.splitext(file_name)[0]
        output_file = os.path.join(output_folder, stem + '_sp_count_output.csv')

        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['Character', 'SPCount'])
            writer.writeheader()
            writer.writerows(
                {'Character': character, 'SPCount': sp_count}
                for character, sp_count in sorted_characters
            )

if __name__ == "__main__":
    # Script entry point: read the TEI files from "results" and write the
    # per-file CSV reports to "output" (both relative to the working directory).
    input_folder, output_folder = "results", "output"
    process_tei_files(input_folder, output_folder)
