# In [None]:
import requests
import pandas as pd

# Define the corpus name and base URL
corpusname = "cal"
BASE_URL = "https://dracor.org/api/v1/"

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Column order of the result table. Passing it explicitly keeps the header row
# in the CSV even when no play qualifies.
RESULT_COLUMNS = ["play", "character", "wordCount"]


def word_count(character):
    """Return the number of words spoken by *character* (0 if unknown).

    The DraCor v1 characters endpoint reports this figure as ``numOfWords``;
    ``wordCount`` is still accepted as a fallback so records using the key the
    earlier version of this script looked for keep working.
    """
    count = character.get("numOfWords")
    if count is None:
        count = character.get("wordCount")
    return count or 0


def fetch_plays(corpus):
    """Return the list of play records of *corpus*.

    Uses the ``corpora/{corpusname}`` endpoint, whose response carries the
    plays under the ``plays`` key.

    Raises:
        RuntimeError: if the API answers with a non-200 status.
        requests.RequestException: on connection errors or timeouts.
    """
    corpus_url = f"{BASE_URL}corpora/{corpus}"
    response = requests.get(corpus_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise RuntimeError(
            f"Error fetching plays. Status code: {response.status_code}"
        )
    return response.json().get("plays", [])


def fetch_characters(play_name, corpus=None):
    """Return the character list of one play, or ``None`` if it can't be fetched.

    *play_name* is the play's ``name`` (URL slug), which is what the
    ``corpora/{corpusname}/plays/{playname}/characters`` route expects.
    *corpus* defaults to the module-level ``corpusname``.  Failures are
    reported on stdout and signalled with ``None`` so the caller can skip the
    play instead of aborting the whole run.
    """
    if corpus is None:
        corpus = corpusname
    characters_url = f"{BASE_URL}corpora/{corpus}/plays/{play_name}/characters"
    try:
        char_response = requests.get(characters_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f"Error fetching characters for play {play_name} ({err}). Skipping...")
        return None
    if char_response.status_code != 200:
        print(
            f"Error fetching characters for play {play_name} "
            f"(Status: {char_response.status_code}). Skipping..."
        )
        return None
    return char_response.json()


def collect_top_female_speakers(plays, get_characters=None):
    """Return one row per play whose most talkative character is female.

    Args:
        plays: iterable of play records (dicts with ``name`` and ``title``).
        get_characters: callable mapping a play name to its character list,
            or to ``None`` when the list is unavailable.  Defaults to
            ``fetch_characters``; injectable so the selection logic can be
            exercised without network access.

    Returns:
        A list of dicts with the keys ``play``, ``character`` and
        ``wordCount``.
    """
    if get_characters is None:
        get_characters = fetch_characters

    rows = []
    for play in plays:
        # The characters route is addressed by play name; the ID fields are
        # only a last resort for records that lack a name.
        play_id = play.get("name") or play.get("id") or play.get("dracorId")
        if not play_id:
            print("Play record without an identifier. Skipping...")
            continue

        characters = get_characters(play_id)
        if characters is None:
            # The fetcher has already reported why this play is unavailable.
            continue
        if not characters:
            print(f"No characters found in play {play_id}.")
            continue

        # Identify the character with the highest word count (the top speaker).
        top_speaker = max(characters, key=word_count)

        # Only add to results if the top speaker is FEMALE.
        if top_speaker.get("gender") == "FEMALE":
            rows.append({
                "play": play.get("title", play_id),
                "character": top_speaker.get("name", "Unknown"),
                "wordCount": word_count(top_speaker),
            })
        else:
            print(f"In play '{play.get('title', play_id)}', the top speaker is not female. Skipping.")
    return rows


# The guard is true both when run as a script and inside a notebook kernel, so
# the names below are still created at module level there; it only keeps the
# network calls from firing when this code is imported.
if __name__ == "__main__":
    plays_data = fetch_plays(corpusname)
    print(f"Found {len(plays_data)} plays in corpus '{corpusname}'.")

    # Rows for plays where the top speaker is female.
    results = collect_top_female_speakers(plays_data)

    # Convert the results to a DataFrame and display it as a table
    df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
    try:
        display(df_results)
    except NameError:
        # ``display`` only exists inside IPython/Jupyter; fall back to print.
        print(df_results)

    # Save the results as a CSV file (filename includes the corpus name)
    csv_filename = f"{corpusname}_top_female_speakers.csv"
    df_results.to_csv(csv_filename, index=False)
    print(f"Results saved to {csv_filename}")
