In [13]:
import csv
import re

# Pronoun lookup tables, keyed by the lowercase source pronoun.
# Masculine -> feminine: both "him" and "his" collapse onto "her".
male_to_female = {"he": "she", "him": "her", "his": "her", "himself": "herself"}

# Feminine -> masculine.  "her" defaults to the object form "him";
# transform_sentence switches it to "his" when is_possessive() says so.
female_to_male = {"she": "he", "her": "him", "hers": "his", "herself": "himself"}

# Nouns that, when they directly follow "her", mark it as possessive.
possessive_contexts = [
    "book",
    "car",
    "bag",
    "pen",
    "idea",
    "dog",
    "phone",
    "shoes",
    "attitude",
    "trip",
    "laptop",
]

def is_possessive(word, next_word):
    """Return True when *next_word* is one of the known possessive-context nouns.

    Args:
        word: The pronoun being classified.  Not consulted by the heuristic;
            kept so existing callers keep working.
        next_word: The token that follows the pronoun.  It may be empty and
            may carry surrounding punctuation (for example ``"book."``).

    Returns:
        bool: True if *next_word*, ignoring case and leading/trailing
        punctuation, appears in ``possessive_contexts``; otherwise False.
    """
    if not next_word:
        return False
    # Tokens come from str.split(), so the noun often has punctuation glued
    # on ("book.", "car,").  Strip it before the lookup.
    noun = re.sub(r"^\W+|\W+$", "", next_word).lower()
    return noun in possessive_contexts

# Main transformation function

# A token that can hold a pronoun: optional leading punctuation, an alphabetic
# core, then an optional contraction suffix ("'s", "'ll") and closing
# punctuation.  Tokens that do not fit this shape are passed through untouched.
_PRONOUN_TOKEN_RE = re.compile(r"(\W*)([A-Za-z]+)((?:['’][A-Za-z]+)?\W*)")

# Leading/trailing punctuation of a token (used to clean the look-ahead word).
_EDGE_PUNCT_RE = re.compile(r"^\W+|\W+$")


def _match_case(template, replacement):
    """Return *replacement* cased like *template* (Title, UPPER, or as given)."""
    if template.istitle():
        return replacement.capitalize()
    if template.isupper():
        return replacement.upper()
    return replacement


def transform_sentence(sentence: str, target_gender: str) -> str:
    """Swap gendered pronouns in *sentence* towards *target_gender*.

    The sentence is split on whitespace and re-joined with single spaces, so
    runs of whitespace are collapsed.  Only tokens whose alphabetic core is a
    key of the relevant mapping are rewritten; every other token is emitted
    exactly as it appeared.

    Args:
        sentence: Text to transform.
        target_gender: ``"female"`` applies ``male_to_female``; ``"male"``
            applies ``female_to_male``.  Any other value leaves the pronouns
            unchanged.

    Returns:
        The transformed sentence.  Replaced pronouns keep the Title/UPPER
        casing of the original and any punctuation or contraction suffix
        attached to it (``"He's"`` -> ``"She's"``, ``'"he,'`` -> ``'"she,'``).
    """
    if target_gender == "female":
        mapping = male_to_female
    elif target_gender == "male":
        mapping = female_to_male
    else:
        mapping = {}

    tokens = sentence.split()
    transformed = []

    for index, token in enumerate(tokens):
        match = _PRONOUN_TOKEN_RE.fullmatch(token)
        core = match.group(2) if match else ""
        lower = core.lower()

        if lower not in mapping:
            # Not a pronoun for this direction: keep the token byte-for-byte.
            transformed.append(token)
            continue

        lead, _, trail = match.groups()

        if target_gender == "male" and lower == "her":
            # "her" is ambiguous (object vs. possessive).  It can only be
            # possessive when nothing is attached after it and the following
            # word, stripped of punctuation, is a known possessive context.
            following = tokens[index + 1] if index + 1 < len(tokens) else ""
            following_core = _EDGE_PUNCT_RE.sub("", following)
            if not trail and is_possessive(lower, following_core):
                replacement = "his"
            else:
                replacement = "him"
        else:
            replacement = mapping[lower]

        transformed.append(lead + _match_case(core, replacement) + trail)

    return ' '.join(transformed)

# Process CSV and check accuracy
def process_pronoun_csv(filepath):
    """Run every test case in a CSV file through transform_sentence and report accuracy.

    The CSV must have a header row with the columns ``input_text``,
    ``target_gender`` and ``expected_output``.  For each row the transformed
    sentence is compared with the expected one, ignoring case and surrounding
    whitespace, and a per-row report is printed.  A final summary line gives
    the number of matches, or a notice when the file holds no data rows.

    Args:
        filepath: Path to the UTF-8 encoded CSV file of test cases.

    Returns:
        None.  All results are written to stdout.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a required column is missing from the CSV.
    """
    correct = 0
    total = 0

    # newline="" is what the csv module expects, so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open(filepath, "r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            input_text = row['input_text']
            target_gender = row['target_gender'].strip().lower()
            expected = row['expected_output'].strip()

            output = transform_sentence(input_text, target_gender).strip()

            # Case-insensitive comparison; both sides are already stripped.
            match = output.lower() == expected.lower()
            print(f"Input:    {input_text}")
            print(f"Gender:   {target_gender}")
            print(f"Output:   {output}")
            print(f"Expected: {expected}")
            print(f"Match:    {match}")
            print("-" * 50)

            if match:
                correct += 1
            total += 1

    if total == 0:
        # Avoid dividing by zero when the file has a header but no rows.
        print(f"\n⚠️ No test cases found in {filepath}")
        return

    print(f"\n✅ Accuracy: {correct}/{total} correct ({(correct / total) * 100:.2f}%)")

# Run the bundled test cases only when executed directly (script or notebook
# cell), not when this code is imported as a module.
if __name__ == "__main__":
    process_pronoun_csv("pronoun_testcases.csv")

Input:    He is going to the market.
Gender:   female
Output:   She is going to the market.
Expected: She is going to the market.
Match:    True
--------------------------------------------------
Input:    His book is on the table.
Gender:   female
Output:   Her book is on the table.
Expected: Her book is on the table.
Match:    True
--------------------------------------------------
Input:    I saw him yesterday.
Gender:   female
Output:   I saw her yesterday.
Expected: I saw her yesterday.
Match:    True
--------------------------------------------------
Input:    He hurt himself.
Gender:   female
Output:   She hurt herself.
Expected: She hurt herself.
Match:    True
--------------------------------------------------
Input:    I called him last night.
Gender:   female
Output:   I called her last night.
Expected: I called her last night.
Match:    True
--------------------------------------------------
Input:    That is his car.
Gender:   female
Output:   That is her car.
Expected: Th