In [4]:
# %% [markdown]
# # Test Cases for `extract_choice` Function

# %% [markdown]
# ## Setup - Define the function

# %%
import re

def extract_choice(response: str) -> str:
    """Extract the multiple-choice answer (A, B, C or D) from a free-form response.

    Strategies are tried in priority order; the first one that yields a letter wins:

    1. ``\\boxed{X}`` (first occurrence, any case).
    2. The whole (stripped) response is a single letter.
    3. A parenthesised letter such as ``(B)`` (last occurrence).
    4. Phrases such as "I choose B" or "my answer is C" (last occurrence of the
       first phrase pattern that matches at all).
    5. A letter that ends a line, optionally followed by ``.`` or ``!``
       (lines are scanned from the bottom up).
    6. The last isolated letter anywhere in the response, ignoring the
       lower-case article "a" and the "d" of contractions such as "I'd".

    Args:
        response: Raw response text.

    Returns:
        "A", "B", "C" or "D" (always upper-case), or "X" when no choice
        could be identified.
    """
    response_clean = response.strip()
    response_upper = response_clean.upper()

    # 1. Boxed format takes priority over everything else.
    boxed_match = re.search(r'\\boxed\{([ABCD])\}', response_clean, re.IGNORECASE)
    if boxed_match:
        return boxed_match.group(1).upper()

    # 2. Direct single-letter response (ideal case).
    if response_upper in ('A', 'B', 'C', 'D'):
        return response_upper

    # 3. Parenthetical format: (A), (B), ... - take the LAST occurrence.
    paren_matches = re.findall(r'\(([ABCD])\)', response_upper)
    if paren_matches:
        return paren_matches[-1]

    # 4. "choose/pick/select/answer X" patterns - take the LAST occurrence.
    choice_patterns = [
        r'(?:choose|chose|pick|select|answer|option)[:\s]+([ABCD])\b',
        r'(?:choose|chose|pick|select|go with)[:\s]+(?:option\s+)?([ABCD])\b',
        r'(?:my|the|final)\s+(?:answer|choice)[:\s]+(?:is\s+)?([ABCD])\b',
        r'\b([ABCD])\s+is\s+(?:my|the)\s+(?:answer|choice)',
        r"I(?:'ll| will| shall)?\s+(?:choose|pick|go with)\s+([ABCD])\b",
    ]
    for pattern in choice_patterns:
        # The keywords are written in lower case while the text is upper-cased,
        # so the match has to be case-insensitive or this step never fires.
        matches = re.findall(pattern, response_upper, re.IGNORECASE)
        if matches:
            return matches[-1].upper()

    # 5. Standalone letter at the end of the response or of a line.
    for line in reversed(response_clean.split('\n')):
        end_match = re.search(r'\b([ABCD])\s*[.!]?\s*$', line.strip().upper())
        if end_match:
            return end_match.group(1)

    # 6. Last isolated A/B/C/D in the response.
    # The look-behind rejects a letter that directly follows "<letter>'"
    # (e.g. the "d" of "I'd"), which \b alone would accept as isolated.
    isolated_letter = re.compile(r"(?<![A-Za-z]['\u2019])\b([ABCD])\b", re.IGNORECASE)
    article_tail = re.compile(r'\s+[A-Za-z]')
    # Work on the original-case text and use each match's own position, so the
    # article check inspects the letter that was actually matched.
    for hit in reversed(list(isolated_letter.finditer(response_clean))):
        letter = hit.group(1)
        # A lower-case "a" followed by another word is the English article,
        # not a choice; an upper-case "A" is kept as a candidate.
        if letter == 'a' and article_tail.match(response_clean, hit.end(1)):
            continue
        return letter.upper()

    return "X"

# %% [markdown]
# ## Test Helper

# %%
def test(name, input_text, expected):
    """Run one case through extract_choice, print a one-line report, return pass/fail.

    Args:
        name: Label shown in the report line.
        input_text: Response text handed to extract_choice.
        expected: Letter the extraction is expected to produce.

    Returns:
        True when the extracted letter equals ``expected``, else False.
    """
    result = extract_choice(input_text)
    ok = result == expected

    # Keep the report to one line: cap the preview at 60 characters and show
    # newlines as a literal backslash-n.
    shown = input_text if len(input_text) <= 60 else input_text[:60] + "..."
    shown = shown.replace("\n", "\\n")

    mark = "✓" if ok else "✗"
    report = f"{mark} {name}: '{shown}' -> {result}"
    if not ok:
        report += f" (expected {expected})"
    print(report)

    return ok

def run_test_suite(test_cases, suite_name="Tests"):
    """Run every case of a suite through ``test`` and print a banner and a tally.

    Args:
        test_cases: Sequence of (name, input_text, expected) tuples.
        suite_name: Heading printed above the per-case lines.

    Returns:
        Tuple ``(passed, total)``.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f" {suite_name}")
    print(banner)

    passed = 0
    for name, inp, exp in test_cases:
        if test(name, inp, exp):
            passed += 1
    total = len(test_cases)

    print(f"\nResults: {passed}/{total} passed")
    return passed, total

# %% [markdown]
# ## Test Suite 1: Boxed Format (Highest Priority)

# %%
# Responses that wrap the letter in a LaTeX-style boxed command.
# Each entry is (test name, response text, expected letter).
boxed_tests = [
    # One bare boxed response per upper-case letter ...
    *(
        (f"boxed uppercase {letter}", "\\boxed{" + letter + "}", letter)
        for letter in "ABCD"
    ),
    # ... and the lower-case spellings, which must come back upper-cased.
    *(
        (f"boxed lowercase {letter}", "\\boxed{" + letter + "}", letter.upper())
        for letter in "ab"
    ),
    # Boxed letter embedded in surrounding prose.
    ("boxed with prefix", r"My answer is \boxed{B}", "B"),
    ("boxed with suffix", r"\boxed{C} is my choice.", "C"),
    ("boxed in rambling", r"After thinking about it carefully, I believe \boxed{D} is correct.", "D"),
]

run_test_suite(boxed_tests, suite_name="Boxed Format Tests")

# %% [markdown]
# ## Test Suite 2: Direct Single Letter

# %%
# Responses consisting of nothing but the letter (plus optional whitespace).
# Each entry is (test name, response text, expected letter).
single_letter_tests = [
    *((f"single {letter}", letter, letter) for letter in "ABCD"),
    *((f"lowercase {letter}", letter, letter.upper()) for letter in "ab"),
    # Surrounding whitespace must not matter.
    ("with whitespace", "  A  ", "A"),
    ("with newlines", "\nB\n", "B"),
]

run_test_suite(single_letter_tests, suite_name="Single Letter Tests")

# %% [markdown]
# ## Test Suite 3: Parenthetical Format

# %%
# Responses that put the letter in parentheses; with several candidates the
# last one is expected. Mapping: test name -> (response text, expected letter).
paren_tests = [
    (label, text, letter)
    for label, (text, letter) in {
        "paren A": ("(A)", "A"),
        "paren B": ("(B)", "B"),
        "paren with text": ("I choose (C)", "C"),
        "paren with period": ("The answer is (D).", "D"),
        "multiple takes last": ("I think (A) but actually (B)", "B"),
        "three options takes last": ("(A) or maybe (C)... I'll go with (D)", "D"),
    }.items()
]

run_test_suite(paren_tests, suite_name="Parenthetical Format Tests")

# %% [markdown]
# ## Test Suite 4: Choice Patterns (choose/pick/select)

# %%
# Responses that state the choice with a verb or noun phrase
# ("I choose X", "my answer is X", ...).
# Each entry is (test name, response text, expected letter).
choice_pattern_tests = [
    # Cases whose test name is simply the response text itself.
    *(
        (phrase, phrase, letter)
        for phrase, letter in [
            ("I choose A", "A"),
            ("I'll choose B", "B"),
            ("I will choose C", "C"),
            ("I pick D", "D"),
            ("I'll pick A", "A"),
            ("I select B", "B"),
            ("I'll go with C", "C"),
            ("I will go with D", "D"),
            ("My answer is A", "A"),
            ("The answer: B", "B"),
            ("My final answer is C", "C"),
        ]
    ),
    # Cases with a descriptive name.
    ("choose option A", "I choose option A", "A"),
    ("go with option B", "I'll go with option B", "B"),
    ("mind change takes last", "I think A... but I'll choose B", "B"),
]

run_test_suite(choice_pattern_tests, suite_name="Choice Pattern Tests")

# %% [markdown]
# ## Test Suite 5: End of Line/Response

# %%
# Responses where the letter closes a line or the whole response.
# Mapping: test name -> (response text, expected letter).
end_of_line_tests = [
    (label, text, letter)
    for label, (text, letter) in {
        "letter with period": ("The answer is A.", "A"),
        "letter with exclamation": ("I'll go with B!", "B"),
        "letter after colon": ("My choice: C", "C"),
        "multiline - last line": ("Let me think.\nAfter analysis:\nD", "D"),
        "multiline takes last": ("I thought A.\nBut B is better.\nFinal: C", "C"),
    }.items()
]

run_test_suite(end_of_line_tests, suite_name="End of Line Tests")

# %% [markdown]
# ## Test Suite 6: Realistic Rambling Responses

# %%
# Long, free-form responses in which the final choice is buried in prose.
# Each entry is (test name, response text, expected letter). The response
# texts are triple-quoted literals, so their line breaks and indentation are
# part of the input handed to extract_choice.
rambling_tests = [
    # Choice stated with a phrase at the very end of the last line.
    (
        "educated guess",
        """I can't actually determine the correct answer based on the 
        given sequence of characters. However, I can suggest that the sequence 
        appears to be a mix of letters and numbers. If I had to make an educated 
        guess, I would choose letter C.""",
        "C"
    ),
    # Quoted letters appear earlier in the text; the stated option is expected.
    (
        "analysis then answer",
        """To solve this, I'll look for the most common letters in 
        the sequence. The letters "A", "e", "s", and "t" seem frequent.
        However, given the analysis, I'll choose option B.""",
        "B"
    ),
    # Several candidates are mentioned; the last one is expected.
    (
        "multiple mind changes",
        """I think A might be right. Actually B looks better.
        Wait, maybe C? No, I'll go with D as my final answer.""",
        "D"
    ),
    # Prose followed by a line holding only the letter.
    (
        "analysis then simple letter",
        """After examining the sequence, I notice various patterns.
        The distribution seems random.
        
        A""",
        "A"
    ),
    # An initial refusal followed by an explicit choice.
    (
        "refuses then answers",
        """I cannot make a decision on the sequence you've provided.
        After analyzing the sequence, I'm going to choose option A.""",
        "A"
    ),
    # Many quoted letters; the parenthesised choice at the end is expected.
    (
        "very long rambling",
        """To solve this, I'll look for the most common letters in the sequence, 
        which are often found in natural language. The letters "A", "e", "s", and "t" seem frequent.
        However, given the provided sequence, I see "e" only once, and it's not prominent. 
        Among the letters "s", "t", "A", "e", and "B" are present, but "B" is the most common. 
        However, considering the presence of "e" only once and the overall letter distribution, 
        I'd lean towards B being the least less frequent of the more common letters.
        Since the letters "B" and "t" are both present, I choose option (B).""",
        "B"
    ),
]

# Print the per-case report and the pass tally for this suite.
run_test_suite(rambling_tests, "Rambling Response Tests")

# %% [markdown]
# ## Test Suite 7: Edge Cases

# %%
# Degenerate or tricky inputs; "X" is the expected result when no choice
# can be extracted. Mapping: test name -> (response text, expected letter).
edge_case_tests = [
    (label, text, letter)
    for label, (text, letter) in {
        "article 'a' not confused": ("This is a random sequence. I choose B.", "B"),
        "empty response": ("", "X"),
        "no valid choice": ("I don't know", "X"),
        "only invalid letters": ("I think E or F might work.", "X"),
        "whitespace only": ("   \n\t  ", "X"),
        "mixed case": ("i CHOOSE a", "A"),
        "answer with colon": ("Answer: A", "A"),
        "choice with colon": ("Choice: B", "B"),
    }.items()
]

run_test_suite(edge_case_tests, suite_name="Edge Case Tests")

# %% [markdown]
# ## Run All Tests - Summary

# %%
# Every suite defined above, as (display name, list of cases) pairs.
all_test_suites = list({
    "Boxed Format": boxed_tests,
    "Single Letter": single_letter_tests,
    "Parenthetical": paren_tests,
    "Choice Patterns": choice_pattern_tests,
    "End of Line": end_of_line_tests,
    "Rambling Responses": rambling_tests,
    "Edge Cases": edge_case_tests,
}.items())

print("=" * 60, " FULL TEST SUMMARY", "=" * 60, sep="\n")

total_passed = total_tests = 0

# Re-evaluate each suite silently (no per-case lines) and print one row per suite.
for suite_name, tests in all_test_suites:
    total = len(tests)
    passed = [extract_choice(inp) == exp for _, inp, exp in tests].count(True)
    total_passed, total_tests = total_passed + passed, total_tests + total
    status = "✗" if passed != total else "✓"
    print(f"{status} {suite_name}: {passed}/{total}")

print("-" * 60)
print(f"TOTAL: {total_passed}/{total_tests} tests passed")
print(f"Success rate: {100 * total_passed / total_tests:.1f}%")

# %% [markdown]
# ## Interactive Testing
# 
# Test your own responses here:

# %%
# Try your own test cases!
# Responses containing a backslash are written as raw strings so that "\b" in
# "\boxed" stays a backslash followed by "b" instead of becoming a backspace.
test_responses = [
    "I'll choose A.",
    "After much deliberation, B",
    "(C) is my answer",
    r"\boxed{D}",
    "Hmm, A seems good but actually I pick C",
    r"Idk \boxed{A} but kinda C but maybe D?",
    "I actually think A but probably will say C!!!!sfifjjf",
    "D",
    "X",
]

print("Interactive Tests:")
print("-" * 40)
for resp in test_responses:
    result = extract_choice(resp)
    # Only mark the preview with an ellipsis when it was actually truncated.
    preview = resp[:50] + "..." if len(resp) > 50 else resp
    print(f"Input: {preview}")
    print(f"Output: {result}")
    print()


 Boxed Format Tests
✓ boxed uppercase A: '\boxed{A}' -> A
✓ boxed uppercase B: '\boxed{B}' -> B
✓ boxed uppercase C: '\boxed{C}' -> C
✓ boxed uppercase D: '\boxed{D}' -> D
✓ boxed lowercase a: '\boxed{a}' -> A
✓ boxed lowercase b: '\boxed{b}' -> B
✓ boxed with prefix: 'My answer is \boxed{B}' -> B
✓ boxed with suffix: '\boxed{C} is my choice.' -> C
✓ boxed in rambling: 'After thinking about it carefully, I believe \boxed{D} is co...' -> D

Results: 9/9 passed

 Single Letter Tests
✓ single A: 'A' -> A
✓ single B: 'B' -> B
✓ single C: 'C' -> C
✓ single D: 'D' -> D
✓ lowercase a: 'a' -> A
✓ lowercase b: 'b' -> B
✓ with whitespace: '  A  ' -> A
✓ with newlines: '\nB\n' -> B

Results: 8/8 passed

 Parenthetical Format Tests
✓ paren A: '(A)' -> A
✓ paren B: '(B)' -> B
✓ paren with text: 'I choose (C)' -> C
✓ paren with period: 'The answer is (D).' -> D
✓ multiple takes last: 'I think (A) but actually (B)' -> B
✓ three options takes last: '(A) or maybe (C)... I'll go with (D)' -> D

Results