# Test Improvements - UX, FLA Normalization & Table Parser

This notebook tests the following improvements:
1. FLA cap removal
2. FLA relationship normalization (fixes encoding errors, excludes general/punitive damages)
3. Case summary paragraph removal
4. UX improvements (better sizing, color contrast)
5. **Table-based parser functionality** (table extraction and judge name normalization)

Run these tests before committing the PR to ensure all functionality works correctly.

In [None]:
import json
import re
from typing import Optional, List, Dict, Any
import pandas as pd
import os

# Import the normalization function from app/ui/fla_analytics
from app.ui.fla_analytics import normalize_fla_relationship, extract_fla_awards

## Test 1: FLA Relationship Normalization

Test that relationships are properly extracted and encoded errors are fixed.

In [None]:
# Fixtures for relationship normalization. Each pair is
# (raw claim description, label the normalizer should return);
# None means the description must be rejected as a non-relationship claim.
test_cases = [
    # (input, expected_output)
    ("€50,000 to spouse", "Spouse"),
    ("Spouse - loss of guidance", "Spouse"),
    ("Wife's claim", "Spouse"),
    ("Husband", "Spouse"),
    ("Child - loss of care", "Child"),
    ("Son's claim", "Child"),
    ("Daughter", "Child"),
    ("Mother's claim", "Parent"),
    ("Father - loss of companionship", "Parent"),
    ("Brother's claim", "Sibling"),
    ("Sister", "Sibling"),
    ("Grandmother's claim", "Grandparent"),
    ("Grandfather", "Grandparent"),
    ("General damages", None),  # Should be excluded
    ("Punitive damages", None),  # Should be excluded
    ("Aggravated damages", None),  # Should be excluded
    ("Special damages", None),  # Should be excluded
    ("Costs", None),  # Should be excluded
    ("â€ spouse claim", "Spouse"),  # Encoding error fix
    ("€ child", "Child"),  # Euro sign removal
]

banner = "=" * 80
print("Testing FLA Relationship Normalization")
print(banner)

# Count outcomes keyed by the status glyph printed for each case.
tally = {"✓": 0, "✗": 0}

for raw_text, wanted in test_cases:
    actual = normalize_fla_relationship(raw_text)
    mark = "✓" if actual == wanted else "✗"
    tally[mark] += 1
    print(f"{mark} '{raw_text:40}' -> '{actual}' (expected: '{wanted}')")

print(banner)
passed, failed = tally["✓"], tally["✗"]
print(f"Results: {passed} passed, {failed} failed")

if failed:
    print(f"\n⚠️ {failed} test(s) failed!")
else:
    print("\n✅ All FLA normalization tests passed!")

## Test 2: Table-Based Parser (Live Demo)

**IMPORTANT:** This test runs live table extraction on a sample page of `2024damagescompendium.pdf`. The PDF must be in the working directory; if it is missing, the test is skipped.

This shows the new row-by-row extraction approach that's 90-95% cheaper than full-page parsing.

In [None]:
# Locate the compendium PDF; later cells read `has_pdf` to decide whether
# the table-extraction demo can run.
PDF_PATH = "2024damagescompendium.pdf"
has_pdf = os.path.exists(PDF_PATH)

if not has_pdf:
    print(f"⚠️ PDF not found: {PDF_PATH}")
    print("Skipping table extraction test (requires PDF)")
else:
    print(f"✅ PDF found: {PDF_PATH}")
    print("Will test table extraction on a sample page...")

In [None]:
# Only run if PDF exists
if has_pdf:
    import pdfplumber

    # Page to inspect, 1-based (adjust as needed)
    TEST_PAGE = 50

    print(f"\nExtracting tables from page {TEST_PAGE}...")
    print("=" * 80)

    page_text = ""
    tables = []
    with pdfplumber.open(PDF_PATH) as pdf:
        page_count = len(pdf.pages)
        # Validate before indexing: a page number past the end would raise
        # IndexError, and 0 would wrap around to the last page via index -1.
        page_in_range = 1 <= TEST_PAGE <= page_count
        if page_in_range:
            page = pdf.pages[TEST_PAGE - 1]  # 0-indexed
            # Extract text for section detection
            page_text = page.extract_text() or ""
            # Extract tables
            tables = page.extract_tables()

    if not page_in_range:
        print(f"\n⚠️ Page {TEST_PAGE} is out of range (PDF has {page_count} page(s))")
        print("Set TEST_PAGE to a page number between 1 and the page count")
    else:
        print(f"\nPage {TEST_PAGE} text sample (first 300 chars):")
        print("-" * 80)
        print(page_text[:300])
        print("-" * 80)

        if not tables:
            print(f"\n⚠️ No tables found on page {TEST_PAGE}")
            print("Try a different page number (e.g., 50, 100, 200)")
        else:
            print(f"\n✅ Found {len(tables)} table(s) on page {TEST_PAGE}")

            for table_idx, table in enumerate(tables):
                print(f"\n{'='*80}")
                print(f"TABLE {table_idx + 1}")
                print('='*80)
                print(f"Dimensions: {len(table)} rows × {len(table[0]) if table else 0} columns")

                # A header plus at least one data row is needed for the preview
                if not table or len(table) < 2:
                    print("⚠️ Table too small to process")
                    continue

                # Show header
                header = table[0]
                print("\nHeader Row:")
                for i, col in enumerate(header):
                    col_name = str(col).strip() if col else "(empty)"
                    print(f"  Column {i+1}: {col_name}")

                # Show first 3 data rows
                print("\nFirst 3 Data Rows:")
                print("-" * 80)

                for row_idx, row in enumerate(table[1:4], 1):
                    print(f"\nRow {row_idx}:")
                    for col_idx, (col_name, cell) in enumerate(zip(header, row)):
                        cell_val = str(cell).strip() if cell else "(empty)"
                        # Truncate long values
                        if len(cell_val) > 60:
                            cell_val = cell_val[:57] + "..."
                        col_label = str(col_name).strip() if col_name else f"Col{col_idx+1}"
                        print(f"  {col_label:20}: {cell_val}")

                print("\n" + "=" * 80)

        # Only claim completion when a page was actually examined
        print("\n✅ Table extraction test complete!")

    print("\nKEY BENEFITS OF TABLE-BASED PARSING:")
    print("  - Processes individual rows (100-300 tokens each)")
    print("  - vs. Full pages (3000-5000 tokens each)")
    print("  - 90-95% cost reduction")
    print("  - Pre-labeled columns (plaintiff, defendant, year, etc.)")
    print("  - Deterministic merging (rows without citations → previous case)")
    print("  - Works with cheaper models (gpt-5-nano vs gpt-5-chat)")

## Test 3: Judge Name Normalization

Test the judge normalization function from the table parser.

In [None]:
# Import judge normalization from table parser
from damages_parser_table import TableBasedParser

# Each pair is (raw judge string, expected normalized name).
judge_test_cases = [
    ("Smith J.", "Smith"),
    ("A. Smith J.A.", "Smith"),
    ("Hon. John Smith J.", "Smith"),
    ("Brown J.J.A.", "Brown"),
    ("Wilson C.J.", "Wilson"),
    ("O'Brien J.A.", "O'Brien"),
    ("Smith, J.", "Smith"),
]

print("Testing Judge Name Normalization")
print("=" * 80)

judge_passed = 0
judge_failed = 0

for input_name, expected in judge_test_cases:
    result = TableBasedParser.normalize_judge_name(input_name)

    if result == expected:
        status = "✓"
        judge_passed += 1
    else:
        status = "✗"
        judge_failed += 1

    # `!s` converts before applying the width: formatting None (or any other
    # non-string result) with ':15' directly raises TypeError, which would
    # abort the loop on exactly the cases that need reporting.
    print(f"{status} '{input_name:30}' -> '{result!s:15}' (expected: '{expected}')")

print("=" * 80)
print(f"Results: {judge_passed} passed, {judge_failed} failed")

if judge_failed == 0:
    print("\n✅ All judge normalization tests passed!")
else:
    print(f"\n⚠️ {judge_failed} test(s) failed!")

## Test 4: FLA Awards Extraction

Test that FLA awards are correctly extracted with normalized relationships.

In [None]:
# Fixture case: two relationship claims (spouse, child) followed by two
# damage heads that the printed expectations below say must be excluded.
sample_case = {
    "case_name": "Test v. Case",
    "year": 2023,
    "court": "ONSC",
    "extended_data": {
        "family_law_act_claims": [
            {"description": "€50,000 - Spouse loss of guidance", "amount": 50000, "category": "FLA"},
            {"description": "Child - loss of care and companionship", "amount": 30000, "category": "FLA"},
            {"description": "General damages", "amount": 100000, "category": "General"},
            {"description": "Punitive damages", "amount": 25000, "category": "Punitive"},
        ],
    },
}

awards = extract_fla_awards(sample_case)

print("Testing FLA Awards Extraction")
print("=" * 80)
print(f"Total claims in case: {len(sample_case['extended_data']['family_law_act_claims'])}")
print(f"Valid FLA relationship claims: {len(awards)}")
print()

# List every extracted award, numbering from 1 for readability.
for position, entry in enumerate(awards, start=1):
    print(f"Award {position}:")
    print(f"  Normalized: {entry['description']}")
    print(f"  Original: {entry['original_description']}")
    print(f"  Amount: ${entry['amount']:,}")
    print()

print("Expected behavior:")
for expectation in (
    "  - Should extract 2 awards (Spouse and Child)",
    "  - Should exclude 'General damages' and 'Punitive damages'",
    "  - Should normalize 'Spouse' and 'Child' relationships",
    "  - Should fix encoding errors (€ symbols removed)",
):
    print(expectation)
print()

if len(awards) != 2:
    print(f"⚠️ Expected 2 awards, got {len(awards)}")
else:
    print("✅ Extraction test passed!")

## Test 5: Case Summary Removal Check

Verify that the ENTIRE summary paragraph has been removed (not just the label).

In [None]:
# Check streamlit_app.py for case summary
streamlit_file_path = 'streamlit_app.py'

# Regexes for leftover summary-rendering code, each paired with the label
# used when reporting a hit.
summary_patterns = [
    (r'summary_text.*?CASE_SUMMARY_MAX_LENGTH', 'Summary text extraction'),
    (r'st\.text\(.*?summary', 'st.text display of summary'),
    (r'\*\*Case Summary:\*\*', 'Case Summary label'),
]


def find_summary_leftovers(content):
    """Return ``(description, match_count)`` for each summary pattern found.

    Args:
        content: Source text to scan.

    Returns:
        A list of tuples in ``summary_patterns`` order, one per pattern that
        matches at least once (case-insensitively). Empty if nothing matches.
    """
    leftovers = []
    for pattern, desc in summary_patterns:
        hits = re.findall(pattern, content, re.IGNORECASE)
        if hits:
            leftovers.append((desc, len(hits)))
    return leftovers


if os.path.exists(streamlit_file_path):
    # Explicit UTF-8 so the read does not depend on the platform's default
    # text encoding.
    with open(streamlit_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    print("Case Summary Removal Check")
    print("=" * 80)

    # Look for the summary display code and report what is still present
    found_issues = find_summary_leftovers(content)
    if found_issues:
        print("⚠️ Summary-related code patterns still present:")
        for desc, count in found_issues:
            print(f"  - {desc}: {count} match(es)")
    else:
        print("✅ No summary-related code patterns found")

    # Check for the comment that indicates removal
    has_removal_comment = 'Summary paragraph removed' in content or 'pertinent info shown in enhanced data' in content

    if has_removal_comment:
        print("✅ Found comment indicating summary paragraph removal")
    else:
        print("⚠️ No removal comment found")

    # Check if summary display code is gone
    has_summary_display = 'st.text(summary_text' in content or 'st.markdown(summary_text' in content

    if not has_summary_display:
        print("✅ Summary display code has been removed")
    else:
        print("⚠️ Summary display code still exists")

    # Check for expander header with award amount
    has_award_in_header = 'Award:' in content and 'damage_display' in content

    if has_award_in_header:
        print("✅ Award amount added to expander header")
    else:
        print("⚠️ Award amount not found in expander header")

    print()
    # Pattern hits count against the verdict so the summary line cannot
    # contradict the warnings printed above.
    if has_removal_comment and not has_summary_display and has_award_in_header and not found_issues:
        print("✅ All case summary improvements verified!")
    else:
        print("⚠️ Some improvements may be missing")
else:
    print(f"File not found: {streamlit_file_path}")

## Test 6: FLA Cap Removal

Verify that FLA cap references are removed from the analytics module.

In [None]:
# Check the fla_analytics.py file for cap references
fla_file_path = 'app/ui/fla_analytics.py'

if os.path.exists(fla_file_path):
    # Explicit UTF-8 so the read does not depend on the platform's default
    # text encoding.
    with open(fla_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Check for cap-related keywords (plain substring match, order preserved)
    cap_keywords = ['FLA_DAMAGES_CAP', 'fla_cap', 'create_fla_cap_chart']
    found_caps = [keyword for keyword in cap_keywords if keyword in content]

    print("FLA Cap Reference Check")
    print("=" * 80)

    if found_caps:
        print(f"⚠️ Found {len(found_caps)} cap-related references:")
        for keyword in found_caps:
            print(f"  - {keyword}")
    else:
        print("✅ No FLA cap references found - cap has been successfully removed!")

    # Check for new distribution chart
    if 'create_fla_distribution_chart' in content:
        print("✅ New distribution chart function found")
    else:
        print("⚠️ Distribution chart function not found")
else:
    print(f"File not found: {fla_file_path}")

## Test 7: CSS Improvements

Verify that CSS has been updated for better contrast and sizing.

In [None]:
# Check CSS improvements in streamlit_app.py
# Defined here as well so this cell runs on its own, without relying on an
# earlier cell having set the path.
streamlit_file_path = 'streamlit_app.py'

if os.path.exists(streamlit_file_path):
    # Explicit UTF-8 so the read does not depend on the platform's default
    # text encoding.
    with open(streamlit_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract CSS block: the first <style>...</style> passed to st.markdown("""
    css_match = re.search(r'st\.markdown\("""\s*<style>(.*?)</style>', content, re.DOTALL)

    if css_match:
        css_content = css_match.group(1)

        print("CSS Improvements Check")
        print("=" * 80)

        # Check for improved properties; each entry is (label, found?)
        improvements = [
            ('Better font sizes', 'font-size: 1.05rem' in css_content or 'font-size: 1.1rem' in css_content),
            ('Line height for readability', 'line-height: 1.6' in css_content or 'line-height: 1.7' in css_content),
            ('Improved color contrast', '#111827' in css_content or '#1f2937' in css_content),
            ('Better padding/spacing', 'padding: 1.25rem' in css_content or 'padding: 1.35rem' in css_content),
            ('Expander improvements', 'expanderHeader' in css_content),
            ('Metric improvements', 'stMetricValue' in css_content),
        ]

        passed_improvements = sum(1 for _, check in improvements if check)

        for improvement, check in improvements:
            status = "✅" if check else "❌"
            print(f"{status} {improvement}")

        print()
        print(f"UX Improvements: {passed_improvements}/{len(improvements)} implemented")

        if passed_improvements >= len(improvements) - 1:  # Allow 1 variation
            print("\n✅ CSS improvements successfully applied!")
        else:
            print(f"\n⚠️ Only {passed_improvements} improvements found")
    else:
        print("Could not find CSS block in streamlit_app.py")
else:
    print(f"File not found: {streamlit_file_path}")

## Summary

Run all cells above to verify:

1. ✅ FLA relationship normalization works correctly
2. ✅ **Table-based parser extracts tables and shows actual output**
3. ✅ Judge names normalized (last name only)
4. ✅ Encoding errors (€ symbols) are fixed
5. ✅ General/punitive damages are excluded
6. ✅ **Entire case summary paragraph removed** (not just label)
7. ✅ **Award amount shown in expander header**
8. ✅ FLA cap references are removed
9. ✅ Distribution chart replaces cap chart
10. ✅ CSS improvements for better UX

All tests should pass before merging the PR!