In [None]:
"""\
Debug script to understand why anchoring checks are failing.

This script will:
1. Load a sample of test data
2. Run pattern matching with detailed logging
3. Analyze anchoring failures
4. Identify root causes
"""

import json
import sys
from pathlib import Path
from collections import defaultdict

import spacy


def _find_milestone3_src(start: Path) -> Path:
    """Locate the ``milestone_3/src`` directory, searching outward from *start*.

    Lookup order (first existing path wins):
      1. ``<start>/milestone_3/src``
      2. ``<start>/src`` -- only considered when *start* is itself named
         ``milestone_3``
      3. ``<ancestor>/milestone_3/src`` for every ancestor of *start*,
         nearest ancestor first

    Args:
        start: Directory to begin the search from; it is resolved first.

    Returns:
        The first candidate path that exists.

    Raises:
        FileNotFoundError: If none of the candidates exist.
    """
    origin = start.resolve()

    direct_guesses = [origin / "milestone_3" / "src"]
    if origin.name == "milestone_3":
        direct_guesses.append(origin / "src")

    ancestor_guesses = (ancestor / "milestone_3" / "src" for ancestor in origin.parents)

    for guess in (*direct_guesses, *ancestor_guesses):
        if guess.exists():
            return guess

    raise FileNotFoundError(f"Could not locate milestone_3/src from CWD={origin}")


# Put the discovered milestone_3/src directory at the front of sys.path.
# Statement order matters: this must run before the project-local imports
# below (presumably `utils` and `execution_engine` live in that directory --
# verify against the repository layout).
src_path = _find_milestone3_src(Path.cwd())
sys.path.insert(0, str(src_path))

# Project-local helpers used by the rest of this cell.
from utils import preprocess_data
from execution_engine import compile_dependency_matcher, parse_match_indices, verify_anchoring

# Load spaCy model
print("Loading spaCy model...")
nlp = spacy.load("en_core_web_lg")

# Resolve data files from the discovered source tree instead of the CWD,
# so the cell works no matter where the notebook kernel was started.
milestone3_dir = src_path.parent
repo_root = milestone3_dir.parent

# Load patterns (robust path). The encoding is given explicitly because
# open() otherwise falls back to the platform default, which is not UTF-8
# everywhere.
print("Loading patterns...")
patterns_path = milestone3_dir / "data" / "patterns_augmented.json"
with open(patterns_path, "r", encoding="utf-8") as f:
    patterns = json.load(f)
print(f"Loaded {len(patterns)} patterns")

# Load test data (robust path)
print("\nLoading test data...")
test_path = repo_root / "data" / "processed" / "test" / "test.json"
with open(test_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)[:100]  # Only first 100

# preprocess_data is project code; the later code in this cell reads the
# keys "doc", "e1_span", "e2_span" and "relation_directed" from each sample.
test_processed = preprocess_data(test_data, nlp)
print(f"Loaded {len(test_processed)} test samples")

# Compile matcher
print("\nCompiling DependencyMatcher...")
dep_matcher, pattern_lookup = compile_dependency_matcher(patterns, nlp)

# Detailed anchoring analysis
print("\n" + "=" * 80)
print("DETAILED ANCHORING ANALYSIS")
print("=" * 80)

# Caps that keep the debug log readable.
MAX_SAMPLES = 20
MAX_MATCHES_PER_SAMPLE = 5


def _token_text(doc, idx):
    """Return ``doc[idx].text``, or None when *idx* is None or outside *doc*."""
    if idx is None or not 0 <= idx < len(doc):
        return None
    return doc[idx].text


def _anchoring_failure_reason(e1_matched, e2_matched, e1_expected, e2_expected):
    """Classify a failed anchoring check.

    Args:
        e1_matched: Token index the pattern bound to "e1", or None.
        e2_matched: Token index the pattern bound to "e2", or None.
        e1_expected: Root token index of the annotated e1 span.
        e2_expected: Root token index of the annotated e2 span.

    Returns:
        One of "missing_e1_or_e2", "both_misaligned", "e1_misaligned",
        "e2_misaligned" or "unknown".
    """
    if e1_matched is None or e2_matched is None:
        return "missing_e1_or_e2"
    e1_off = e1_matched != e1_expected
    e2_off = e2_matched != e2_expected
    if e1_off and e2_off:
        return "both_misaligned"
    if e1_off:
        return "e1_misaligned"
    if e2_off:
        return "e2_misaligned"
    # verify_anchoring rejected the match although both parsed indices
    # equal the expected roots.
    return "unknown"


anchoring_failures = []
anchoring_successes = []
failure_reasons = defaultdict(int)

for sample_idx, sample in enumerate(test_processed[:MAX_SAMPLES]):
    doc = sample["doc"]
    e1_root_idx = sample["e1_span"].root.i
    e2_root_idx = sample["e2_span"].root.i

    sent_text = (sample.get("text") or doc.text or "").replace("\n", " ")

    print(f"\n{'=' * 80}")
    print(f"Sample {sample_idx + 1}: {sent_text[:100]}...")
    print(
        f"  E1: '{sample['e1_span'].text}' (root token: '{doc[e1_root_idx].text}' at position {e1_root_idx})"
    )
    print(
        f"  E2: '{sample['e2_span'].text}' (root token: '{doc[e2_root_idx].text}' at position {e2_root_idx})"
    )
    print(f"  True relation: {sample['relation_directed']}")

    # Get all matches
    matches = dep_matcher(doc)
    print(f"\n  Total DependencyMatcher matches: {len(matches)}")

    if not matches:
        print("  → No matches found!")
        continue

    # Analyze each match (only the first few, to keep the log short)
    for match_idx, (match_id_int, token_indices) in enumerate(matches[:MAX_MATCHES_PER_SAMPLE]):
        # The matcher reports the pattern key as a vocab hash; map it back
        # to the string key used by pattern_lookup.
        match_id = nlp.vocab.strings[match_id_int]
        pattern = pattern_lookup.get(match_id)

        if not pattern:
            continue

        print(f"\n  Match {match_idx + 1}:")
        print(f"    Pattern: {pattern['pattern_id']} ({pattern['pattern_type']})")
        print(f"    Predicted relation: {pattern['relation']}")
        print(f"    Precision: {pattern['precision']:.2f}, Support: {pattern['support']}")

        # Parse indices
        match_indices = parse_match_indices(token_indices, pattern)

        print(f"    Matched token indices: {token_indices}")
        print(f"    Parsed match_indices: {match_indices}")

        # Show what tokens were matched
        for node_id, idx in match_indices.items():
            if idx is not None and 0 <= idx < len(doc):
                token = doc[idx]
                print(f"      {node_id}: token[{idx}] = '{token.text}' (POS={token.pos_}, DEP={token.dep_})")

        # Verify anchoring
        e1_matched = match_indices.get("e1")
        e2_matched = match_indices.get("e2")

        # _token_text applies the same bounds guard as the token listing
        # above, so an unexpected index cannot abort the debug run.
        print("    Anchoring check:")
        print(f"      Expected e1: {e1_root_idx} ('{doc[e1_root_idx].text}')")
        print(f"      Matched e1:  {e1_matched} ({_token_text(doc, e1_matched)})")
        print(f"      Expected e2: {e2_root_idx} ('{doc[e2_root_idx].text}')")
        print(f"      Matched e2:  {e2_matched} ({_token_text(doc, e2_matched)})")

        # IMPORTANT: use the execution_engine signature (token_indices + pattern)
        anchored = verify_anchoring(token_indices, pattern, e1_root_idx, e2_root_idx)

        if anchored:
            print("    ✅ ANCHORING PASSED!")
            anchoring_successes.append(
                {
                    "sample_idx": sample_idx,
                    "pattern_id": pattern["pattern_id"],
                    "pattern_type": pattern["pattern_type"],
                    "relation": pattern["relation"],
                }
            )
            continue

        print("    ❌ ANCHORING FAILED!")

        reason = _anchoring_failure_reason(e1_matched, e2_matched, e1_root_idx, e2_root_idx)
        failure_reasons[reason] += 1

        anchoring_failures.append(
            {
                "sample_idx": sample_idx,
                "pattern_id": pattern["pattern_id"],
                "pattern_type": pattern["pattern_type"],
                "expected_e1": e1_root_idx,
                "matched_e1": e1_matched,
                "expected_e2": e2_root_idx,
                "matched_e2": e2_matched,
                "reason": reason,
            }
        )

# Summary statistics
print("\n" + "=" * 80)
print("SUMMARY STATISTICS")
print("=" * 80)

# Exact percentages with an explicit zero guard (no epsilon in the
# denominator, which would slightly skew every reported value).
n_successes = len(anchoring_successes)
n_failures = len(anchoring_failures)
n_checks = n_successes + n_failures
success_pct = n_successes / n_checks * 100 if n_checks else 0.0
failure_pct = n_failures / n_checks * 100 if n_checks else 0.0

print(f"\nTotal anchoring checks: {n_checks}")
print(f"  Successes: {n_successes} ({success_pct:.1f}%)")
print(f"  Failures: {n_failures} ({failure_pct:.1f}%)")

print("\nFailure reasons breakdown:")
for reason, count in sorted(failure_reasons.items(), key=lambda x: x[1], reverse=True):
    pct = count / n_failures * 100 if n_failures else 0
    print(f"  {reason}: {count} ({pct:.1f}%)")

# Show examples of each failure type
print("\n" + "=" * 80)
print("EXAMPLE FAILURES BY TYPE")
print("=" * 80)

for reason in failure_reasons:
    # At most three examples per failure reason.
    examples = [f for f in anchoring_failures if f["reason"] == reason][:3]
    if examples:
        print(f"\n{reason.upper()} examples:")
        for ex in examples:
            s = test_processed[ex["sample_idx"]]
            s_text = (s.get("text") or s["doc"].text or "").replace("\n", " ")
            # Stored index is 0-based; print it 1-based so it matches the
            # "Sample N" headers of the detailed log.
            print(f"  Sample {ex['sample_idx'] + 1}: {s_text[:80]}...")
            print(f"    Pattern: {ex['pattern_id']} ({ex['pattern_type']})")
            print(f"    Expected: e1={ex['expected_e1']}, e2={ex['expected_e2']}")
            print(f"    Matched:  e1={ex['matched_e1']}, e2={ex['matched_e2']}")

print("\n" + "=" * 80)
print("ANALYSIS COMPLETE")
print("=" * 80)


Successfully imported functions from Milestone 2: /Users/egeaydin/Github/TUW2025WS/Token13-tuw-nlp-ie-2025WS/milestone_2/rule_based
Loading spaCy model...
Loading patterns...


FileNotFoundError: [Errno 2] No such file or directory: 'data/patterns_augmented.json'

In [1]:
"""\
Test script to compare DependencyMatcher vs Entity-Rooted matching.

This verifies that entity-rooted matching eliminates anchoring failures.
"""

import json
import sys
from pathlib import Path

import spacy


def _find_milestone3_src(start: Path) -> Path:
    """Return the first existing ``milestone_3/src`` reachable from *start*.

    Candidates are tried in this order: ``<start>/milestone_3/src``;
    ``<start>/src`` when *start* is named ``milestone_3``; then
    ``<ancestor>/milestone_3/src`` for each ancestor of *start*, nearest
    first. *start* is resolved before any candidate is built.

    Raises:
        FileNotFoundError: If no candidate exists.
    """
    base = start.resolve()
    suffix = Path("milestone_3") / "src"

    search_order = [base / suffix]
    if base.name == "milestone_3":
        search_order.append(base / "src")
    search_order.extend(parent / suffix for parent in base.parents)

    found = next((path for path in search_order if path.exists()), None)
    if found is None:
        raise FileNotFoundError(f"Could not locate milestone_3/src from CWD={base}")
    return found


# Put the discovered milestone_3/src directory at the front of sys.path.
# Statement order matters: this must run before the project-local imports
# below (presumably these modules live in that directory -- verify against
# the repository layout).
src_path = _find_milestone3_src(Path.cwd())
sys.path.insert(0, str(src_path))

# Project-local helpers: preprocessing plus the two matching strategies
# compared in this cell.
from utils import preprocess_data
from execution_engine import compile_dependency_matcher, apply_patterns_with_anchoring
from entity_rooted_matcher import apply_patterns_entity_rooted


def _sample_text(sample) -> str:
    """Return a single-line display string for *sample*.

    Uses the sample's ``"text"`` entry when it is truthy; otherwise falls
    back to the ``.text`` of its ``"doc"`` entry (if present), and finally
    to the empty string. Newlines are replaced by spaces.
    """
    doc = sample.get("doc")
    text = sample.get("text")
    if not text:
        text = doc.text if doc is not None else ""
    if not text:
        text = ""
    return text.replace("\n", " ")


# Load spaCy model
print("Loading spaCy model...")
nlp = spacy.load("en_core_web_lg")

# Resolve data files from the discovered source tree instead of the CWD.
# CWD-relative paths ("data/...", "../data/...") only work when the kernel
# happens to be started inside milestone_3 and otherwise fail with
# FileNotFoundError.
milestone3_dir = src_path.parent
repo_root = milestone3_dir.parent

# Load patterns (robust path); UTF-8 is given explicitly because open()
# otherwise uses the platform default encoding.
print("Loading patterns...")
patterns_path = milestone3_dir / "data" / "patterns_augmented.json"
with open(patterns_path, "r", encoding="utf-8") as f:
    patterns = json.load(f)
print(f"Loaded {len(patterns)} patterns")

# Load test data (first 50 samples)
print("\nLoading test data...")
test_path = repo_root / "data" / "processed" / "test" / "test.json"
with open(test_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)[:50]

test_processed = preprocess_data(test_data, nlp)
print(f"Loaded {len(test_processed)} test samples")

def _banner(title: str) -> None:
    """Print *title* between two 80-character rules, preceded by a blank line."""
    rule = "=" * 80
    print("\n" + rule)
    print(title)
    print(rule)


# Test 1: DependencyMatcher approach (current)
_banner("TEST 1: DependencyMatcher Approach (Current)")

dep_matcher, pattern_lookup = compile_dependency_matcher(patterns, nlp)

preds_dm, dirs_dm, expls_dm, stats_dm = apply_patterns_with_anchoring(
    test_processed, dep_matcher, pattern_lookup, nlp
)

print("\nResults:")
for label, key in (
    ("Matched", "matched"),
    ("Failed anchoring", "failed_anchoring"),
    ("Match attempts", "match_attempts"),
):
    print(f"  {label}: {stats_dm[key]}")

# The failure rate is only meaningful when at least one match was attempted.
attempts = stats_dm["match_attempts"]
if attempts > 0:
    fail_rate = stats_dm["failed_anchoring"] / attempts * 100
    print(f"  Anchoring failure rate: {fail_rate:.1f}%")

# Test 2: Entity-rooted approach (new)
_banner("TEST 2: Entity-Rooted Approach (New)")

preds_er, dirs_er, expls_er, stats_er = apply_patterns_entity_rooted(test_processed, patterns, nlp)

print("\nResults:")
for label, key in (("Matched", "matched"), ("Default to Other", "default_other")):
    print(f"  {label}: {stats_er[key]}")

# Compare predictions
_banner("COMPARISON")

print("\nMatch rate:")
total_samples = len(test_processed)
for approach, stats in (("DependencyMatcher", stats_dm), ("Entity-Rooted", stats_er)):
    print(f"  {approach}: {stats['matched']}/{total_samples} ({stats['match_rate']:.1%})")

# Show the first few samples side by side
print("\nExample predictions:")
for position, sample in enumerate(test_processed[:5], start=1):
    print(f"\nSample {position}: {_sample_text(sample)[:80]}...")
    dm_pred = preds_dm[position - 1]
    print(f"  DependencyMatcher: {dm_pred}")
    er_pred = preds_er[position - 1]
    print(f"  Entity-Rooted: {er_pred}")
    if dm_pred != er_pred:
        print("  DIFFERENT")

_banner("TEST COMPLETE")


Successfully imported functions from Milestone 2: /Users/egeaydin/Github/TUW2025WS/Token13-tuw-nlp-ie-2025WS/milestone_2/rule_based
Loading spaCy model...
Loading patterns...
Loaded 345 patterns

Loading test data...


Processing:   0%|          | 0/50 [00:00<?, ?it/s]

Loaded 50 test samples

TEST 1: DependencyMatcher Approach (Current)
Compiling 345 patterns into DependencyMatcher...
Successfully compiled 345 patterns
\nApplying patterns to 50 samples...


Classifying:   0%|          | 0/50 [00:00<?, ?it/s]

\nClassification complete!
  Matched: 30 (60.0%)
  Default to Other: 20 (40.0%)
  Failed anchoring: 521
  Match attempts: 551 (avg 11.0/sample)
  Unique patterns used: 16

Results:
  Matched: 30
  Failed anchoring: 521
  Match attempts: 551
  Anchoring failure rate: 94.6%

TEST 2: Entity-Rooted Approach (New)

Applying 345 patterns (entity-rooted) to 50 samples...


Classifying:   0%|          | 0/50 [00:00<?, ?it/s]


Classification complete!
  Matched: 43 (86.0%)
  Default to Other: 7 (14.0%)
  Unique patterns used: 13
  Matches by type: {'LINEAR': 11, 'TRIANGLE': 31, 'BRIDGE': 1}

Results:
  Matched: 43
  Default to Other: 7

COMPARISON

Match rate:
  DependencyMatcher: 30/50 (60.0%)
  Entity-Rooted: 43/50 (86.0%)

Example predictions:

Sample 1: The most common audits were about waste and recycling....
  DependencyMatcher: Other
  Entity-Rooted: Content-Container(e1,e2)
  DIFFERENT

Sample 2: The company fabricates plastic chairs....
  DependencyMatcher: Instrument-Agency(e2,e1)
  Entity-Rooted: Product-Producer(e2,e1)
  DIFFERENT

Sample 3: The school master teaches the lesson with a stick....
  DependencyMatcher: Other
  Entity-Rooted: Entity-Destination(e1,e2)
  DIFFERENT

Sample 4: The suspect dumped the dead body into a local reservoir....
  DependencyMatcher: Entity-Destination(e1,e2)
  Entity-Rooted: Entity-Destination(e1,e2)

Sample 5: Avian influenza is an infectious disease of birds ca

In [None]:
"""\
Test script to compare WITH vs WITHOUT anchoring verification.

Shows the impact of removing anchoring checks.
"""

import json
import sys
from pathlib import Path

import spacy


def _find_milestone3_src(start: Path) -> Path:
    """Find the ``milestone_3/src`` directory independent of the current CWD.

    The resolved *start* is probed first (``<start>/milestone_3/src``, and
    ``<start>/src`` if *start* is named ``milestone_3``); after that each
    ancestor directory is probed for ``milestone_3/src``, nearest first.

    Raises:
        FileNotFoundError: If nothing is found.
    """
    here = start.resolve()

    # 1) milestone_3 lives directly below the starting directory.
    nested = here / "milestone_3" / "src"
    if nested.exists():
        return nested

    # 2) The starting directory is milestone_3 itself.
    if here.name == "milestone_3":
        sibling = here / "src"
        if sibling.exists():
            return sibling

    # 3) Walk upwards through the ancestors.
    for ancestor in here.parents:
        probe = ancestor.joinpath("milestone_3", "src")
        if probe.exists():
            return probe

    raise FileNotFoundError(f"Could not locate milestone_3/src from CWD={here}")


def _sample_text(sample) -> str:
    """Build a one-line preview string for *sample*.

    The ``"text"`` entry is preferred; when it is missing or empty the
    ``.text`` of the ``"doc"`` entry is used if a doc is present. Any
    newline in the chosen text becomes a space; the result is "" when no
    text is available.
    """
    doc = sample.get("doc")
    raw = sample.get("text")
    if not raw and doc is not None:
        raw = doc.text
    return (raw or "").replace("\n", " ")


# Derive the project directories from the discovered src folder:
# src_path -> its parent (milestone3_dir) -> that directory's parent (repo_root).
src_path = _find_milestone3_src(Path.cwd())
milestone3_dir = src_path.parent
repo_root = milestone3_dir.parent

# Statement order matters: the path must be on sys.path before the
# project-local imports below are executed.
sys.path.insert(0, str(src_path))

# Project-local helpers: preprocessing plus the two pattern-application
# variants compared in this cell.
from utils import preprocess_data
from execution_engine import (
    compile_dependency_matcher,
    apply_patterns_with_anchoring,
    apply_patterns_no_anchoring,
)

# Load spaCy model
print("Loading spaCy model...")
nlp = spacy.load("en_core_web_lg")

# Load patterns (robust path). UTF-8 is given explicitly because open()
# otherwise uses the platform default encoding.
print("Loading patterns...")
patterns_path = milestone3_dir / "data" / "patterns_augmented.json"
with open(patterns_path, "r", encoding="utf-8") as f:
    patterns = json.load(f)
print(f"Loaded {len(patterns)} patterns")

# Load test data (robust path); only the first 100 items are used.
print("\nLoading test data...")
test_path = repo_root / "data" / "processed" / "test" / "test.json"
with open(test_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)[:100]

test_processed = preprocess_data(test_data, nlp)
print(f"Loaded {len(test_processed)} test samples")

# Compile matcher
print("\nCompiling DependencyMatcher...")
dep_matcher, pattern_lookup = compile_dependency_matcher(patterns, nlp)

# Test 1: WITH anchoring verification (current)
print("\n" + "=" * 80)
print("TEST 1: WITH Anchoring Verification (Current)")
print("=" * 80)

preds_with, dirs_with, expls_with, stats_with = apply_patterns_with_anchoring(
    test_processed, dep_matcher, pattern_lookup, nlp
)

print("\nResults:")
print(f"  Matched: {stats_with['matched']} ({stats_with['match_rate']:.1%})")
print(f"  Default to Other: {stats_with['default_other']} ({stats_with['default_rate']:.1%})")
print(f"  Failed anchoring: {stats_with['failed_anchoring']}")
print(f"  Match attempts: {stats_with['match_attempts']}")

# Test 2: WITHOUT anchoring verification (new)
print("\n" + "=" * 80)
print("TEST 2: WITHOUT Anchoring Verification (New)")
print("=" * 80)

preds_without, dirs_without, expls_without, stats_without = apply_patterns_no_anchoring(
    test_processed, dep_matcher, pattern_lookup, nlp
)

print("\nResults:")
print(f"  Matched: {stats_without['matched']} ({stats_without['match_rate']:.1%})")
print(f"  Default to Other: {stats_without['default_other']} ({stats_without['default_rate']:.1%})")

# Compare
print("\n" + "=" * 80)
print("COMPARISON")
print("=" * 80)

print("\nMatch rate improvement:")
print(f"  WITH anchoring: {stats_with['match_rate']:.1%}")
print(f"  WITHOUT anchoring: {stats_without['match_rate']:.1%}")
print(f"  Improvement: {(stats_without['match_rate'] - stats_with['match_rate']):.1%}")

print("\nPrediction agreement:")
n_preds = len(preds_with)
same = sum(1 for p1, p2 in zip(preds_with, preds_without) if p1 == p2)
different = n_preds - same
# Guard the ratios so an empty prediction list reports 0% instead of
# raising ZeroDivisionError.
same_rate = same / n_preds if n_preds else 0.0
different_rate = different / n_preds if n_preds else 0.0
print(f"  Same predictions: {same}/{n_preds} ({same_rate:.1%})")
print(f"  Different predictions: {different}/{n_preds} ({different_rate:.1%})")

# Show examples where they differ
MAX_DIFF_EXAMPLES = 5
print("\nExamples where predictions differ:")
diff_count = 0
for sample_no, (sample, p_with, p_without) in enumerate(
    zip(test_processed, preds_with, preds_without), start=1
):
    if p_with == p_without:
        continue
    # Print only the first few differences but keep counting all of them.
    if diff_count < MAX_DIFF_EXAMPLES:
        s_text = _sample_text(sample)
        print(f"\nSample {sample_no}: {s_text[:80]}...")
        print(f"  True label: {sample['relation_directed']}")
        print(f"  WITH anchoring: {p_with}")
        print(f"  WITHOUT anchoring: {p_without}")
    diff_count += 1

print(f"\n\nTotal differences: {diff_count}")

print("\n" + "=" * 80)
print("TEST COMPLETE")
print("=" * 80)


Loading spaCy model...
Loading patterns...
Loaded 345 patterns

Loading test data...


Processing:   0%|          | 0/100 [00:00<?, ?it/s]

Loaded 100 test samples

Compiling DependencyMatcher...
Compiling 345 patterns into DependencyMatcher...
Successfully compiled 345 patterns

TEST 1: WITH Anchoring Verification (Current)
\nApplying patterns to 100 samples...


Classifying:   0%|          | 0/100 [00:00<?, ?it/s]

\nClassification complete!
  Matched: 62 (62.0%)
  Default to Other: 38 (38.0%)
  Failed anchoring: 1172
  Match attempts: 1234 (avg 12.3/sample)
  Unique patterns used: 24

Results:
  Matched: 62 (62.0%)
  Default to Other: 38 (38.0%)
  Failed anchoring: 1172
  Match attempts: 1234

TEST 2: WITHOUT Anchoring Verification (New)
\nApplying patterns to 100 samples (NO anchoring verification)...


Classifying:   0%|          | 0/100 [00:00<?, ?it/s]

\nClassification complete!
  Matched: 93 (93.0%)
  Default to Other: 7 (7.0%)
  Unique patterns used: 36

Results:
  Matched: 93 (93.0%)
  Default to Other: 7 (7.0%)

COMPARISON

Match rate improvement:
  WITH anchoring: 62.0%
  WITHOUT anchoring: 93.0%
  Improvement: 31.0%

Prediction agreement:
  Same predictions: 42/100 (42.0%)
  Different predictions: 58/100 (58.0%)

Examples where predictions differ:

Sample 6: The ear of the African elephant is significantly larger--measuring 183 cm by 114...
  True label: Component-Whole(e1,e2)
  WITH anchoring: Member-Collection(e2,e1)
  WITHOUT anchoring: Content-Container(e1,e2)

Sample 7: A child is told a lie for several years by their parents before he/she realizes ...
  True label: Product-Producer(e1,e2)
  WITH anchoring: Other
  WITHOUT anchoring: Product-Producer(e1,e2)

Sample 8: Skype, a free software, allows a hookup of multiple computer users to join in an...
  True label: Member-Collection(e2,e1)
  WITH anchoring: Member-Collectio

In [4]:
"""
Calculate actual ACCURACY for WITH vs WITHOUT anchoring.

This determines which approach produces more correct predictions.
"""

import json
import sys
from pathlib import Path
import spacy

def _find_milestone3_src(start: Path) -> Path:
    """Return the first existing ``milestone_3/src`` candidate for *start*.

    Candidates, in order: ``<start>/milestone_3/src``; ``<start>/src`` when
    the resolved *start* is named ``milestone_3``; ``<ancestor>/milestone_3/src``
    for each ancestor of *start*, nearest first.

    Raises:
        FileNotFoundError: If no candidate exists.
    """
    root = start.resolve()

    def _candidates():
        # Yielded lazily in priority order.
        yield root / "milestone_3" / "src"
        if root.name == "milestone_3":
            yield root / "src"
        for up in root.parents:
            yield up / "milestone_3" / "src"

    for option in _candidates():
        if option.exists():
            return option
    raise FileNotFoundError(f"Could not locate milestone_3/src from CWD={root}")

# Derive the project directories from the discovered src folder and put
# that folder at the front of sys.path. Statement order matters: the path
# must be set before the project-local imports below are executed.
src_path = _find_milestone3_src(Path.cwd())
milestone3_dir = src_path.parent
repo_root = milestone3_dir.parent
sys.path.insert(0, str(src_path))

# Project-local helpers: preprocessing plus the two pattern-application
# variants whose accuracy is compared in this cell.
from utils import preprocess_data
from execution_engine import (
    compile_dependency_matcher,
    apply_patterns_with_anchoring,
    apply_patterns_no_anchoring,
)

print("Loading spaCy model...")
nlp = spacy.load("en_core_web_lg")

# UTF-8 is given explicitly for both JSON files because open() otherwise
# uses the platform default encoding.
print("Loading patterns...")
patterns_path = milestone3_dir / "data" / "patterns_augmented.json"
with open(patterns_path, "r", encoding="utf-8") as f:
    patterns = json.load(f)
print(f"Loaded {len(patterns)} patterns")

# Only the first 100 test items are evaluated.
print("\nLoading test data...")
test_path = repo_root / "data" / "processed" / "test" / "test.json"
with open(test_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)[:100]

test_processed = preprocess_data(test_data, nlp)
print(f"Loaded {len(test_processed)} test samples")

# Get ground truth labels
def get_directed_label(item):
    """Build the directed relation label for one raw test item.

    Reads ``item['relation']['type']`` and the optional
    ``item['relation']['direction']``.

    Returns:
        ``'Other'`` when the type is ``'Other'``; otherwise
        ``'<type>(<direction>)'`` with any parentheses removed from the
        direction first (a missing direction yields ``'<type>()'``).
    """
    relation = item['relation']
    kind = relation['type']
    raw_direction = relation.get('direction', '')

    # 'Other' carries no direction, so it is returned as-is.
    if kind == 'Other':
        return 'Other'

    bare_direction = raw_direction.replace('(', '').replace(')', '')
    return f"{kind}({bare_direction})"

# Ground-truth labels, one per raw test item, in test_data order
true_labels = list(map(get_directed_label, test_data))

# Compile matcher
print("\nCompiling DependencyMatcher...")
dep_matcher, pattern_lookup = compile_dependency_matcher(patterns, nlp)

# Both runs receive exactly the same inputs.
_rule = "=" * 80
_run_args = (test_processed, dep_matcher, pattern_lookup, nlp)

# WITH anchoring
print("\n" + _rule)
print("Running WITH anchoring...")
print(_rule)
preds_with, _, _, stats_with = apply_patterns_with_anchoring(*_run_args)

# WITHOUT anchoring
print("\n" + _rule)
print("Running WITHOUT anchoring...")
print(_rule)
preds_without, _, _, stats_without = apply_patterns_no_anchoring(*_run_args)

# Calculate accuracy
n_total = len(true_labels)

# zip() silently truncates to the shortest input, so a length mismatch
# (labels come from test_data, predictions from test_processed) would give
# misaligned, meaningless accuracies. Fail loudly instead.
if not (len(preds_with) == len(preds_without) == n_total):
    raise ValueError(
        "Label/prediction length mismatch: "
        f"{n_total} labels, {len(preds_with)} WITH-anchoring predictions, "
        f"{len(preds_without)} WITHOUT-anchoring predictions"
    )

correct_with = sum(1 for t, p in zip(true_labels, preds_with) if t == p)
correct_without = sum(1 for t, p in zip(true_labels, preds_without) if t == p)

# Guard against an empty evaluation set.
acc_with = correct_with / n_total if n_total else 0.0
acc_without = correct_without / n_total if n_total else 0.0

print("\n" + "="*80)
print("ACCURACY COMPARISON")
print("="*80)

# Denominators use the real sample count instead of a hard-coded 100.
print("\nWITH Anchoring:")
print(f"  Correct: {correct_with}/{n_total}")
print(f"  Accuracy: {acc_with:.1%}")

print("\nWITHOUT Anchoring:")
print(f"  Correct: {correct_without}/{n_total}")
print(f"  Accuracy: {acc_without:.1%}")

print("\nDifference:")
print(f"  {acc_without - acc_with:+.1%} ({correct_without - correct_with:+d} net correct predictions)")

# Analyze the changes
print("\n" + "="*80)
print("DETAILED ANALYSIS OF CHANGED PREDICTIONS")
print("="*80)

changed = 0     # Predictions that differ between the two runs
better = 0      # Changed from wrong to correct
worse = 0       # Changed from correct to wrong
both_wrong = 0  # Changed but both wrong

for true, p_with, p_without in zip(true_labels, preds_with, preds_without):
    if p_with == p_without:
        continue
    changed += 1

    was_correct = (p_with == true)
    is_correct = (p_without == true)

    if not was_correct and is_correct:
        better += 1
    elif was_correct and not is_correct:
        worse += 1
    else:
        # The two predictions differ, so they cannot both equal the true
        # label; this branch therefore means both are wrong.
        both_wrong += 1

print(f"\nOf {changed} changed predictions:")
print(f"  Improved (wrong → correct): {better}")
print(f"  Degraded (correct → wrong): {worse}")
print(f"  Still wrong (wrong → different wrong): {both_wrong}")

print(f"\nNet improvement: {better - worse:+d} predictions")

print("\n" + "="*80)
# Compare the integer counts so a tie is reported as a tie rather than
# as a regression.
if correct_without > correct_with:
    print("✅ RECOMMENDATION: Use NO ANCHORING approach")
    print(f"   Expected performance gain: {(acc_without - acc_with)*100:.1f} percentage points")
elif correct_without < correct_with:
    print("⚠️  WARNING: NO ANCHORING performed worse")
    print(f"   Performance loss: {(acc_with - acc_without)*100:.1f} percentage points")
else:
    print("NO DIFFERENCE: both approaches reached the same accuracy")
print("="*80)

Loading spaCy model...
Loading patterns...
Loaded 345 patterns

Loading test data...


Processing:   0%|          | 0/100 [00:00<?, ?it/s]

Loaded 100 test samples

Compiling DependencyMatcher...
Compiling 345 patterns into DependencyMatcher...
Successfully compiled 345 patterns

Running WITH anchoring...
\nApplying patterns to 100 samples...


Classifying:   0%|          | 0/100 [00:00<?, ?it/s]

\nClassification complete!
  Matched: 62 (62.0%)
  Default to Other: 38 (38.0%)
  Failed anchoring: 1172
  Match attempts: 1234 (avg 12.3/sample)
  Unique patterns used: 24

Running WITHOUT anchoring...
\nApplying patterns to 100 samples (NO anchoring verification)...


Classifying:   0%|          | 0/100 [00:00<?, ?it/s]

\nClassification complete!
  Matched: 93 (93.0%)
  Default to Other: 7 (7.0%)
  Unique patterns used: 36

ACCURACY COMPARISON

WITH Anchoring:
  Correct: 31/100
  Accuracy: 31.0%

WITHOUT Anchoring:
  Correct: 26/100
  Accuracy: 26.0%

Difference:
  -5.0% (-5 more correct)

DETAILED ANALYSIS OF CHANGED PREDICTIONS

Of 58 changed predictions:
  Improved (wrong → correct): 7
  Degraded (correct → wrong): 12
  Still wrong (wrong → different wrong): 39

Net improvement: -5 predictions

   Performance loss: 5.0 percentage points
