In [None]:
# Standard-library imports first, then path setup, then project-local imports.
import pickle
import sys

# Put the parent directory on the import path (presumably where `core`
# lives -- confirm against the repo layout). The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate entry each time.
if '..' not in sys.path:
    sys.path.append('..')

from core import MultiHopRetriever, PathwayDocumentStore, print_section

print_section("MULTI-HOP EVIDENCE RETRIEVAL")

## Load Previous Results

In [None]:
# Saved artifacts this notebook reads back in.
index_path = '../results/document_index.json'
claims_path = '../results/claims.pkl'

# Create a document store and load the saved index into it.
document_store = PathwayDocumentStore(embedding_model=None, chunk_size=1000)
document_store.load_index(index_path)

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only read claim files that come from a trusted source.
with open(claims_path, 'rb') as claims_file:
    claims = pickle.load(claims_file)

print(f"✓ Loaded {len(claims)} claims")
print(f"✓ Loaded document store with {len(document_store.documents)} chunks")

## Initialize Retriever

In [None]:
# Construct the multi-hop retriever over the loaded document store,
# passing max_hops=3.
retriever_options = {"document_store": document_store, "max_hops": 3}
retriever = MultiHopRetriever(**retriever_options)

print("✓ Retriever initialized")

## Retrieve Evidence for Claims

In [None]:
# Novel identifier handed to the retriever.
novel_id = "evermoor_sample"

# One call covers every claim, requesting 5 results per claim.
evidence_map = retriever.retrieve_for_claims(claims=claims, novel_id=novel_id, top_k_per_claim=5)

print(f"\n✓ Retrieved evidence for {len(evidence_map)} claims")
print("=" * 60)

# Preview only the first three entries to keep the cell output short.
preview_entries = list(evidence_map.items())[:3]
for claim_key, hits in preview_entries:
    print(f"\n{claim_key}:")
    print(f"  Evidence count: {len(hits)}")
    if not hits:
        # Nothing retrieved for this claim, so there is no first hit to show.
        continue
    first_hit = hits[0]
    print(f"  Top score: {first_hit.score:.3f}")
    print(f"  Preview: {first_hit.text[:100]}...")

## Test Hybrid Search

In [None]:
# Exercise hybrid search with the first claim, if any claims were loaded.
test_claim = claims[0] if claims else None

# Compare against None explicitly: a bare truthiness test would also skip a
# claim object that happens to evaluate as falsy (e.g. one defining
# __bool__ or __len__).
if test_claim is not None:
    print("\nTesting hybrid search for claim:")
    print(f"  {test_claim.text}")

    # The claim text is the query; at most the first three entities are
    # passed as keywords.
    hybrid_results = retriever.hybrid_search(
        query=test_claim.text,
        keywords=test_claim.entities[:3],
        novel_id=novel_id,
        top_k=5
    )

    print(f"\nHybrid search results: {len(hybrid_results)}")
    print("=" * 60)

    # Show only the first three results, numbered from 1.
    for i, result in enumerate(hybrid_results[:3], 1):
        print(f"\n{i}. Score: {result.score:.3f}")
        print(f"   Text: {result.text[:100]}...")
else:
    # Make the skip visible instead of leaving the cell output empty.
    print("\nNo claims loaded; skipping hybrid search test.")

## Analyze Evidence Quality

In [None]:
# Collect per-claim evidence counts and a flat list of every evidence score.
evidence_counts = [len(items) for items in evidence_map.values()]
all_scores = [item.score for items in evidence_map.values() for item in items]

total_evidence = sum(evidence_counts)
# Divide by at least 1 so that having no evidence at all gives 0 instead of
# raising ZeroDivisionError.
avg_score = sum(all_scores) / max(total_evidence, 1)

print(f"\nEvidence Quality Analysis:")
print("=" * 60)
print(f"Total evidence pieces: {total_evidence}")
print(f"Average similarity score: {avg_score:.3f}")
# Only claims present in evidence_map with an empty list are counted here.
print(f"Claims with no evidence: {evidence_counts.count(0)}")

## Export Results

In [None]:
# Write the evidence map to the results directory as a pickle file.
output_path = '../results/evidence_map.pkl'
with open(output_path, 'wb') as out_file:
    pickle.dump(evidence_map, out_file)

print("\n✓ Module 4 Complete: Evidence retrieval successful!")