# Hybrid Retrieval Pipeline - Complete End-to-End Demo

This notebook demonstrates the **full pipeline** with:
- Knowledge base seeding
- Topic drift detection
- Hybrid retrieval (sparse + dense)
- Context enhancement
- Drift-aware prompting
- Statistics & cleanup

---
**All features in action!**

In [None]:
# Cell 1: Imports and Setup
from uuid import uuid4
from pipeline import HybridRetrievalPipeline

# Demo banner: title line followed by a 70-character rule.
print("Hybrid Retrieval System - Complete Demo", "=" * 70, sep="\n")

In [None]:
# Cell 2: Initialize Pipeline
# Build the pipeline object that every later cell uses.
pipeline = HybridRetrievalPipeline(
    db_name="hyper_kb_demo",
    fusion_method="rrf",
    aggregation_strategy="weighted",
    prompt_template="conversational",
)

# Echo the configuration chosen above.
print(
    "Pipeline initialized with:",
    "  • Fusion: RRF",
    "  • Aggregation: Weighted",
    "  • Prompt: Conversational",
    sep="\n",
)

In [None]:
# Cell 3: Seed Sample Data
# Fresh random session id for this run; reused by the query cell below.
session_id = str(uuid4())
print(f"Session ID: {session_id}\n")

# Sample conversation with an intentional topic shift: two machine-learning
# questions followed by an unrelated cooking question.
conversations = [
    {
        "query": "What is machine learning?",
        "response": "Machine learning is a subset of AI that enables systems to learn from data without explicit programming."
    },
    {
        "query": "What are neural networks?",
        "response": "Neural networks are computing systems inspired by biological neural networks, consisting of interconnected nodes that process information."
    },
    {
        "query": "How do I cook pasta?",  # Topic shift
        "response": "Boil water with salt, add pasta, cook for 8-10 minutes until al dente, then drain."
    }
]

print("Storing sample conversations...")
for i, conv in enumerate(conversations, 1):
    # Deliberately NOT named `result`: that name holds the pipeline.query()
    # output read by the later display cells, and re-running this cell must
    # not overwrite it with a process_interaction() return value.
    interaction = pipeline.process_interaction(
        query=conv['query'],
        response=conv['response'],
        session_id=session_id
    )

    # One status line per stored turn; the query text is cut/padded to 50 columns.
    status = "[SHIFT]" if interaction['shift_detected'] else "[STABLE]"
    print(f"{i}. {status} {conv['query'][:50]:<50}...")
    if interaction['shift_detected']:
        print(f"    Drift score: {interaction['topic_shift_score']:.3f}")

In [None]:
# Cell 4: Query with Context Enhancement
# Section header: blank line, rule, title, rule.
print("", "=" * 70, "QUERYING WITH CONTEXT ENHANCEMENT", "=" * 70, sep="\n")

# Ask a follow-up question in the seeded session, requesting the top 3 contexts.
test_query = "How long should I cook spaghetti?"
result = pipeline.query(test_query, session_id, top_k=3)

# Summary of the query followed by the prompt the pipeline assembled.
print(
    f"\nQuery: {test_query}",
    f"Retrieved: {result['retrieved_contexts']} contexts",
    f"Session turn: {result['session_turn']}",
    "\nEnhanced Prompt:",
    "-" * 70,
    result['enhanced_prompt'],
    sep="\n",
)

In [None]:
# Cell 5: Show Retrieved Contexts
# Section header: blank line, rule, title, rule.
print("", "=" * 70, "RETRIEVED CONTEXTS", "=" * 70, sep="\n")

# One three-line entry per retrieval hit from the query result.
for hit in result['retrieval_results']:
    # Show at most the first 100 characters of the stored answer.
    answer_preview = hit.response_text[:100].strip()
    print(
        f"\nRank {hit.rank} | Score: {hit.score:.3f} | Method: {hit.retrieval_method}",
        f"Q: {hit.query_text}",
        f"A: {answer_preview}...",
        sep="\n",
    )

In [None]:
# Cell 6: Drift State Analysis
# Section header: blank line, rule, title, rule.
print("", "=" * 70, "DRIFT STATE ANALYSIS", "=" * 70, sep="\n")

# The drift state is taken from the query result.
drift_state = result['drift_state']
print(
    f"\nSession: {drift_state['session_id']}",
    f"Total interactions: {drift_state['total_interactions']}",
    f"Topic transitions: {drift_state['topic_transitions']}",
    f"Active context size: {drift_state['active_context_size']}",
    sep="\n",
)

print("\nRetrieval Bias:")
for bias_name, bias_value in drift_state['retrieval_bias'].items():
    # snake_case key -> "Title Case" label, left-aligned in 20 columns.
    label = bias_name.replace('_', ' ').title()
    print(f"  {label:<20}: {bias_value:.3f}")

In [None]:
# Cell 7: Statistics
# Section header: blank line, rule, title, rule.
print("\n" + "=" * 70)
print("SYSTEM STATISTICS")
print("=" * 70)

# Fetch the statistics once and report from that snapshot.
stats = pipeline.get_statistics()

print("\nKB Stats:")
for key, value in stats['kb_stats'].items():
    # 'most_accessed' is excluded from this flat key/value listing.
    if key == 'most_accessed':
        continue
    # snake_case key -> "Title Case" label, left-aligned in 25 columns.
    formatted_key = key.replace('_', ' ').title()
    print(f"  {formatted_key:<25}: {value}")

print("\nPipeline:")
print(f"  Indexed               : {stats['indexed']}")
print(f"  Active sessions       : {stats['active_sessions']}")

In [None]:
# Cell 8: Cleanup
# Close the pipeline first, then confirm completion.
pipeline.close()
print("", "=" * 70, "Demo complete! Pipeline shut down.", sep="\n")

---
## You're Done!

You've seen:
- Topic drift detection in action
- Hybrid retrieval with RRF fusion
- Dynamic context window management
- Bias-aware retrieval scoring
- Full system statistics

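**Try changing the queries or adding more data!** Because Cell 8 closes the pipeline, re-run the notebook from Cell 2 before experimenting further.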

---