# Building the Legal Retrieval Pipeline

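This notebook builds and tests the complete retrieval system with FAISS indexing: it loads a retriever from a configuration file, indexes the sample documents, runs test queries, and then saves and reloads the index.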

In [None]:
import sys
sys.path.append('..')

import json
from src.retrieval.retriever import load_retriever_from_config

## 1. Load Configuration and Create Retriever

In [None]:
# Build the retriever from the YAML retrieval configuration.
# The path is relative, so it is resolved against the current working directory.
config_path = '../configs/retrieval_config.yaml'
retriever = load_retriever_from_config(config_path)

print("Retriever loaded successfully!")

## 2. Index Sample Documents

In [None]:
# Load documents
# The encoding is pinned to UTF-8: without it, open() falls back to the
# platform's locale encoding, which can mis-decode (or fail on) non-ASCII
# characters in the JSON file on systems whose default is not UTF-8.
with open('../data/samples/sample_documents.json', 'r', encoding='utf-8') as f:
    documents = json.load(f)

print(f"Loaded {len(documents)} documents")

# Index documents
# NOTE(review): chunk_documents=True presumably splits each document into
# chunks before indexing (the message below reports the count as "chunks");
# confirm against the retriever implementation.
print("\nIndexing documents...")
retriever.index_documents(documents, chunk_documents=True)

print(f"\n✅ Indexed {retriever.get_num_documents()} document chunks")

## 3. Test Retrieval with Queries

In [None]:
# Sample negligence-related questions used to exercise the retriever.
test_queries = [
    "What are the elements of negligence?",
    "How is causation established?",
    "What is res ipsa loquitur?"
]

# Horizontal rule printed above and below each query heading.
banner = '='*80

for query in test_queries:
    print(f"\n{banner}")
    print(f"Query: {query}")
    print(f"{banner}\n")

    # Run the query against the retriever with top_k=3.
    results = retriever.retrieve(query, top_k=3)

    # Number the results starting from 1 for display.
    for rank, hit in enumerate(results, start=1):
        print(f"{rank}. Score: {hit['score']:.4f}")
        # Falls back to 'N/A' when the result carries no 'title' entry.
        print(f"   Title: {hit.get('title', 'N/A')}")
        # Show only the first 150 characters of the result text.
        preview = hit['text'][:150]
        print(f"   {preview}...\n")

## 4. Save Index for Later Use

In [None]:
# Save index
# The target directory is defined once so the confirmation message always
# reports the location that was actually passed to save_index().
index_dir = '../data/embeddings'
retriever.save_index(index_dir)
print(f"\n✅ Index saved to {index_dir}")

## 5. Load Index and Test

In [None]:
# Build a second retriever from the same configuration file and load the
# index from ../data/embeddings instead of re-indexing the documents.
new_retriever = load_retriever_from_config('../configs/retrieval_config.yaml')
new_retriever.load_index('../data/embeddings')

loaded_count = new_retriever.get_num_documents()
print(f"Loaded index with {loaded_count} documents")

# Run a single query (top_k=2) against the reloaded index.
query = "What is negligence per se?"
results = new_retriever.retrieve(query, top_k=2)

print(f"\nTest query: {query}")
for rank, hit in enumerate(results, start=1):
    print(f"\n{rank}. Score: {hit['score']:.4f}")
    # Show only the first 100 characters of the result text.
    snippet = hit['text'][:100]
    print(f"   {snippet}...")

## Summary

Retrieval pipeline complete!
- ✅ Built retrieval system
- ✅ Indexed legal documents
- ✅ Tested queries
- ✅ Saved index for reuse

**Next:** Proceed to `03_self_rag_training.ipynb` to train the Self-RAG models.