In [1]:
import weaviate

In [3]:

# Smoke test: connect to the local Weaviate instance and print its readiness,
# server version, and the names of the existing collections.
print("Connecting to Weaviate...")
client = weaviate.connect_to_local()

try:
    # Reaching this point means connect_to_local() returned without raising.
    print("✓ Connected!")
    print(f"✓ Ready: {client.is_ready()}")

    meta = client.get_meta()
    print(f"✓ Version: {meta.get('version')}")

    collections = client.collections.list_all()
    print(f"✓ Collections: {list(collections.keys())}")

    print("\n🎉 Weaviate is working correctly!")

finally:
    # Always release the connection, even if one of the checks above raised.
    client.close()
    print("Connection closed.")

Connecting to Weaviate...
✓ Connected!
✓ Ready: True
✓ Version: 1.34.5
✓ Collections: ['RAG_section800_embed3large_v1']

🎉 Weaviate is working correctly!
Connection closed.


In [None]:
# Test the new query functions
# Prerequisites: Weaviate must be running (docker compose up -d)

from src.vector_db import get_client, query_similar, query_hybrid, list_available_books

In [None]:
# Open a client through the project helper, then enumerate the books it reports
client = get_client()
books = list_available_books(client)

print(f"Found {len(books)} books:")
for title in books:
    print(f"  - {title}")

In [None]:
# Test 1: plain vector-similarity query with no book filter (all books)
results = query_similar(client, "What is System 1 thinking?", top_k=3)

print("Query: 'What is System 1 thinking?'")
print(f"Found {len(results)} results:\n")

for rank, hit in enumerate(results, start=1):
    print(f"[{rank}] Score: {hit.score:.4f}")
    print(f"    Book: {hit.book_id}")
    print(f"    Section: {hit.section}")
    excerpt = hit.text[:200]  # show only the first 200 characters
    print(f"    Text: {excerpt}...")
    print()

In [None]:
# Test 2: similarity query restricted to a single book via book_ids
kahneman = "Thinking Fast and Slow (Daniel Kahneman)"
results = query_similar(
    client,
    "How do heuristics lead to biases?",
    top_k=3,
    book_ids=kahneman,
)

print("Query: 'How do heuristics lead to biases?' (filtered to Kahneman)")
print(f"Found {len(results)} results:\n")

for rank, hit in enumerate(results, start=1):
    print(f"[{rank}] Score: {hit.score:.4f} | {hit.section}")
    excerpt = hit.text[:150]  # show only the first 150 characters
    print(f"    {excerpt}...")
    print()

In [None]:
# Test 3: similarity query restricted to several books (the Stoic philosophers)
stoic_books = [
    "The Meditations (Marcus Aurelius)",
    "Letters from a Stoic (Seneca)",
    "The Enchiridion (Epictetus)",
]
results = query_similar(
    client,
    "How should we deal with things outside our control?",
    top_k=5,
    book_ids=stoic_books,
)

print("Query: 'How should we deal with things outside our control?' (Stoic books)")
print(f"Found {len(results)} results:\n")

for rank, hit in enumerate(results, start=1):
    # Keep whatever follows the last "(" in book_id and drop ")" characters,
    # which leaves just the author name for ids shaped like "Title (Author)".
    author = hit.book_id.rpartition("(")[2].replace(")", "")
    print(f"[{rank}] {author} | Score: {hit.score:.4f}")
    excerpt = hit.text[:150]  # show only the first 150 characters
    print(f"    {excerpt}...")
    print()

In [None]:
# Test 4: hybrid query (vector + keyword)
# Useful when technical terms should match exactly
results = query_hybrid(
    client,
    "dopamine reward nucleus accumbens",
    top_k=3,
    alpha=0.5,  # equal weight to vector and keyword
)

print("Query: 'dopamine reward nucleus accumbens' (hybrid search)")
print(f"Found {len(results)} results:\n")

for rank, hit in enumerate(results, start=1):
    book_label = hit.book_id[:40]  # truncate long book ids to 40 characters
    print(f"[{rank}] Score: {hit.score:.4f} | {book_label}...")
    excerpt = hit.text[:150]  # show only the first 150 characters
    print(f"    {excerpt}...")
    print()

In [None]:
# Cleanup: the queries are done, so release the client connection
client.close()
print("Client connection closed.")