In [6]:
import asyncio
import os
from dotenv import load_dotenv

# Import our custom modules
from src.ingestion import IngestionEngine, TextChunker
from src.embedder import Embedder
from src.vector_store import VectorStore
from src.answer_engine import AnswerEngine

# Load environment variables (API keys) from a local .env file so that no
# secret has to be hardcoded in the notebook.
load_dotenv()

print("✅ Modules imported successfully.")

✅ Modules imported successfully.


In [7]:
# Initialize the pipeline components (each constructor is called with its defaults).
ingestor = IngestionEngine()
chunker = TextChunker()
embedder = Embedder()
vector_db = VectorStore()
# NOTE(review): the recorded output of this cell shows the embedding-model load
# and the ChromaDB connection messages twice, which suggests AnswerEngine()
# builds its own Embedder/VectorStore instead of reusing the two created above.
# Confirm in src/answer_engine.py; if it can accept existing instances, pass
# them in to avoid loading the model twice.
engine = AnswerEngine()

print("✅ Pipeline components initialized.")

Loading local embedding model: all-MiniLM-L6-v2...
Model loaded successfully.
✅ Connected to Local Vector Store (ChromaDB) at ./chroma_db
Loading local embedding model: all-MiniLM-L6-v2...
Model loaded successfully.
✅ Connected to Local Vector Store (ChromaDB) at ./chroma_db
✅ Pipeline components initialized.


In [8]:
# Define a test file (Create a dummy file if needed)
test_file = "test_document.txt"
with open(test_file, "w") as f:
    f.write("The project architecture uses a decoupled design. It relies on ChromaDB for vector storage and Groq for LLM inference.")

# 1. Load
docs = ingestor.load_file(test_file)
print(f"üìÑ Loaded {len(docs)} document(s).")

# 2. Chunk
chunks = chunker.chunk_documents(docs)
print(f"üß© Split into {len(chunks)} chunks.")

# 3. Embed
texts = [d.content for d in chunks]
vectors = embedder.embed(texts)
print(f"üß† Generated {len(vectors)} embeddings.")

# 4. Store
# Since our VectorStore is async, we use asyncio to run it in a notebook
await vector_db.add_documents(chunks, vectors)
print("üíæ Saved to Vector Store.")

📄 Loaded 1 document(s).
🧩 Split into 1 chunks.
🧠 Generated 1 embeddings.
Successfully stored 1 chunks locally.
💾 Saved to Vector Store.


In [13]:
query = "What database does this project use?"

print(f"‚ùì Question: {query}\n")

# Run the Answer Engine
response = await engine.answer(query)

print("ü§ñ Answer below :")
print(response)


❓ Question: What database does this project use?

Analyzing query: What database does this project use?...
🤖 Answer below :
The project uses ChromaDB for vector storage.
