In [11]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import FAISS
from langchain.embeddings import OllamaEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import shutil

In [12]:
# 📌 2. Load environment variables
# Called with no arguments, so python-dotenv's default .env lookup applies.
# NOTE(review): none of the cells shown here read an environment variable
# directly — confirm which setting (if any) this call is needed for.
load_dotenv()

# 📌 3. Define paths
# Both paths are relative, so they resolve against the kernel's current
# working directory rather than the notebook file's location.
DATA_FILE = "../data/sample_docs.txt"  # plain-text corpus to index
VECTOR_STORE_DIR = "../data/faiss_store"  # directory the FAISS index is saved into

In [13]:
# 📌 4. Load and split documents
# This cell only loads the raw file; the splitting itself is done by the
# text splitter further down. The status message is kept unchanged.
print("🔄 Loading and splitting documents...")

# Pin the encoding explicitly. Without it the loader falls back to the
# platform's default text encoding, so the same file can decode differently
# (or fail to decode) on a machine whose locale is not UTF-8.
# NOTE(review): assumes the corpus is stored as UTF-8 — confirm.
loader = TextLoader(DATA_FILE, encoding="utf-8")
documents = loader.load()

🔄 Loading and splitting documents...


In [14]:
# Chunking parameters: maximum chunk length and how much consecutive chunks
# overlap. NOTE(review): lengths are presumably counted in characters (the
# splitter's default length function) — confirm for the installed version.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
texts = text_splitter.split_documents(documents)

In [15]:
# Report how many chunks the splitter produced.
chunk_count = len(texts)
print(f"✅ {chunk_count} chunks created.")

✅ 2 chunks created.


In [16]:
# 📌 5. Initialize Ollama embeddings
# Name of the Ollama model used to embed both the chunks and the queries.
EMBEDDING_MODEL_NAME = "nomic-embed-text"

print("🔍 Initializing Ollama embeddings...")
# NOTE(review): no server address is passed, so the client's default Ollama
# endpoint is presumably used — confirm it matches your setup.
embedding_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)

🔍 Initializing Ollama embeddings...


In [17]:
# 📌 6. Rebuild the FAISS vector store from scratch
# An existing store directory is deleted outright — nothing is merged or
# updated in place. Because the deletion is irreversible, refuse to do it
# when there are no chunks to rebuild from; otherwise an empty or unreadable
# input file would cost us the previous index and leave nothing in its place.
if not texts:
    raise ValueError(
        f"No chunks were produced from {DATA_FILE!r}; "
        "keeping any existing vector store untouched."
    )

# Remove the stale store so the freshly built index is the only content there.
if os.path.exists(VECTOR_STORE_DIR):
    shutil.rmtree(VECTOR_STORE_DIR)

In [18]:
# Embed every chunk, index the resulting vectors, then persist the index
# under VECTOR_STORE_DIR.
# NOTE(review): embedding presumably requires a reachable Ollama server with
# the embedding model already pulled — confirm before running.
print("💾 Building vector store...")
db = FAISS.from_documents(documents=texts, embedding=embedding_model)
db.save_local(folder_path=VECTOR_STORE_DIR)

💾 Building vector store...


In [19]:
# Confirm where the index was written.
print("✅ Vector store created and saved at:", VECTOR_STORE_DIR)

# 📌 7. Test a simple similarity search
# Ask for the two closest chunks to a sample question and print them.
# NOTE(review): the recorded results are healthcare passages although the
# question is about RAG — presumably the search returns the nearest chunks
# regardless of topical relevance; confirm the query suits the corpus.
query = "What is Retrieval-Augmented Generation?"
retrieved_docs = db.similarity_search(query=query, k=2)

print("\n📚 Retrieved documents for test query:")
for i, doc in enumerate(retrieved_docs):
    # i is zero-based; the heading shown to the reader is one-based.
    print(f"\n--- Document {i+1} ---\n{doc.page_content}")

✅ Vector store created and saved at: ../data/faiss_store

📚 Retrieved documents for test query:

--- Document 1 ---
Natural language processing (NLP) allows AI systems to extract valuable insights from unstructured medical records and research papers.
Wearable devices integrated with AI monitor patient vitals in real-time, alerting healthcare providers to potential issues before they escalate.

--- Document 2 ---
AI is revolutionizing healthcare by enabling faster and more accurate diagnostics through advanced image recognition algorithms.
Machine learning models can analyze patient data to predict disease progression, helping doctors create personalized treatment plans.
Hospitals are using AI-powered tools to optimize resource allocation, reducing wait times and improving patient care efficiency.
