In [1]:
import os
from dotenv import load_dotenv
from typing import List, Optional, Any
import google.generativeai as genai

from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM

In [2]:
# ✅ Load the API key from the environment (optionally populated from a .env file)
# load_dotenv() copies entries of a local .env file into os.environ without
# overriding variables that are already set.
load_dotenv()
_google_api_key = os.getenv("GOOGLE_API_KEY")
if not _google_api_key:
    # Fail early with a readable message instead of an opaque API error on the
    # first request.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it or add it to a .env file."
    )
genai.configure(api_key=_google_api_key)

In [3]:
# ✅ Gemini LLM wrapper
class GeminiLLM(LLM):
    """LangChain ``LLM`` adapter that delegates text generation to Gemini.

    Fields may be overridden with keyword arguments, e.g.
    ``GeminiLLM(model_name="models/gemini-1.5-pro", temperature=0.2)``.
    """

    # Underlying ``genai.GenerativeModel``; built in ``__init__`` from
    # ``model_name`` unless one is supplied explicitly.
    model: Any = None
    # Gemini model identifier passed to ``genai.GenerativeModel``.
    model_name: str = "models/gemini-1.5-flash"
    # Sampling temperature sent with every generation request.
    temperature: float = 0.7

    def __init__(self, **kwargs):
        # Forward kwargs to the base class so field overrides are applied
        # instead of being silently dropped.
        super().__init__(**kwargs)
        if self.model is None:
            self.model = genai.GenerativeModel(self.model_name)

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to Gemini and return the generated text.

        Args:
            prompt: Fully rendered prompt text.
            stop: Optional stop sequences; forwarded to Gemini.
            run_manager: Accepted for compatibility with the LangChain base
                signature; not used here.
            **kwargs: Extra call-time options passed by LangChain; ignored.

        Returns:
            The ``text`` attribute of the Gemini response.
        """
        generation_config = {"temperature": self.temperature}
        if stop:
            # NOTE(review): the Gemini API documents a maximum of 5 stop
            # sequences, so any extras are dropped — confirm this limit.
            generation_config["stop_sequences"] = list(stop)[:5]
        response = self.model.generate_content(
            prompt, generation_config=generation_config
        )
        return response.text

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "gemini"

In [4]:
# ✅ Load all .pdf and .txt files from folder
def load_documents(folder_path):
    """Load every ``.pdf`` and ``.txt`` file directly inside ``folder_path``.

    Extension matching is case-insensitive, entries that are not regular
    files are skipped, and files are visited in sorted name order so repeated
    runs ingest documents in the same order.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A flat list of whatever each loader's ``load()`` returns, concatenated
        in file-name order. Empty if no matching file is found.
    """
    docs = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, file)
        if not os.path.isfile(path):
            # e.g. a sub-directory that happens to be named "something.pdf"
            continue
        lowered = file.lower()
        if lowered.endswith(".pdf"):
            loader = PyPDFLoader(path)
        elif lowered.endswith(".txt"):
            loader = TextLoader(path)
        else:
            continue
        docs.extend(loader.load())
    return docs


In [None]:
# ✅ Create persistent ChromaDB vector store
def create_vectorstore(
    documents,
    save_path="chroma_store",
    chunk_size=1000,
    chunk_overlap=100,
    embedding_model="models/embedding-001",
):
    """Split ``documents`` into chunks, embed them and store them in Chroma.

    Args:
        documents: Documents to index (as produced by the loaders).
        save_path: Directory Chroma persists the collection to.
        chunk_size: Target chunk size passed to ``CharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks.
        embedding_model: Google Generative AI embedding model name.

    Returns:
        The populated ``Chroma`` vector store.

    Raises:
        ValueError: If there is nothing to index (no documents, or splitting
            produced no chunks).
    """
    if not documents:
        # Fail with a clear message instead of an error from deep inside the
        # vector store.
        raise ValueError("No documents to index; nothing was loaded.")

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        raise ValueError("Documents produced no text chunks to index.")

    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=save_path
    )

    # Newer Chroma wrappers persist automatically and may not expose
    # persist(); only call it where it exists.
    if hasattr(vectorstore, "persist"):
        vectorstore.persist()
    return vectorstore

In [6]:
# ✅ Answering questions using Gemini + Chroma
def ask_question(vectorstore, question):
    """Answer ``question`` from ``vectorstore`` using Gemini and print the result.

    Builds a retriever from the vector store, runs a ``RetrievalQA`` chain
    with a custom prompt, then prints the answer followed by the source of
    each retrieved document.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.
        question: The user's question.

    Returns:
        The chain's result mapping (includes ``'result'`` and
        ``'source_documents'``), so callers can use the answer
        programmatically as well as read the printed output.
    """
    retriever = vectorstore.as_retriever()
    gemini = GeminiLLM()

    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""
You are a helpful assistant. Use the following context to answer the question.
If you don't know the answer, just say you don't know.

Context:
{context}

Question:
{question}
        """
    )

    qa = RetrievalQA.from_chain_type(
        llm=gemini,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
        return_source_documents=True
    )

    result = qa({"query": question})
    print("\n📌 Answer:\n", result['result'])

    print("\n📄 Sources used:")
    for doc in result['source_documents']:
        # Not every document is guaranteed to carry a 'source' entry; avoid a
        # KeyError after the answer has already been produced.
        print("-", doc.metadata.get('source', 'unknown'))

    return result

In [None]:
# ✅ Main workflow
if __name__ == "__main__":
    chroma_path = "chroma_store"
    # Chroma (>= 0.4) persists its collection to "chroma.sqlite3". Checking
    # for that file tells us whether an index already exists, so documents
    # are not re-embedded (and duplicated in the store) on every run.
    # NOTE(review): older duckdb-backed Chroma versions use different files;
    # on those this check falls back to rebuilding the index.
    chroma_db_file = os.path.join(chroma_path, "chroma.sqlite3")

    os.makedirs(chroma_path, exist_ok=True)

    if not os.path.exists(chroma_db_file):
        print("📥 Loading and embedding documents...")
        docs = load_documents("docs/")
        vs = create_vectorstore(docs, save_path=chroma_path)
    else:
        print("📦 Loading Chroma vector store...")
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        # Opening an existing store changes nothing, so no persist() is needed.
        vs = Chroma(persist_directory=chroma_path, embedding_function=embeddings)

    # 🔄 Ask questions in a loop
    while True:
        try:
            query = input("\n💬 Ask a question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly.
            break
        if not query:
            # Don't spend an LLM call on an empty line.
            continue
        if query.lower() == "exit":
            break
        ask_question(vs, query)

In [None]:
# 