## Imports

In [9]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

## Groq API

In [10]:
# Populate the process environment through python-dotenv, then look up the
# Groq API key. GROQ_API_KEY is None when the variable is not defined.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

## Code

In [11]:

def load_pdfs(folder_path):
    """Load and extract text from all PDFs in a folder.

    Only the top level of ``folder_path`` is scanned; sub-directories are not
    descended into.

    Args:
        folder_path: Path of the directory containing the PDF files.

    Returns:
        A list with one string per PDF file, each holding the concatenated
        text of that file's pages. Files are processed in sorted filename
        order. The list is empty when the folder holds no PDF files.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist (raised by
            ``os.listdir``).
    """
    texts = []
    # os.listdir() returns entries in arbitrary order; sort so the resulting
    # document order is reproducible between runs.
    for file in sorted(os.listdir(folder_path)):
        # Compare the extension case-insensitively so "REPORT.PDF" is kept.
        if not file.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(folder_path, file)
        # A directory that merely ends in ".pdf" cannot be parsed; skip it.
        if not os.path.isfile(pdf_path):
            continue
        reader = PdfReader(pdf_path)
        # Guard against extract_text() returning None (e.g. a page with no
        # extractable text) so the concatenation cannot raise TypeError.
        texts.append(
            "".join(page.extract_text() or "" for page in reader.pages)
        )
    return texts

def setup_chroma_db(texts):
    """Chunk the given texts, embed the chunks, and index them in Chroma.

    Args:
        texts: Iterable of document strings to index.

    Returns:
        The Chroma vector store to which all chunks were added.
    """
    # Chunks of 1000 characters with a 200-character overlap.
    chunker = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    pieces = [
        piece
        for document in texts
        for piece in chunker.split_text(document)
    ]

    # Embed with the MiniLM sentence-transformers model and store the chunks.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    store = Chroma(embedding_function=embedder)
    store.add_texts(pieces)
    return store

def build_rag_pipeline(vectorstore):
    """Build a RAG pipeline using Groq and ChromaDB.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``; its retriever
            supplies the context documents for each question.

    Returns:
        A ``RetrievalQA`` chain combining the Groq chat model with the
        vector store's retriever.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is unset or empty.
    """
    # Fail early with an actionable message instead of a client error later.
    if not GROQ_API_KEY:
        raise ValueError(
            "GROQ_API_KEY is not set; define it in the environment or in a "
            ".env file."
        )
    llm = ChatGroq(api_key=GROQ_API_KEY)
    retriever = vectorstore.as_retriever()
    # RetrievalQA cannot be constructed directly from an LLM: it needs a
    # combine-documents chain. from_chain_type() builds that chain from the
    # LLM and wires in the retriever.
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

def main():
    """Run the demo: index the PDFs under ./data and ask one sample question.

    Prints a notice and returns early when no PDF text was loaded; otherwise
    prints the chain's answer to the sample query.
    """
    documents = load_pdfs("./data")
    if not documents:
        print("No PDFs found in the folder!")
        return

    # Index the documents in Chroma, then wrap the store in the QA chain.
    chain = build_rag_pipeline(setup_chroma_db(documents))

    # Ask a single example question and show the answer.
    answer = chain.run("What are the main topics covered in the documents?")
    print("Response:", answer)

# Entry-point guard: call main() only when this module is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


ImportError: Could not import sentence_transformers python package. Please install it with `pip install sentence-transformers`.