front to back RAG implementation using lagchain

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate

from dotenv import load_dotenv
import os


In [2]:
load_dotenv()
PATH = './docs'
LLM_KEY = os.getenv("LLM")

In [7]:
loader = PyPDFLoader(os.path.join(PATH, os.listdir(PATH)[1]))
documents = loader.load()

# split the docs
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = text_splitter.split_documents(documents)

print(len(split_docs))
for doc in split_docs:
    print('-'*100)
    print(f"Page {doc.metadata['page']}: \n")
    print(f"{doc.page_content} \n\n")

120
----------------------------------------------------------------------------------------------------
Page 0: 

arXiv:2302.11382v1  [cs.SE]  21 Feb 2023
A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT
Jules White, Quchen Fu, Sam Hays, Michael Sandborn, Carlos O lea, Henry Gilbert,
Ashraf Elnashar, Jesse Spencer-Smith, and Douglas C. Schmi dt
Department of Computer Science
V anderbilt University, T ennessee
Nashville, TN, USA
{jules.white, quchen.fu, george.s.hays, michael.sandbor n, carlos.olea, henry.gilbert,
ashraf.elnashar, jesse.spencer-smith, douglas.c.schmi dt}@vanderbilt.edu
Abstract—Prompt engineering is an increasingly important
skill set needed to converse effectively with large languag e models
(LLMs), such as ChatGPT. Prompts are instructions given to a n
LLM to enforce rules, automate processes, and ensure speciﬁ c
qualities (and quantities) of generated output. Prompts ar e also
a form of programming that can customize the outputs and
interactions w

In [8]:
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.getenv("LLM"),
    openai_api_base="https://openrouter.ai/api/v1"
)

db = Chroma.from_documents(
    documents=split_docs[0:1],
    embedding=embedding_model,
    persist_directory=os.path.join(PATH, "chroma_db")
)
db.persist()


AttributeError: 'Chroma' object has no attribute 'persist'

In [10]:
"""
Complete RAG Implementation with Issue Fixes
Addresses: API configuration, embeddings setup, database persistence, and full retrieval+generation pipeline
"""

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from dotenv import load_dotenv
import os

load_dotenv()

PATH = './docs'
LLM_KEY = os.getenv("LLM")  # Ensure this is set in .env

if not LLM_KEY:
    raise ValueError("LLM environment variable not set. Check your .env file.")

In [17]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

documents = []
for file_path in os.listdir(PATH):
    if file_path.endswith('.pdf'):
        joined_path = os.path.join(PATH, file_path)
        loader = PyPDFLoader(joined_path)
        documents.extend(loader.load()) 

print(f"Loaded {len(documents)} pages")
split_docs = text_splitter.split_documents(documents)

print(f"Split into {len(split_docs)} chunks")


Ignoring wrong pointing object 2 65536 (offset 0)
Ignoring wrong pointing object 34 65536 (offset 0)
Ignoring wrong pointing object 92 65536 (offset 0)
Ignoring wrong pointing object 145 65536 (offset 0)
Ignoring wrong pointing object 206 65536 (offset 0)
Ignoring wrong pointing object 274 65536 (offset 0)
Ignoring wrong pointing object 330 65536 (offset 0)
Ignoring wrong pointing object 372 65536 (offset 0)


Loaded 803 pages
Split into 3329 chunks


In [18]:
# STEP 2: CREATE EMBEDDINGS AND VECTOR STORE

embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=LLM_KEY,
    base_url="https://openrouter.ai/api/v1"  # ISSUE #5 FIX: Use base_url instead of openai_api_base
)

chroma_db_path = os.path.join(PATH, "chroma_db")
db = Chroma.from_documents(
    documents=split_docs,  # ISSUE #6 FIX: Index all chunks
    embedding=embedding_model,
    persist_directory=chroma_db_path
)
print(f"Vector store created with {len(split_docs)} documents\n")

Vector store created with 3329 documents



In [30]:
# STEP 3: CREATE RETRIEVER
retriever = db.as_retriever(search_kwargs={"k": 5})  # Retrieve top 3 most relevant chunks

In [None]:
# STEP 4: SETUP LLM FOR GENERATION
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    api_key=LLM_KEY,
    base_url="https://openrouter.ai/api/v1",  # Remove if using OpenAI directly
    temperature=0.7
)

In [None]:
# STEP 5: CRAG SETUP
import json

rag_prompt = PromptTemplate(
    template="""
You are a helpful assistant that answers questions based on the provided context.

## CONTEXT:
{context}

## QUESTION:
{question}

## ANSWER:
Provide a clear, concise answer based on the context above. If the context doesn't contain the answer, say so.
Include the sources of the used context in the answer.
""",
    input_variables=["context", "question"],
)


def format_docs(docs):
    """Format retrieved documents for the prompt."""
    context = []
    for retreived_doc in docs:
        content = retreived_doc.page_content
        source = retreived_doc.metadata["source"]
        context.append({"source": source, "content": content})
    return json.dumps(context)


rag_chain = (
    {
        "context": retriever | format_docs,  # Retrieve and format documents
        "question": RunnablePassthrough(),  # Pass through the user question
    }
    | rag_prompt  # Format the prompt
    | llm  # Send to LLM
    | StrOutputParser()  # Parse the response as string
)

In [33]:
# ============================================================================
# STEP 8: TEST THE RAG PIPELINE
# ============================================================================
query = "was ist qualitätsmanagement?"
answer = rag_chain.invoke(query)

print(f"A: {answer}\n")

A: Qualitätsmanagement ist das Management von systematischen Planungs- und Steuerungsprozessen in einem Unternehmen, um Produkte von hoher Qualität zu liefern, die die Anforderungen der Kunden erfüllen. Es umfasst aufeinander abgestimmte Tätigkeiten zum Leiten und Lenken einer Organisation bezüglich der Qualität. Das Qualitätsmanagementsystem (QM-System) ist das Managementsystem zur Leitung und Lenkung einer Organisation hinsichtlich der Qualität, was die Festlegung der Qualitätspolitik, Qualitätsziele, Qualitätsplanung und Qualitätslenkung beinhaltet.

Sources:
- Grundlagen Qualitätsmanagement
- DIN EN ISO 9000

