In [1]:
# Install necessary libraries if you haven't already
# !pip install pypdf
# !pip install langchain
# !pip install langchain-google-genai
# !pip install chromadb
# !pip install -U langchain-community
# !pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

import os
import getpass
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

print("Libraries imported successfully!")







Libraries imported successfully!


In [2]:
#!pip install langchain_google_genai

In [3]:
# --- 1. Set up the API Key ---
# For security, use getpass to hide your API key input.
# Or, you can set it as an environment variable.
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("you api key")
    print("\nAPI key set successfully!")


you api key ········



API key set successfully!


In [4]:
#!pip install pymupdf

In [5]:
# --- 2. Load the Document ---
# Define the path to your PDF file.
# The file 'BELT STOCK.pdf' should be in the same directory as this notebook.
file_path = "BELT_STOCK_STRUCTURED.pdf"
try:
    loader = PyMuPDFLoader(file_path)
    documents = loader.load()
    print(f"\nLoaded {len(documents)} pages from {file_path}")
except FileNotFoundError:
    print(f"\nError: The file '{file_path}' was not found. Please ensure it is in the same directory.")
    # Exit gracefully if the file is not found
    exit()


Loaded 1 pages from BELT_STOCK_STRUCTURED.pdf


In [6]:
# --- 3. Split Documents into Chunks ---
# We split the documents into smaller, manageable chunks.
# This helps the model focus on relevant information without being overwhelmed.
# The chunk size and overlap can be tuned for better performance.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)
print(f"Split {len(documents)} pages into {len(docs)} chunks.")


Split 1 pages into 1 chunks.


In [7]:
# --- 4. Initialize Embedding Model and Vector Store ---
# An embedding model converts text into numerical vectors.
# We'll use Google's embedding model for this.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# We'll use Chroma as our vector store to store the document chunks and their embeddings.
# This allows us to quickly search for relevant chunks later.
vectorstore = Chroma.from_documents(docs, embeddings)
print("Vector store created successfully.")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Vector store created successfully.


In [8]:
# --- 5. Create the Retriever ---
# The retriever is a key component of RAG. It's responsible for fetching
# the most relevant document chunks based on a user's query.
retriever = vectorstore.as_retriever()
print("Retriever created.")

Retriever created.


In [9]:
# --- 6. Set up the RAG Chain ---
# First, define the prompt template that will guide the LLM's response.
# The prompt tells the model to use the provided context to answer the question.
prompt = ChatPromptTemplate.from_template("""
Answer the user's question based only on the provided context.
Answer strictly from the BELT STOCK table.provided context of each row and each column have precise stock details 
If you do not have enough information in the context, please state that you cannot answer.

Context:
{context}

Question:
{input}
""")

In [10]:
# Initialize the LLM with the gemini-2.5-flash model.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Create a 'stuff' documents chain. This chain takes the retrieved documents
# and 'stuffs' them into the prompt's context.
document_chain = create_stuff_documents_chain(llm, prompt)

# Create the final retrieval chain. This chain combines the retriever and the document chain.
# When a user asks a question, this chain will:
# 1. Retrieve relevant documents using the retriever.
# 2. Pass those documents to the document chain for generation.
retrieval_chain = create_retrieval_chain(retriever, document_chain)
print("RAG chain setup complete.")


RAG chain setup complete.


In [11]:
#--- 7. Test the Agent with multiple questions ---
print("\n--- Testing the RAG Agent with a list of questions ---")

# Define a list of questions you want to ask.
questions = [
    "What is the belt drum details of BELT STRENGTH EP800/4 AND BELT WIDTH 2400MM?",
    "What is the total stock belt in meters for the ST 2000, 2000 mm belt?",
    "What are the belt details of no of drum,drum length of  ST1400, 2000mm width",
    "What are the belt details of no of drum,drum length of  ST2000, 2000mm width?"
]

# Loop through the questions and get the answers.

for question in questions:
    print("-" * 50)
    print(f"Question: {question}")
    response = retrieval_chain.invoke({"input": question})
    print("Answer:")
    print(response["answer"])




--- Testing the RAG Agent with a list of questions ---
--------------------------------------------------
Question: What is the belt drum details of BELT STRENGTH EP800/4 AND BELT WIDTH 2400MM?


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Answer:
For BELT STRENGTH EP800/4 and BELT WIDTH 2400MM, the belt drum details are:
*   Drum Length: 250m, No. of Drums: 1
*   Drum Length: 187m, No. of Drums: 1
--------------------------------------------------
Question: What is the total stock belt in meters for the ST 2000, 2000 mm belt?


Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Answer:
The total stock belt in meters for the ST2000, 2000 mm belt is 3440 meters.
--------------------------------------------------
Question: What are the belt details of no of drum,drum length of  ST1400, 2000mm width


Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Answer:
For ST1400 with 2000mm width, the Drum Length is 500m. The number of drums is not provided in the context.
--------------------------------------------------
Question: What are the belt details of no of drum,drum length of  ST2000, 2000mm width?


Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Answer:
For ST2000, 2000mm width, the belt details are:

*   Drum Length: 250m, No. of Drums: 6
*   Drum Length: 300m, No. of Drums: 3
*   Drum Length: 190m, No. of Drums: 1
*   Drum Length: 200m, No. of Drums: 3
*   Drum Length: 250m, No. of Drums: 1
