In [None]:
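## Install required packages (uncomment and run once on a fresh environment, e.g. a new Colab runtime).
## `%pip` is used instead of `!pip` so the packages land in the environment of the running kernel.
# %pip install -q langchain-community
# %pip install -q langchain-qdrant
# %pip install -q langchain-huggingface
# %pip install -q langchain-groq
# %pip install -q pdfplumber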



In [None]:
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_huggingface import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

In [None]:
# Load the owner's manual and cut it into overlapping chunks for embedding.
pdf_path = "APP-TIAGO-FINAL-OMSB.pdf"
loader = PDFPlumberLoader(pdf_path)
# Use load() rather than load_and_split(): the latter already runs a default
# text splitter, so the text would be chunked twice (once with default
# settings, once with the settings below) and `pages` would not hold pages.
pages = loader.load()
# 1000-character chunks with 200 characters of overlap, so a sentence that
# straddles a chunk boundary is still fully present in one of the chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(pages)

In [None]:
# Number of chunks produced by the splitter.
len(documents)

243

In [None]:
# Sanity check: inspect the first chunk (its text plus the PDF metadata attached by the loader).
documents[0]

Document(metadata={'source': '/content/drive/MyDrive/RingCentral_Assignment/APP-TIAGO-FINAL-OMSB.pdf', 'file_path': '/content/drive/MyDrive/RingCentral_Assignment/APP-TIAGO-FINAL-OMSB.pdf', 'page': 1, 'total_pages': 153, 'CreationDate': "D:20160225230124+05'30'", 'Creator': 'Adobe Acrobat Pro 10.0.0', 'ModDate': "D:20160317094306+05'30'", 'Producer': 'Adobe Acrobat Pro 10.0.0', 'Title': ''}, page_content='OWNER’S MANUAL')

In [None]:
# Local, on-disk Qdrant instance: data is persisted under ./langchain_qdrant
# and survives kernel restarts.
client = QdrantClient(path="langchain_qdrant")

# Because the store is persistent, an unconditional create_collection() fails
# on every run after the first one ("collection already exists"). Create the
# collection only when it is missing so the notebook survives Restart & Run All.
if not client.collection_exists("demo_collection"):
    client.create_collection(
        collection_name="demo_collection",
        # The vector size must equal the output dimension of the embedding
        # model configured below (1024 for BAAI/bge-m3).
        vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
    )

# Embedding model used both for indexing the chunks and for embedding queries.
embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")

# LangChain wrapper that reads from / writes to the collection above.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="demo_collection",
    embedding=embed_model,
)

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/15.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [None]:
# The Qdrant collection is persisted on disk, so re-running this cell would
# embed and insert every chunk again under new ids, leaving duplicates that
# crowd the top-k retrieval results. Index only while the collection is empty.
# NOTE: delete the "langchain_qdrant" folder to force re-indexing after the
# PDF or the chunking settings change.
if client.count(collection_name="demo_collection", exact=True).count == 0:
    document_ids = vector_store.add_documents(documents=documents)
    # Report a count instead of echoing hundreds of point ids as cell output.
    print(f"Indexed {len(document_ids)} chunks.")
else:
    print("Collection already populated; skipping indexing.")

['7d8634d6227e4cb2b0e37cf4684d3f0c',
 'efe1a23df1be4ce58a18f140ab76b169',
 '8b04a88cca9747ab945a54eb389fa33f',
 '3b22a2cc59514b478f06e808146c1816',
 '574fda46bcaa4f6e9ccf43f36b83796a',
 'e1b883fb98344bffa7d7719f509825ae',
 '00775c893de24cd0b49cf619c40b15c8',
 '7ba3385aea8845ae9a006ab746f7d094',
 '79e8d5c72dc44c859ce668bc31a98f0c',
 '9ded25e1039e40a0b30434d3d7362c43',
 'daa19cc36dee450a8be1d8d1387718fc',
 'e8e736fdef0a457fbc0762a36f5e46d7',
 'decb49aa3b714b888b1949683511a341',
 '0778a04cb8a4405cb741c8e90e732db4',
 'e968e3bc45b44fa4930f9e644740216f',
 'd66844b733e64298a2cb9c09cd4af10a',
 '9b3dc00606744675b5bc13a91e7cc2bb',
 '256e938115204c3c89c03691a7db0bb3',
 '88034a187f6c496b9ecfaecb349a5333',
 '42d3fca52f8b46939d11305d88ddad4e',
 '0f45b6b26290438dbcb16f0cba7d90d3',
 'd90eacd381d44b1c99cbf4152dcca639',
 '33fc1c213e784284bb3e405b3c698772',
 '35cc0f9ce03143678494981df226f6be',
 'db7287c8c32542229b87d01d52045bb2',
 'd353aef1ce4b4d5ea8b256d11b3394b4',
 '7567889111b84567a26b67efd9cd9067',
 

In [None]:
# Plain similarity retrieval: every query returns its 5 closest chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)
# Optional reranking stage, currently disabled:
# compressor = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2")
# compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,
#                                                         base_retriever=retriever
#                                                         )

In [None]:
# Prompt used by the QA chain. It has two placeholders: {context} (filled with
# the retrieved manual excerpts) and {question} (the user's review or query).
# The model is told to treat the manual as ground truth and to report what the
# manual says, so the review's claims can be compared against it afterwards.
RAG_PROMPT_TEMPLATE = """
You are given user review and the context from the product manual.
User review may not always be correct, eg: he may mention in the review that product is having 2 years of warranty but context may contain warranty period as 5 years.
Assume information present in the context is always correct.
So you have to find relevant information as per the user review from the given context and return back the correct information, so that later I can compare the information mentioned in user review with the information given by you.
Remember your source of information is just the context below.
Give your answer in brief.
<context>
{context}
</context>

Review: {question}
"""

In [None]:
def load_llm():
    """Build the Groq-hosted chat model used by the QA chain.

    The API key is read from the ``GROQ_API_KEY`` environment variable so that
    no credential is stored in the notebook. Any key that was previously
    committed in this cell should be treated as leaked and revoked.

    Returns:
        ChatGroq: chat model client configured for "llama-3.1-8b-instant".

    Raises:
        RuntimeError: if ``GROQ_API_KEY`` is unset or blank.
    """
    import os  # local import keeps this cell self-contained

    groq_api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if not groq_api_key:
        # Fail early with an actionable message instead of an opaque
        # authentication error on the first request.
        raise RuntimeError(
            "GROQ_API_KEY is not set. Export it in the environment (or set it "
            "via os.environ / your notebook's secret manager) before calling load_llm()."
        )
    llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.1-8b-instant")
    return llm

In [None]:
# Wrap the template so the chain can fill in {context} and {question}.
prompt = PromptTemplate(
    template=RAG_PROMPT_TEMPLATE,
    input_variables=["context", "question"],
)
llm = load_llm()

# "stuff" chain: the retrieved chunks are placed into a single prompt.
# Source documents are returned alongside the answer for inspection.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt, "verbose": False},
)

In [None]:
# Sample questions used to spot-check retrieval and answer quality.
queries = [
    "What is customer assisstance number?",
    "What are different dashboard features present?",
    "When I should for 2nd free service?",
    "What is the warranty period?",
]

# For each query (in order), keep only the text of the chunks the retriever returns.
relevant_docs_list = [
    [hit.page_content for hit in retriever.invoke(question)]
    for question in queries
]

In [None]:
# Show the retrieved chunk texts, one inner list per query, in the same order as `queries`.
relevant_docs_list

[['CUSTOMER ASSISTANCE\nIn our constant endeavour to provide assistance and complete You can also approach nearest TATA MOTORS dealer. A sepa-\nservice backup, TATA MOTORS has established an all India cus- rate Dealer network address booklet is provided with the\ntomer assistance centre. Owner’s manual.\nIn case you have a query regarding any aspect of your vehicle, TATA MOTORS 24X7 Roadside Assistance Program offers tech-\nour Customer Assistance Centre will be glad to assist you on nical help in the event of a breakdown. Call to the toll-free\nour Toll Free no. 1800 209 7979 Roadside Assistance.\nFor additional information, refer to "24X7 Roadside Assis-\ntance" section in the Owner’s manual.\n2',
  'happening within hailing distance of a and other oncoming traffic.\nTATA MOTORS Authorized Workshop is\nplaces\nvery low.\n**(The response time will depend on the\nIt is precisely for this reason, we have\nlocation, terrain, traffic density and the\ntied up with TVS AA, who will provide\

In [None]:
# Run every query through the QA chain and keep only the generated answer text
# (the chain's output also carries the source documents, which are dropped here).
relevant_answer_list = [
    qa_chain.invoke(question)["result"]
    for question in queries
]

In [None]:
# Show the generated answers, one per query, in the same order as `queries`.
relevant_answer_list

['Customer assistance number is 1800 209 7979.',
 'Based on the given context, the different dashboard features present are:\n\n1. Instrument Cluster (Version 1) \n   - Tachometer\n   - Speedometer\n   - Temperature\n   - Set knob\n   - Mode knob\n   - Fuel Gauge\n   - Driver Information Display\n\n2. Instrument Cluster (Version 2)\n   - Tachometer\n   - Driver Information Display\n   - Speedometer\n   - Set knob\n   - Mode knob\n   - Fuel Gauge\n\n3. Steering Wheel Switches (RHS) \n   - Mic\n   - Volume\n   - Mute / phone reject\n   - Phone receive\n   - Seek forward / backward',
 'You should get the 2nd free service at 7000-8000 kms OR 6 months, whichever is earlier.',
 'The warranty period is 24 months from the date of sale of the car or a mileage of 75,000 Kms, whichever occurs earlier.']