# RAG Using LangChain Mistral and Weaviate(Cloud) DB

Using Opensource libraries

In [1]:
# Import general libraries
import os
from dotenv import load_dotenv

In [2]:
# Load environment variables
load_dotenv()

True

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

In [4]:
# Define Embedding
embeddings = HuggingFaceEmbeddings(model="sentence-transformers/all-mpnet-base-v2")

In [5]:
# Load the Data
from langchain.document_loaders import PyPDFLoader

In [6]:
loader = PyPDFLoader("NeurIPS-2020-retrieval-augmented-generation-for-knowledge-intensive-nlp-tasks-Paper.pdf", extract_images=True)
pages = loader.load()

Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 89 0 (offset 0)


In [7]:
pages

[Document(metadata={'producer': 'macOS Version 10.15.7 (Build 19H15) Quartz PDFContext', 'creator': 'LaTeX with hyperref', 'creationdate': "D:20210107190157Z00'00'", 'moddate': "D:20210107190157Z00'00'", 'source': 'NeurIPS-2020-retrieval-augmented-generation-for-knowledge-intensive-nlp-tasks-Paper.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Retrieval-Augmented Generation for\nKnowledge-Intensive NLP Tasks\nPatrick Lewis†‡ , Ethan Perez?,\nAleksandra Piktus† , Fabio Petroni† , Vladimir Karpukhin† , Naman Goyal† , Heinrich Küttler† ,\nMike Lewis† , Wen-tau Yih† , Tim Rocktäschel†‡ , Sebastian Riedel†‡ , Douwe Kiela†\n† Facebook AI Research;‡ University College London;?New York University;\nplewis@fb.com\nAbstract\nLarge pre-trained language models have been shown to store factual knowledge\nin their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-\nstream NLP tasks. However, their ability to access and precisely manipulate\nknowledge is s

In [8]:
# Configure Weaviate client
from langchain_community.vectorstores import Weaviate
import weaviate
from weaviate.classes.init import Auth

In [9]:
# Connect to Weaviate Cloud
# Connect to Weaviate Cloud
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=os.getenv("WEAVIATE_URL"),
    auth_credentials=Auth.api_key(os.getenv("WEAVIATE_API_KEY")),
)

print(client.is_ready())  # Should print: `True`

True


In [10]:
# Split the data in chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [11]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap= 20)
docs = text_splitter.split_documents(pages)
docs

[Document(metadata={'producer': 'macOS Version 10.15.7 (Build 19H15) Quartz PDFContext', 'creator': 'LaTeX with hyperref', 'creationdate': "D:20210107190157Z00'00'", 'moddate': "D:20210107190157Z00'00'", 'source': 'NeurIPS-2020-retrieval-augmented-generation-for-knowledge-intensive-nlp-tasks-Paper.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Retrieval-Augmented Generation for\nKnowledge-Intensive NLP Tasks\nPatrick Lewis†‡ , Ethan Perez?,\nAleksandra Piktus† , Fabio Petroni† , Vladimir Karpukhin† , Naman Goyal† , Heinrich Küttler† ,\nMike Lewis† , Wen-tau Yih† , Tim Rocktäschel†‡ , Sebastian Riedel†‡ , Douwe Kiela†\n† Facebook AI Research;‡ University College London;?New York University;\nplewis@fb.com\nAbstract\nLarge pre-trained language models have been shown to store factual knowledge\nin their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-\nstream NLP tasks. However, their ability to access and precisely manipulate\nknowledge is s

In [12]:
len(docs)

71

In [13]:
from langchain_weaviate import WeaviateVectorStore

In [14]:
# Store the chunks and data in Weaviate after embedding
vector_db = WeaviateVectorStore.from_documents(
    docs,
    embeddings,
    client=client,
    by_text= False
)

In [15]:
# Test Similarity search
vector_db.similarity_search("What is RAG?", k=3)

[Document(metadata={'creator': 'LaTeX with hyperref', 'creationdate': "D:20210107190157Z00'00'", 'source': 'NeurIPS-2020-retrieval-augmented-generation-for-knowledge-intensive-nlp-tasks-Paper.pdf', 'page_label': '6', 'total_pages': 16.0, 'moddate': "D:20210107190157Z00'00'", 'producer': 'macOS Version 10.15.7 (Build 19H15) Quartz PDFContext', 'page': 5.0}, page_content='cases for NQ, where an extractive model would score 0%.\n4.2 Abstractive Question Answering\nAs shown in Table 2, RAG-Sequence outperforms BART on Open MS-MARCO NLG by 2.6 Bleu\npoints and 2.6 Rouge-L points. RAG approaches state-of-the-art model performance, which is\nimpressive given that (i) those models access gold passages with speciﬁc information required to\ngenerate the reference answer, (ii) many questions are unanswerable without the gold passages, and\n(iii) not all questions are answerable from Wikipedia alone. Table 3 shows some generated answers\nfrom our models. Qualitatively, we ﬁnd that RAG models hallu

In [16]:
vector_db.similarity_search("What is Attention?", k=3)

[Document(metadata={'page_label': '15', 'creationdate': "D:20210107190157Z00'00'", 'moddate': "D:20210107190157Z00'00'", 'creator': 'LaTeX with hyperref', 'total_pages': 16.0, 'source': 'NeurIPS-2020-retrieval-augmented-generation-for-knowledge-intensive-nlp-tasks-Paper.pdf', 'producer': 'macOS Version 10.15.7 (Build 19H15) Quartz PDFContext', 'page': 14.0}, page_content='biases in sentence-pair classiﬁcation with elastic weight consolidation.ArXiv, abs/2004.14366,\n2020. URLhttps://arxiv.org/abs/2004.14366.\n[58] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,\nŁ ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In I. Guyon, U. V . Luxburg,\nS. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett, editors,Advances in Neural\nInformation Processing Systems 30, pages 5998–6008. Curran Associates, Inc., 2017. URL\nhttp://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf .\n[59] Ashwin Vijayakumar, Michael Cogswell, Ramp

In [17]:
print(vector_db.similarity_search("What is RAG?", k=3)[0].page_content)

cases for NQ, where an extractive model would score 0%.
4.2 Abstractive Question Answering
As shown in Table 2, RAG-Sequence outperforms BART on Open MS-MARCO NLG by 2.6 Bleu
points and 2.6 Rouge-L points. RAG approaches state-of-the-art model performance, which is
impressive given that (i) those models access gold passages with speciﬁc information required to
generate the reference answer, (ii) many questions are unanswerable without the gold passages, and
(iii) not all questions are answerable from Wikipedia alone. Table 3 shows some generated answers
from our models. Qualitatively, we ﬁnd that RAG models hallucinate less and generate factually
correct text more often than BART. Later, we also show that RAG generations are more diverse than
BART generations (see §4.5).
4.3 Jeopardy Question Generation
Table 2 shows that RAG-Token performs better than RAG-Sequence on Jeopardy question generation,


In [18]:
print([doc.page_content + '\n' for doc in vector_db.similarity_search("What is RAG?", k=3)])

['cases for NQ, where an extractive model would score 0%.\n4.2 Abstractive Question Answering\nAs shown in Table 2, RAG-Sequence outperforms BART on Open MS-MARCO NLG by 2.6 Bleu\npoints and 2.6 Rouge-L points. RAG approaches state-of-the-art model performance, which is\nimpressive given that (i) those models access gold passages with speciﬁc information required to\ngenerate the reference answer, (ii) many questions are unanswerable without the gold passages, and\n(iii) not all questions are answerable from Wikipedia alone. Table 3 shows some generated answers\nfrom our models. Qualitatively, we ﬁnd that RAG models hallucinate less and generate factually\ncorrect text more often than BART. Later, we also show that RAG generations are more diverse than\nBART generations (see §4.5).\n4.3 Jeopardy Question Generation\nTable 2 shows that RAG-Token performs better than RAG-Sequence on Jeopardy question generation,\n', 'top K documents are retrieved using the retriever, and the generator pr

In [19]:
# Create Prompt Template
from langchain.prompts import ChatPromptTemplate

In [20]:
template ="""
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use teb sendtences maximum to keep the ansder cocise.
Question: {question}
Context: {context}
Answer:
"""

In [21]:
prompt = ChatPromptTemplate.from_template(template)

In [106]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.llms import HuggingFaceHub

In [155]:
llm = HuggingFaceEndpoint(
    model="mistralai/Devstral-Small-2507",
    huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY")
)
# llm = HuggingFaceHub(
#     repo_id="mistralai/Mistral-7B-Instruct-v0.1",
#     model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
#     huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY")
# )


In [156]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [157]:
retriever = vector_db.as_retriever(search_type="similarity")

In [158]:
output_perser = StrOutputParser()

In [159]:
# Create RAG Chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm 
    | output_perser
)

In [160]:
# Run the chain
response = rag_chain.invoke("What is RAG?")
print(response)

RAG (Retrieval-Augmented Generation) is a method that combines retrieval and generation for knowledge-intensive NLP tasks. It retrieves relevant documents and uses them to generate answers or text. RAG models have shown to hallucinate less and generate factually correct text more often than other models like BART.
