In [5]:
#install libraries
!pip install langchain
!pip install langchain-groq
!pip install pypdf
!pip install chromadb

from IPython.display import clear_output
clear_output()

In [6]:
#import libs
from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

In [7]:
# setting up the environment variables
from google.colab import userdata
groq_api_key = userdata.get('groq_api')
hf_api_key = userdata.get('hf_api')

In [8]:
#loading pdf
pdf =  "https://arxiv.org/pdf/2005.11401.pdf"

loader = PyPDFLoader(pdf)

pages = loader.load()

In [9]:
#creating the docs
docs = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0).split_documents(pages)

In [10]:
print(f"You have {len(docs)} documents")
print(f"The first document has {len(docs[0].page_content)} characters")

You have 83 documents
The first document has 928 characters


In [11]:
# embedding and vectorestores
embedding = HuggingFaceInferenceAPIEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    api_key=hf_api_key
)

db = Chroma.from_documents(
    docs,
    embedding,
    persist_directory="./"
)

print(db._collection.count())


83


In [None]:
llm = ChatGroq(
    model='llama3-70b-8192',
    temperature=0.3,
    api_key=groq_api_key,
    verbose=True
)

In [None]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever()
)

In [None]:
qa_chain({"query": "What is the paper about?"})

  warn_deprecated(


{'query': 'What is the paper about?',
 'result': 'The paper appears to be about a language model called RAG (Retrieval-Augmented Generator) that combines the strengths of two approaches: retrieval-based models that retrieve evidence from a database and generate text based on that evidence, and parametric models that generate text based on patterns learned from a large corpus of text. The paper presents experiments and results showing that RAG achieves high accuracy in various natural language processing tasks, such as fact verification, question answering, and text generation, and outperforms other models in some cases.'}

In [None]:
# you will see it doesn't carry the answer from the previous question
qa_chain({"query": "Write five main topics about the paper."})

{'query': 'Write five main topics about the paper.',
 'result': 'Based on the provided context, the five main topics about the paper are:\n\n1. **RAG Model Performance**: The paper discusses the performance of the RAG (Retrieval-Augmented Generator) model in various tasks, including 2-way classification, generation diversity, and factual accuracy.\n\n2. **Comparison with Other Models**: The paper compares the performance of the RAG model with other models, such as RoBERTa and BART, in terms of accuracy, generation diversity, and factual accuracy.\n\n3. **Document Retrieval and Evidence**: The paper explores the ability of the RAG model to retrieve relevant documents and evidence to support its generated responses, and analyzes the overlap between the retrieved documents and gold evidence annotations.\n\n4. **Generation Tasks and Diversity**: The paper investigates the performance of the RAG model in generation tasks, such as Jeopardy question generation, and analyzes the diversity of t

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

memory = ConversationBufferMemory(
    memory_key="history",
    return_messages=True
  )

memory.save_context({"input": "Hi"}, {"output": "Hey"})

memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi'), AIMessage(content='Hey')]}

In [None]:
qa = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True
)

In [None]:
qa.predict(input="Hi there!")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content='Hi'), AIMessage(content='Hey')]
Human: Hi there!
AI:[0m

[1m> Finished chain.[0m


"Hi there! I'm thrilled to be chatting with you. I've been trained on a vast amount of text data, including books, articles, and conversations, so I'm ready to dive into a fun conversation. By the way, did you know that I've been trained on over 1.5 billion parameters? It's a lot, I know! Anyway, what's on your mind today?"

In [None]:
qa.predict(input="What is the paper about?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content='Hi'), AIMessage(content='Hey'), HumanMessage(content='Hi there!'), AIMessage(content="Hi there! I'm thrilled to be chatting with you. I've been trained on a vast amount of text data, including books, articles, and conversations, so I'm ready to dive into a fun conversation. By the way, did you know that I've been trained on over 1.5 billion parameters? It's a lot, I know! Anyway, what's on your mind today?")]
Human: What is the paper about?
AI:[0m

[1m> Finished chain.[0m


'I\'m happy to share that with you! The paper that my training is based on is titled "Language Models are Few-shot Learners" by Tom B. Brown et al. It was published in 2020 and introduced the concept of using a large language model to perform well on a wide range of tasks with minimal fine-tuning. The paper explores the idea that large language models can be adapted to perform well on a variety of tasks, such as conversational dialogue, text classification, and language translation, with only a few examples of training data. It\'s a fascinating read, and I\'m proud to be a part of the next generation of language models inspired by this research!'

In [None]:
# Use ConversationChain with its own PromptTemplate
from langchain.prompts.prompt import PromptTemplate


template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
{history}
Question: {input}
Helpful Answer:"""

PROMPT = PromptTemplate(
    input_variables=["history", "input"],
    template=template
)

qa = ConversationChain(
    llm=llm,
    prompt=PROMPT,
    memory=memory,
    verbose=True
)

qa.predict(input="What is the paper about?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
[HumanMessage(content='Hi'), AIMessage(content='Hey'), HumanMessage(content='Hi there!'), AIMessage(content="Hi there! I'm thrilled to be chatting with you. I've been trained on a vast amount of text data, including books, articles, and conversations, so I'm ready to dive into a fun conversation. By the way, did you know that I've been trained on over 1.5 billion parameters? It's a lot, I know! Anyway, what's on your mind today?"), HumanMessage(content='What is the paper about?'), AIMessage(content='I\'m happy to share that with you! The paper that my training is based on is titled "Language Models are Few-shot Learners" by Tom B. Brown et al. It was published in 2020 and introduced the concept of using a large language model to perform

'According to the conversation, the paper "Language Models are Few-shot Learners" by Tom B. Brown et al. is about using large language models to perform well on a wide range of tasks with minimal fine-tuning, such as conversational dialogue, text classification, and language translation, with only a few examples of training data.'

In [None]:
#converstional retrieval chain and conversational buffer memory
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

retriever = db.as_retriever()

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

llm = ChatGroq(
    model='llama3-70b-8192',
    temperature=0.3,
    api_key=groq_api_key,
    verbose=True
)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    verbose=True
)


In [None]:
qa({"question": "What is the paper about?"})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
For 2-way classiﬁcation, we compare against Thorne and Vlachos [57], who train RoBERTa [ 35]
to classify the claim as true or false given the gold evidence sentence. RAG achieves an accuracy
within 2.7% of this model, despite being supplied with only the claim and retrieving its own evidence.
We also analyze whether documents retrieved by RAG correspond to documents annotated as gold
evidence in FEVER. We calculate the overlap in article titles between the top kdocuments retrieved
by RAG and gold evidence annotations. We ﬁnd that the top retrieved document is from a gold article
in 71% of cases, and a gold article is present in the top 10 retrieved articles in 90% of case

{'question': 'What is the paper about?',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='The paper appears to be about a language model called RAG (Retrieval-Augmented Generator) that combines the strengths of retrieval-based and generation-based approaches to produce more accurate and specific responses. The model is trained to retrieve relevant documents and then generate text based on those documents. The paper presents results showing that RAG outperforms other models in various tasks, including fact verification and Jeopardy question generation.')],
 'answer': 'The paper appears to be about a language model called RAG (Retrieval-Augmented Generator) that combines the strengths of retrieval-based and generation-based approaches to produce more accurate and specific responses. The model is trained to retrieve relevant documents and then generate text based on those documents. The paper presents results showing that RAG outperforms other model

In [None]:
qa.Config.llm = ChatGroq(
    model = 'llama3-8b-8192',
    temperature = 0.3,
    api_key = groq_api_key,
    verbose = True
)

In [None]:
# here i am explicity mentioning about the RAG Paper.  # just changes the model to llama3-8b and you can see the difference.
qa({'question': 'Explain 5 main points of it in one sentence'})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the paper about?
Assistant: The paper appears to be about a language model called RAG (Retrieval-Augmented Generator) that combines the strengths of retrieval-based and generation-based approaches to produce more accurate and specific responses. The model is trained to retrieve relevant documents and then generate text based on those documents. The paper presents results showing that RAG outperforms other models in various tasks, including fact verification and Jeopardy question generation.
Follow Up Input: Explain 5 main points of it in one sentence
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;

{'question': 'Explain 5 main points of it in one sentence',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='The paper appears to be about a language model called RAG (Retrieval-Augmented Generator) that combines the strengths of retrieval-based and generation-based approaches to produce more accurate and specific responses. The model is trained to retrieve relevant documents and then generate text based on those documents. The paper presents results showing that RAG outperforms other models in various tasks, including fact verification and Jeopardy question generation.'),
  HumanMessage(content='Explain 5 main points of it in one sentence'),
  AIMessage(content="I don't know. The provided context does not provide a clear summary of the five main points of the RAG language model paper. It appears to be a snippet of a research paper discussing the performance of RAG models on various tasks, but it does not provide a concise summary of the main poi

In [None]:
# from nltk.corpus import stopwords
# from nltk.tokenize import word_tokenize

# # X = input("Enter first string: ").lower()
# # Y = input("Enter second string: ").lower()
# X ="I love horror movies"
# Y ="Lights out is a horror movie"

# # tokenization
# X_list = word_tokenize(X)
# Y_list = word_tokenize(Y)


# # sw contains the list of stopwords
# sw = stopwords.words('english')
# l1 =[];l2 =[]

# # remove stop words from the string
# X_set = {w for w in X_list if not w in sw}
# Y_set = {w for w in Y_list if not w in sw}

# # form a set containing keywords of both strings
# rvector = X_set.union(Y_set)  #{'I', 'Lights', 'horror', 'love', 'movie', 'movies'}

# for w in rvector:
#     if w in X_set: l1.append(1) # create a vector
#     else: l1.append(0)
#     if w in Y_set: l2.append(1)
#     else: l2.append(0)
# c = 0

# # cosine formula
# for i in range(len(rvector)):
#         c+= l1[i]*l2[i]
# cosine = c / float((sum(l1)*sum(l2))**0.5)
# print("similarity: ", cosine)

similarity:  0.2886751345948129


In [14]:
# mmr as search type and chain_type=stuff

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_groq import ChatGroq

llm = ChatGroq(
    model = "llama3-8b-8192",
    temperature = 0.3,
    api_key = groq_api_key,
    verbose = True
)

vectordb = Chroma.from_documents(
    docs,
    embedding,
    persist_directory="./",
)

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs = {"k": 3}),
    memory=memory,
    chain_type="stuff",
    verbose=True
)

In [15]:
qa({"question": "What is the paper about?"})

  warn_deprecated(




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
For 2-way classiﬁcation, we compare against Thorne and Vlachos [57], who train RoBERTa [ 35]
to classify the claim as true or false given the gold evidence sentence. RAG achieves an accuracy
within 2.7% of this model, despite being supplied with only the claim and retrieving its own evidence.
We also analyze whether documents retrieved by RAG correspond to documents annotated as gold
evidence in FEVER. We calculate the overlap in article titles between the top kdocuments retrieved
by RAG and gold evidence annotations. We ﬁnd that the top retrieved document is from a gold article
in 71% of cases, and a gold article is present in the top 10 retrieved articles in 90% of case

{'question': 'What is the paper about?',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='The paper appears to be about a natural language processing (NLP) model called RAG (Retrieval-Augmented Generation) that is designed for fact-checking and question-answering tasks. The paper presents several experiments and results that demonstrate the effectiveness of the RAG model in various NLP tasks, including fact verification, question answering, and generation of text.')],
 'answer': 'The paper appears to be about a natural language processing (NLP) model called RAG (Retrieval-Augmented Generation) that is designed for fact-checking and question-answering tasks. The paper presents several experiments and results that demonstrate the effectiveness of the RAG model in various NLP tasks, including fact verification, question answering, and generation of text.'}

In [16]:
qa({"question":"Tell me 5 insights from the paper."})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the paper about?
Assistant: The paper appears to be about a natural language processing (NLP) model called RAG (Retrieval-Augmented Generation) that is designed for fact-checking and question-answering tasks. The paper presents several experiments and results that demonstrate the effectiveness of the RAG model in various NLP tasks, including fact verification, question answering, and generation of text.
Follow Up Input: Tell me 5 insights from the paper.
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know th

{'question': 'Tell me 5 insights from the paper.',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='The paper appears to be about a natural language processing (NLP) model called RAG (Retrieval-Augmented Generation) that is designed for fact-checking and question-answering tasks. The paper presents several experiments and results that demonstrate the effectiveness of the RAG model in various NLP tasks, including fact verification, question answering, and generation of text.'),
  HumanMessage(content='Tell me 5 insights from the paper.'),
  AIMessage(content='Based on the provided context, the paper presents the following key insights about the RAG model:\n\n1. **Improved factual accuracy**: RAG is more strongly grounded in real factual knowledge, which makes it "hallucinate" less with generations that are more factual.\n2. **Control and interpretability**: RAG offers more control and interpretability compared to previous models.\n3. **Wide applic

In [17]:
# with mmr search_type and chain_type refine

memory = ConversationBufferMemory(
  memory_key="chat_history",
  return_messages=True
)

retriever = vectordb.as_retriever(search_type="mmr", search_kwargs = {"k": 3})

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type="refine",
    verbose=True
)

In [18]:
qa({'question':'What is the paper about?'})



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Context information is below.
------------
For 2-way classiﬁcation, we compare against Thorne and Vlachos [57], who train RoBERTa [ 35]
to classify the claim as true or false given the gold evidence sentence. RAG achieves an accuracy
within 2.7% of this model, despite being supplied with only the claim and retrieving its own evidence.
We also analyze whether documents retrieved by RAG correspond to documents annotated as gold
evidence in FEVER. We calculate the overlap in article titles between the top kdocuments retrieved
by RAG and gold evidence annotations. We ﬁnd that the top retrieved document is from a gold article
in 71% of cases, and a gold article is present in the top 10 retrieved articles in 90% of cases.
4.5 Additional Results
Generation Diversity Section 4.3 shows that RAG models are more factual and speciﬁc than
BART for Jeopardy 

{'question': 'What is the paper about?',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='With the additional context, I can refine the original answer.\n\nThe papers listed appear to be related to natural language processing and question answering. Specifically:\n\n* [10] introduces SearchQA, a new Q&A dataset augmented with context from a search engine.\n* [11] presents a hierarchical neural story generation approach.\n* [12] discusses ELI5, a long-form question answering approach.\n\nThese papers are likely exploring ways to improve question answering and natural language processing capabilities.')],
 'answer': 'With the additional context, I can refine the original answer.\n\nThe papers listed appear to be related to natural language processing and question answering. Specifically:\n\n* [10] introduces SearchQA, a new Q&A dataset augmented with context from a search engine.\n* [11] presents a hierarchical neural story generation approach.\n* 

In [19]:
# With MMR as search_type and Chain_type=map_rerank

memory = ConversationBufferMemory(
  memory_key="chat_history",
  return_messages=True,
)

retriever = vectordb.as_retriever(search_type="mmr", search_kwargs = {"k": 3})

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type="map_rerank",
    verbose=True
)

In [20]:
qa({"question": "What is the paper about?"})



[1m> Entering new MapRerankDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

Question: [question here]
Helpful Answer: [answer here]
Score: [score between 0 and 100]

How to determine the score:
- Higher is a better answer
- Better responds fully to the asked question, with sufficient level of detail
- If you do not know the answer based on the context, that should be a score of 0
- Don't be overconfident!

Example #1

Context:
---------
Apples are red
---------
Question: what color are apples?
Helpful Answer: red
Score: 100

Example #2

Context:
---------
it was night and the witness forgot his glasses. he was not sure if it 




[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'question': 'What is the paper about?',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='Helpful Answer: The paper is about evaluating the performance of a language model called RAG in various tasks, including 2-way classification and generation diversity.')],
 'answer': 'Helpful Answer: The paper is about evaluating the performance of a language model called RAG in various tasks, including 2-way classification and generation diversity.'}

In [21]:
qa({"question": "What are the main topics of the paper?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the paper about?
Assistant: Helpful Answer: The paper is about evaluating the performance of a language model called RAG in various tasks, including 2-way classification and generation diversity.
Follow Up Input: What are the main topics of the paper?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new MapRerankDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following form




[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'question': 'What are the main topics of the paper?',
 'chat_history': [HumanMessage(content='What is the paper about?'),
  AIMessage(content='Helpful Answer: The paper is about evaluating the performance of a language model called RAG in various tasks, including 2-way classification and generation diversity.'),
  HumanMessage(content='What are the main topics of the paper?'),
  AIMessage(content='Helpful Answer: The main topics of the paper are about the performance of the BART model in generating titles and the interaction between parametric and non-parametric memories.')],
 'answer': 'Helpful Answer: The main topics of the paper are about the performance of the BART model in generating titles and the interaction between parametric and non-parametric memories.'}