# Stuff Documents Chain
Takes a set of documents and passes them as context to the LLM. The documents may be retrieved from a retrieval system.

https://python.langchain.com/docs/get_started/quickstart/

https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html

## Document chunking and Vector DB setup
generative-ai-for-architects/LangChain/retriever-vectorstore-basics.ipynb

In [None]:
# !pip install pymupdf

## Setup LLM

In [None]:
from dotenv import load_dotenv
import sys
import json

from langchain.prompts import PromptTemplate

# Load the .env file that contains the API keys (e.g. OPENAI_API_KEY).
# NOTE(review): this is an absolute, machine-specific Windows path; point it at
# your own .env file before running.
load_dotenv('C:\\Users\\raj\\.jupyter\\.env')

# Add the parent directory to the module search path so the `utils` package
# imported just below can be found (presumably it lives one level up - verify).
sys.path.append('../')

from utils.create_llm import create_gpt_llm, create_anthropic_llm, create_ai21_llm, create_cohere_llm, create_hugging_face_llm

# Alternative (disabled): try with GPT instead of the Hugging Face model
# model="gpt-4"
# llm = create_gpt_llm({"model": model})

# LLM used by the first experiment: the Mistral-7B-Instruct repo, created via
# the project helper with max_new_tokens set to 1024.
llm = create_hugging_face_llm(repo_id='mistralai/Mistral-7B-Instruct-v0.3', args={"max_new_tokens":1024})

## Setup Retriever function

In [None]:
from langchain_community.retrievers import ArxivRetriever

def get_arxiv_docs(query):
    """Retrieve Arxiv documents matching *query*.

    The retriever is configured to load at most one document, to return full
    documents, and to cap the document content at 30000 characters.

    Args:
        query: Search string to look up on Arxiv.

    Returns:
        The documents returned by the retriever for the query.
    """
    retriever = ArxivRetriever(load_max_docs=1, get_full_documents=True, doc_content_chars_max=30000)
    # `invoke` is the current retriever entry point and is what the Wikipedia
    # section of this notebook already uses; `get_relevant_documents` is
    # deprecated in LangChain.
    return retriever.invoke(query)

# # Chain of Thought paper
# COT_Document_identifier = 'chain of thought' # 2201.11903'

# results = retriever.get_relevant_documents(query = COT_Document_identifier)

# print(len(results))
# results[0].metadata



## RAG
The idea is to set up an LLM for Q&A:

1. Retrieve Arxiv documents on topics of interest
2. Create a RAG LLM chain with the relevant docs
3. Ask questions on the topic

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

# Prompt shared by the experiments in this notebook. It has two variables:
# "task" (the instruction to carry out) and "context" (the document text the
# model is told to rely on). The original template repeated the word
# "provided"; the duplicate is removed so the instruction reads cleanly.
prompt = PromptTemplate(
    template=(
        "You are a smart agent who uses only the provided context "
        "to carry out the given task. \n\n Task: {task} \n\n Context: \n {context}"
    ),
    input_variables=["task", "context"],
)

In [None]:
# Build the "stuff documents" chain from the LLM and the task/context prompt
chain = create_stuff_documents_chain(llm, prompt)

In [None]:
# Topic to search Arxiv for, and the task to run against the retrieved paper
topic = "chain of thought"
task="explain COT to a 15 year old"

# Fetch the Arxiv documents for the topic
docs = get_arxiv_docs(topic)
# Disabled: run the stuff-documents chain on the retrieved documents
# result = chain.invoke({"task": task, "context": docs})

# print(result)

In [None]:
# Call the model directly (no chain): the prompt is filled by hand using only
# the text of the first retrieved document
task = "what is chain of thought?"
# task = "create a bullet point list of important points"
result = llm.invoke(
    prompt.format(**{"task": task, "context": docs[0].page_content})
)

In [None]:
# Show the model's answer from the previous cell
print(result)

In [None]:
# Inspect the raw text of the first retrieved document (the context that was supplied)
print(docs[0].page_content)

## 2. Stuff documents chain over Wikipedia results

In [None]:
from langchain_community.retrievers import WikipediaRetriever
# Wikipedia retriever, created with no arguments
retriever = WikipediaRetriever()

# Rebind `docs` to the Wikipedia results for this topic (replacing the Arxiv results)
topic = "LLM Chain of Thought"
docs = retriever.invoke(topic)

In [None]:
# Display the retrieved documents
docs

In [None]:

# Recreate the LLM, this time without the max_new_tokens override used in the
# first setup (kept as a trailing comment), and rebuild the chain around it
llm = create_hugging_face_llm(repo_id='mistralai/Mistral-7B-Instruct-v0.3') #, args={"max_new_tokens":1024})
chain = create_stuff_documents_chain(llm, prompt)

In [None]:
# Ask the chain for a bullet-point explanation based on the retrieved documents.
# The task text is sent to the model verbatim, so the misspelling "exaplains"
# in the original string is corrected here.
task = "create a bullet point list that explains the chain of thought technique"
result = chain.invoke({"task": task, "context": docs})

In [None]:
# Display the chain's answer
result

In [None]:
# Plain-language description of the technique, answered from the retrieved documents
task = "describe COT technique in simple terms"
result = chain.invoke(dict(task=task, context=docs))
print(result)

In [None]:
# Worked example of applying the technique, answered from the retrieved documents
task = "give me an example of how to apply chain of thought technique"
result = chain.invoke(dict(task=task, context=docs))
print(result)

In [None]:
# Summary of the technique, answered from the retrieved documents
task = "create a summary for the chain of thought technique"
result = chain.invoke(dict(task=task, context=docs))
print(result)

## 3. Manually assembled context passed directly to the LLM

In [None]:
# Merge the text of every retrieved document into one context string.
# The original loop prepended "\n" on each pass, which stacked all the newlines
# at the front and glued the document texts together with no separator.
# Joining on "\n" puts exactly one newline between consecutive documents.
context = "\n".join(doc.page_content for doc in docs)

print(context)

In [None]:
# Direct LLM call: definition of the technique, using the merged context string
task = "what is chain of thought technique"
result = llm.invoke(
    prompt.format(context=context, task=task)
)
print(result)

In [None]:
# Direct LLM call: an example of the technique, using the merged context string
task = "give me an example of chain of thought technique"
result = llm.invoke(
    prompt.format(context=context, task=task)
)
print(result)

In [None]:
# Direct LLM call: five-point explanation, using the merged context string
task = "create a list of 5 points that explains the chain of thought technique"
result = llm.invoke(
    prompt.format(context=context, task=task)
)
print(result)