### Importando as Bibliotecas Necessárias

In [35]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_groq import ChatGroq
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.memory import ConversationBufferMemory
from langchain_cohere import CohereEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain

### Carregando as Variáveis de Ambiente

In [36]:
# Locate a .env file and load its entries into the process environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Read the service credentials from the environment; each is None when unset.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")


### Carregando o arquivo PDF

In [37]:
# Path to the source PDF, relative to the notebook's working directory.
file_path = "data/2210.03629v3.pdf"

# Build the loader, then read the PDF into a list of documents.
loader = PyPDFLoader(file_path)
documents = loader.load()

### Separando o Texto 

In [38]:
# Split the loaded documents on newlines; chunk size and overlap are left at
# the splitter's library defaults.
text_splitter = CharacterTextSplitter(separator="\n")
docs = text_splitter.split_documents(documents)

In [39]:
# `embeddings` and `index_name` must exist before this cell runs. They were
# previously first assigned in a later cell, so a fresh-kernel
# "Restart & Run All" raised NameError here. They are defined here with the
# same values that the later cell uses.
embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=COHERE_API_KEY)
index_name = "rag-pdf"

# Embed the split chunks and upsert them into the Pinecone index.
# NOTE(review): a later cell calls `from_documents` on the same chunks again;
# presumably each call writes a new set of vectors -- confirm and keep only
# one ingestion cell if duplicates are not wanted.
vectorstore_from_docs = PineconeVectorStore.from_documents(
    docs,
    index_name=index_name,
    embedding=embeddings,
)

In [40]:
# Handle onto the named Pinecone index, using the same embedding model for
# query-time encoding.
vectorstore = PineconeVectorStore(embedding=embeddings, index_name=index_name)

### Gerando os Embeddings com Cohere e Criando Vetores com Pinecone

In [41]:
# Name of the Pinecone index that stores the PDF chunks.
index_name = "rag-pdf"

# Cohere embedding model used to vectorise the chunks (and, later, queries).
embeddings = CohereEmbeddings(
    model="embed-english-v3.0",
    cohere_api_key=COHERE_API_KEY,
)

# Embed every chunk in `docs` and write the vectors to the index.
# NOTE(review): re-running this cell presumably upserts the same chunks
# again -- confirm whether duplicates accumulate in the index.
vectorstore_from_docs = PineconeVectorStore.from_documents(
    docs,
    index_name=index_name,
    embedding=embeddings,
)

In [45]:
# Re-open the index and run a quick similarity search as a smoke test.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
)
search_hits = vectorstore.similarity_search("What is a llm?")
print(search_hits)

[Document(id='821b35dd-264a-447b-bfd5-4a1e7ac9ce87', metadata={'author': '', 'creationdate': '2023-03-13T00:09:11+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2023-03-13T00:09:11+00:00', 'page': 8.0, 'page_label': '9', 'producer': 'pdfTeX-1.40.21', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'source': 'data/2210.03629v3.pdf', 'subject': '', 'title': '', 'total_pages': 33.0, 'trapped': '/False'}, page_content='Published as a conference paper at ICLR 2023\n5 R ELATED WORK\nLanguage model for reasoning Perhaps the most well-known work of using LLMs for reasoning\nis Chain-of-Thought (CoT) (Wei et al., 2022), which reveals the ability of LLMs to formulate their\nown “thinking procedure” for problem solving. Several follow-up works have since been performed,\nincluding least-to-most prompting for solving complicated tasks (Zhou et al., 2022), zero-shot-\nCoT (Kojima et al., 2022), and reasoning with sel

### Criando a memória com o Chat

In [46]:
# Conversation buffer stored under the "chat_history" key, returning message
# objects rather than a single string.
# NOTE(review): `memory` is not passed to the chain built in the cells below,
# so in this part of the notebook it has no effect on the answers.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


### Criando CHAT com RAG


In [47]:
# Groq-hosted chat model; the low temperature keeps answers close to the
# retrieved context.
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model="Gemma2-9b-It",
    temperature=0.1,
)

In [53]:
# Fetch the retrieval-QA chat prompt from the LangChain Hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Chain that feeds the retrieved documents into that prompt and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)

In [54]:
# Expose the vector store as a retriever, then join retrieval and answer
# generation into a single chain.
retriever = vectorstore.as_retriever()
qa = create_retrieval_chain(retriever, combine_docs_chain)

In [58]:
# Assumes the `qa` retrieval chain has already been built.
question = "What is React according to the pdf?"
response = qa.invoke({"input": question})

# The generated text is under the "answer" key of the chain output.
print(response["answer"])



According to the provided text, ReAct is a paradigm that can address the shortcomings of a lack of commonsense reasoning.  

It specifically mentions "ReAct-IM" which seems to be a specific implementation of the ReAct paradigm. 

