In [1]:
import os

from dotenv import load_dotenv

# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# Fail fast with an actionable message when the key is missing or empty.
# Assigning the result of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" when the variable is not defined.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
  raise RuntimeError(
    'OPENAI_API_KEY is not set. Define it in a .env file or export it '
    'in the environment before running this notebook.'
  )
os.environ['OPENAI_API_KEY'] = openai_api_key

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain

In [3]:
# Instantiate the two OpenAI-backed models used by the rest of the notebook.

# Embedding model, built with the library defaults.
embeddings_model = OpenAIEmbeddings()

# Chat model; max_tokens=200 bounds the length of each generated reply.
llm = ChatOpenAI(
  model_name='gpt-3.5-turbo',
  max_tokens=200,
)

In [5]:
# Load the PDF

pdf_link = '../../pdfs/starlink.pdf'

loader = PyPDFLoader(pdf_link, extract_images=False)

# Use load() rather than load_and_split(): load_and_split() already runs the
# loader's default text splitter, so splitting the result again afterwards would
# chunk the text twice (and any start offsets recorded by a later splitter would
# be relative to those intermediate fragments instead of the original documents).
# Chunking is left entirely to the explicit splitter configured in this notebook.
pages = loader.load()

In [6]:
# Split the loaded documents into chunks.
#   - chunk_size / chunk_overlap are measured with `len`, i.e. in characters
#   - add_start_index=True asks the splitter to record each chunk's start offset
text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=4000, chunk_overlap=20, length_function=len, add_start_index=True
)
chunks = text_splitter.split_documents(pages)

In [7]:
# Save to the vector DB (Chroma)

# Give every chunk a deterministic id ("<source>-<position>"). Without explicit
# ids the store generates random ones, so every re-run of this cell would append
# another full copy of the document to the persisted 'text_index' directory.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): if a later run yields fewer chunks, entries with higher positions
# from an earlier run remain in the index — delete 'text_index' to rebuild cleanly.
chunk_ids = [
  f"{chunk.metadata.get('source', 'document')}-{position}"
  for position, chunk in enumerate(chunks)
]

db = Chroma.from_documents(
  chunks,
  embedding=embeddings_model,
  ids=chunk_ids,
  persist_directory='text_index',
)

In [8]:
# Reopen the persisted Chroma index from the 'text_index' directory.
vectordb = Chroma(
  embedding_function=embeddings_model,
  persist_directory='text_index',
)

# Retriever over the index, configured with k=3 results per query.
retriever = vectordb.as_retriever(search_kwargs=dict(k=3))

# Build the prompt chain used to call the LLM ('stuff' chain type).
# NOTE(review): this call emits a deprecation notice that points to the
# "stuff_docs_chain" migration guide; migrating would change the chain's
# input/output keys, so it must be done together with the ask() helper.
chain = load_qa_chain(llm, chain_type='stuff')

stuff: https://python.langchain.com/v0.2/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/v0.2/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm, chain_type='stuff')


In [9]:
def ask(question: str) -> str:
  """Answer a question using documents retrieved from the vector index.

  Fetches documents for ``question`` through the module-level ``retriever``,
  passes them together with the question to the module-level ``chain`` and
  returns the text stored under the ``'output_text'`` key of the chain result.

  Args:
    question: The user's question.

  Returns:
    The chain's ``'output_text'`` value.
  """
  # retriever.invoke() replaces the deprecated get_relevant_documents().
  documents = retriever.invoke(question)
  result = chain.invoke(
    {
      'input_documents': documents,
      'question': question,
    },
    return_only_outputs=True,
  )
  return result['output_text']

In [10]:
# Read a question from the user, run it through ask() and print the reply.
# (The result variable was previously misspelled as `aswer`.)
user_question = input('User: ')
answer = ask(user_question)
print('Answer: ', answer)

  context = retriever.get_relevant_documents(question)


Answer:  Olá! Como posso ajudar você hoje?
