In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

In [None]:
import os
from getpass import getpass

# Never hardcode the key in the notebook: reuse OPENAI_API_KEY when it is
# already exported in the environment, otherwise prompt for it without echo.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Load the OpenAI models: one for embeddings, one for chat completion.
# The embedding model id is "text-embedding-3-small" (singular "embedding");
# the previous spelling "text-embeddings-3-small" is not a valid model name.
embeddings_model = OpenAIEmbeddings(model = "text-embedding-3-small")
llm = ChatOpenAI(model_name = "gpt-3.5-turbo")

In [None]:
# Load the PDF, one Document per page.
pdf = PyPDFLoader(file_path = "Seu PDF", extract_images = False)

# Use load() rather than load_and_split(): the latter already runs a default
# text splitter, so the explicit splitter configured in the next cell would
# re-split pre-chunked text and its start_index would no longer refer to the
# original page content.
pages = pdf.load()

In [None]:
# Split the loaded pages into chunks.
# Sizes are measured with len (characters): up to 4000 per chunk, with a
# 20-character overlap between consecutive chunks; add_start_index asks the
# splitter to record where each chunk starts.
text_spliter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    length_function=len,
    chunk_overlap=20,
    chunk_size=4000,
)

chunk = text_spliter.split_documents(documents=pages)

In [None]:
# Embed the chunks and save them in a persistent Chroma collection.
# The previous code only opened the store and never added `chunk`, so the
# retriever searched an empty collection.
# NOTE: re-running this cell adds the same chunks to "naiveDB" again; delete
# the directory first if you need a clean rebuild.
vectorDB = Chroma.from_documents(
    documents = chunk,
    embedding = embeddings_model,
    persist_directory = "naiveDB",
)

In [None]:
# Expose the vector store as a retriever that returns the 10 nearest chunks.
# The option must be passed as `search_kwargs`; the former `kwargs=` keyword
# is not what the retriever reads, so k=10 was never applied.
naive_retriever = vectorDB.as_retriever(search_kwargs = {"k": 10})

In [None]:
from getpass import getpass

# Never hardcode the key in the notebook: reuse COHERE_API_KEY when it is
# already exported in the environment, otherwise prompt for it without echo.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass("Cohere API key: ")

In [None]:
# Cohere reranker configured with top_n=3.
rerank = CohereRerank(top_n=3, model="rerank-v3.5")

# Wrap the naive retriever so that its results go through the reranker.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=naive_retriever,
    base_compressor=rerank,
)

In [None]:
# Prompt template for the RAG chain. The placeholder names must match the keys
# produced by the retrieval step ("question" and "context"); the previous
# misspelled placeholders ({questio}, {constext}) made the prompt fail at
# invoke time because those variables were never supplied.
TEMPLATE = """
    Sua especificação do que o agente de ia é...
    
    Query:
    {question}

    Context
    {context}
"""

reg_prompt = ChatPromptTemplate.from_template(TEMPLATE)

In [None]:
def _format_docs(docs):
    """Join the page_content of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Fan the user question out to both prompt inputs: passed through unchanged as
# "question", and sent to the reranking retriever for "context". The retrieved
# documents are flattened to plain text so the prompt receives their content
# rather than the Python repr of a list of Document objects.
setup_retrieval = RunnableParallel(
    {
        "question": RunnablePassthrough(),
        "context": compression_retriever | _format_docs,
    }
)

output_parser = StrOutputParser()

# Retrieval -> prompt -> chat model -> plain string answer.
compression_retrieval_chain = setup_retrieval | reg_prompt | llm | output_parser

In [None]:
# Run the full chain on a question; the cell displays the returned answer.
pergunta = "Faça sua pergunta..."
compression_retrieval_chain.invoke(pergunta)