In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [None]:
import os

# Placeholder only: never commit a real key to the notebook.
# setdefault keeps a key that is already exported in the environment instead
# of overwriting it with the placeholder below; when no key is exported the
# placeholder is used, as before.
os.environ.setdefault('OPENAI_API_KEY', "Sua chave API")

In [None]:
# Embedding model used for the vector store. The OpenAI model id is
# "text-embedding-3-small" (singular "embedding"); the plural spelling is not
# a valid model name.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Chat model that writes the final answer; completions are capped at 500 tokens.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens=500)

In [None]:
# Load the PDF

# Placeholder: replace with the location of the PDF to index.
pdf_link = "Seu PDF"

# Image extraction is switched off; only the text is loaded.
loader = PyPDFLoader(
    pdf_link,
    extract_images=False,
)
pages = loader.load_and_split()

In [None]:
# How many documents the loader produced.
page_count = len(pages)
page_count

In [None]:
# Splitters

# Splitter for the small "child" chunks.
# chunk_overlap is set explicitly: the splitter's default overlap is 200
# characters, which equals this chunk_size and would make consecutive child
# chunks almost complete duplicates of one another.
child_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)

# Splitter for the larger "parent" chunks; add_start_index records each
# chunk's character offset in its metadata.
parent_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)

In [None]:
# Storage

# Document store kept in process memory.
store = InMemoryStore()

# Chroma vector store using the embeddings above, persisted under
# "childVectorDB".
# NOTE(review): the docstore is in-memory while the vector store is persisted,
# so after a kernel restart the persisted vectors may reference documents that
# are no longer in the docstore. Confirm this is intended.
vectorstore = Chroma(
    persist_directory="childVectorDB",
    embedding_function=embeddings,
)

In [None]:
# Build the retriever from the two splitters and the two stores.
parent_document_retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=vectorstore,
)

# Index the loaded pages; no explicit ids are supplied.
parent_document_retriever.add_documents(pages, ids=None)

In [None]:
# Shows the output of "child_splitter", because the child chunks are what is
# stored inside the "vectorstore".
child_vectorstore = parent_document_retriever.vectorstore
child_vectorstore.get()

In [None]:
# Prompt template
#
# The template has two placeholders: {question} for the user's query and
# {context} for the retrieved text.
# NOTE(review): "Querry" in the prompt text is misspelled; it is left
# unchanged here because it is part of the runtime string.

TEMPLATE = """
    Escreva o seu template
    Querry:
    {question}

    Context:
    {context}
"""

# Compile the raw string into a chat prompt.
reg_prompt = ChatPromptTemplate.from_template(template=TEMPLATE)

In [None]:
# Fan the incoming input out to two branches: "question" forwards it
# unchanged, "context" runs it through the retriever.
# RunnablePassthrough must be instantiated: passing the bare class hands the
# chain a constructor, not a runnable.
setup_retrival = RunnableParallel(
    {"question": RunnablePassthrough(), "context": parent_document_retriever}
)

# Parser placed at the end of the chain to produce a string output.
output_parser = StrOutputParser()

In [None]:
# Full pipeline: retrieval setup -> prompt -> chat model -> output parser.
parent_chain_retrival = (
    setup_retrival
    | reg_prompt
    | llm
    | output_parser
)

In [None]:
# Placeholder question: replace with the query to run through the chain.
user_question = "Escreva sua pergunta aqui"
parent_chain_retrival.invoke(user_question)