# Parent Document Retrieval (Small to Big Retrieval)

In [None]:
%pip install -qU langchain
%pip install -qU langchain-community
%pip install -qU langchain-text-splitters
%pip install -qU langchain_openai


### Import needed libraries

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers import ParentDocumentRetriever
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.storage._lc_store import create_kv_docstore
from langchain.storage import LocalFileStore

### Config

In [3]:
# Path of the text file to index and the vector-store collection name to use.
data = "../data/tesla.txt"
collection_name = "tesla"

# Embedding model (another embeddings object can be swapped in here) and the
# name of the chat model used for answer generation.
embedding_model = OpenAIEmbeddings()
language_model_name = "gpt-3.5-turbo-0125"

### Load and Split Document

In [4]:
# Read the source text file (UTF-8) into LangChain documents.
raw_documents = TextLoader(data, encoding='utf-8').load()

# Two splitter granularities: small "child" chunks and larger "parent" chunks.
# The child size is 400 so it agrees with the splitter that is re-created in
# the store-setup cell and actually handed to the retriever; a different value
# here (previously 200) was never used and only misled the reader.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)

### Store Documents in Vector Database

In [5]:
# Vector store for the embedded chunks; persist_directory points it at
# ./chroma_db and the collection is named after the config value.
db = Chroma(
    collection_name=collection_name,
    embedding_function=embedding_model,
    persist_directory="./chroma_db",
)

# File-backed store under ./store_location, wrapped so it can be passed to the
# retriever as its docstore.
fs = LocalFileStore("./store_location")
store = create_kv_docstore(fs)

# The splitters are (re)created in this cell, so these are the chunk sizes the
# retriever is built with.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)

## Set up retriever

In [6]:
# Combine the vector store, the docstore and both splitters into a single
# parent-document retriever.
full_retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=db,
)

# Feed the loaded documents through the retriever so they get split and stored.
# NOTE(review): both stores live on disk, so re-running this cell presumably
# indexes the same documents again -- confirm whether duplicates matter.
full_retriever.add_documents(raw_documents)

In [7]:
# Question used for every retrieval and RAG example below. The spelling matters:
# this exact string is embedded for similarity search and sent to the LLM.
query = "What was the motivation for Tesla to not marry?"

### Query Vector Database

In [None]:
# Query the vector store directly, bypassing the parent-document retriever.
sub_docs = db.similarity_search(query)

# Show the text of the first hit; raises IndexError if the search returned nothing.
print(sub_docs[0].page_content)

### Query Retriever

In [None]:
# Run the same query through the parent-document retriever.
# `invoke` is the Runnable entry point already used for the chains in this
# notebook; `get_relevant_documents` is deprecated in current LangChain releases.
retrieved_docs = full_retriever.invoke(query)

# Show the text of the first returned document for comparison with the raw
# vector-store hit.
print(retrieved_docs[0].page_content)

## RAG setup

In [None]:
# Prompt shared by both RAG chains: the model is told to answer only from the
# supplied context. {context} and {question} are filled in by the chain.
template = (
    "Answer the following question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Chat model selected by the name set in the config cell.
model = ChatOpenAI(model_name=language_model_name)


### Simple RAG from Child Document

In [None]:
# Baseline retriever built straight from the vector store, without the
# parent-document retriever in between.
only_child_retriever = db.as_retriever()

# The leading dict builds the prompt inputs: the query is sent to the retriever
# to produce "context" and is passed through unchanged as "question". The
# prompt output then goes to the chat model and is parsed to a plain string.
# NOTE(review): the retriever output is placed into the prompt as-is, presumably
# as the string form of the document list -- consider joining page_content.
child_chain = (
    {"context": only_child_retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

# Last expression of the cell, so the notebook displays the answer.
child_chain.invoke(query)

## Parent Retrieval RAG

In [None]:
# Same pipeline as the baseline chain, but "context" now comes from the
# parent-document retriever instead of the plain vector-store retriever.
# NOTE(review): the retriever output is placed into the prompt as-is, presumably
# as the string form of the document list -- consider joining page_content.
full_chain = (
    {"context": full_retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

# Last expression of the cell, so the notebook displays the answer.
full_chain.invoke(query)