# Advanced RAG

In [2]:
import os
import chromadb

from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.documents import Document as lancghain_Document
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Pinecone

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryByteStore
from langchain.storage import LocalFileStore
from pathlib import Path
import uuid

from operator import itemgetter
from langchain.memory import ConversationBufferMemory
from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel
from langchain.prompts.prompt import PromptTemplate

from langchain.text_splitter import RecursiveCharacterTextSplitter

from pinecone import Pinecone as pinecone_client

from dotenv import load_dotenv,find_dotenv
load_dotenv(find_dotenv(),override=True)

local_db_path='../db/'
embedding_model='text-embedding-ada-002'

llm = ChatOpenAI(openai_api_key=os.getenv('OPENAI_API_KEY'), model_name="gpt-3.5-turbo", temperature=0.1)
query_model=OpenAIEmbeddings(model=embedding_model,openai_api_key=os.getenv('OPENAI_API_KEY'))


In [3]:
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
def num_tokens_from_string(string: str, encoding_name: str = "gpt-3.5-turbo") -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.encoding_for_model(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [4]:
docs=['../data/AMS/AMS_2020.pdf','../data/AMS/AMS_2018.pdf']

In [5]:
docs_out=[]
for doc in docs:
    loader = PyPDFLoader(doc)
    data = loader.load()
    docs_out.extend(data)

In [6]:
chunk_size=400
k_parent=5
# parent_splitter=CharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size*k_parent, chunk_overlap=0)
# child_splitter=CharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size, chunk_overlap=0)

# I don't think the splitters above work for what I want!
child_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
parent_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size*k_parent, chunk_overlap=0)

In [7]:
split_docs_child=child_splitter.split_documents(docs_out)
# split_docs_child[:5]

In [8]:
split_docs_parent=parent_splitter.split_documents(docs_out)
# split_docs_parent[:5]

In [9]:
questions=["What are some challenges associated with angular preload on ball bearings?"]

In [10]:
from langchain import hub
from langchain.prompts.prompt import PromptTemplate

# Prompts on the hub: https://smith.langchain.com/hub/my-prompts?organizationId=45eb8917-7353-4296-978d-bb461fc45c65
CONDENSE_QUESTION_PROMPT = hub.pull("dmueller/ams-chatbot-qa-condense-history")
QA_PROMPT=hub.pull("dmueller/ams-chatbot-qa-retrieval")
QA_WSOURCES_PROMPT=hub.pull("dmueller/ams-chatbot-qa-retrieval-wsources")
QA_GENERATE_PROMPT=hub.pull("dmueller/generate_qa_prompt")
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

In [11]:
# Combine documents, from queries.py

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [12]:
# From queries.py

memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)

## Parent-Child with Full Parent Retrieval

In [15]:
type='standard'
# type='parent-child'

persistent_client = chromadb.PersistentClient(path=local_db_path+'/chromadb')   
try:
    persistent_client.delete_collection(name="standard-test")
except:
    pass   
vectorstore = Chroma(collection_name='standard-test',
                     embedding_function=query_model)


In [16]:
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
# page_chunks = text_splitter.split_documents(docs_out)
# vectorstore.add_documents(page_chunks)

store=InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
)
child_splitter._chunk_size

400

In [17]:
retriever.add_documents(docs_out[:500],ids=None)

In [18]:
# Original pdf pages stored in memory
len(list(store.yield_keys()))

500

In [19]:
# Retrieved chunks with size chunk_size
sub_docs = vectorstore.similarity_search("bearing preload")
print(sub_docs[0].page_content)
print(len(sub_docs[0].page_content))


Shimming and grinding procedures were confirmed to 
produce the desired preload, which was measured 
indirectly (by measuring the gap between the clamp and 
shaft/hub)  during assembly of the bear ings. Review of the 
clamp design analysis with a high- fidelity  Finite Element 
Analysis (FEA) model confirmed good correlation with the 
load vs. deflection curves seen in in the as -built assembly.
398


In [20]:
# Original pdf page stored in memory
retrieved_docs = retriever.get_relevant_documents("bearing preload")
print(retrieved_docs[0].page_content)
print(len(retrieved_docs[0].page_content))
print('tokens: '+str(num_tokens_from_string(retrieved_docs[0].page_content)))


325 the bearings was exonerated by measuring the hardness of a set of bearings from the same lot as the 
bearings in the EM unit . Tests resulted in an  average hardness of 57.6 on the Rockwell Hardness Scale C 
(HRC), which was considered in- family with the specified requirement of 58- 60 for the 440C stainless steel 
bearings .  
Bearing analysis via Orbis 3.0 indicated that an angular misalignment of 0.00028 in (7 µm) could introduce 
localized Maximum Mean Hertzian Contact Stress (MMHCS) greater than 335 ksi  (2310 MPa) . Both a 
coordinate measuring machine (CMM) and computed tomography  (CT) (Figure 11) scans were used to 
evaluate this condition. CMM measurements showed a 0.0004- in (10-µm) difference across the bearing 
diameter while computerized tomography ( CT) scans did not show any signs of misalignment. However, 
the CT scans only had a voxel resolution of 0.002 in (51 µm) , which is not fine enough to detect the potential 
misalignment. The CMM measurements could only b

## Parent-child with partial parent retrieval
https://colab.research.google.com/github/datastax/ragstack-ai/blob/main/examples/notebooks/advancedRAG.ipynb

In [13]:
type='standard'
# type='parent-child'

persistent_client = chromadb.PersistentClient(path=local_db_path+'/chromadb')   
try:
    persistent_client.delete_collection(name="pc-test")
except:
    pass   
vectorstore = Chroma(client=persistent_client,
                    collection_name='pc-test',
                    embedding_function=query_model)


In [22]:
# In case you want to store on a remote sql db

# from langchain_community.storage import AstraDBStore

# ASTRA_DB_API_ENDPOINT = 'https://f17fd04c-2051-4034-85e5-2f7fc320ab0a-us-east-1.apps.astra.datastax.com'
# ASTRA_DB_APPLICATION_TOKEN = 'AstraCS:TKHGUJWdOGIXqDthOIAgZqOe:a8549547743f6999fffb6810461eb29ea1cc1252098f0f9baf67b3920904385c'
# collecton_name='test'

# store_pc = AstraDBStore(
#     api_endpoint=ASTRA_DB_API_ENDPOINT,
#     token=ASTRA_DB_APPLICATION_TOKEN,
#     collection_name="my_store",
# )

# import redis

# r = redis.Redis(
#   host='usw2-devoted-ladybird-30146.upstash.io',
#   port=30146,
#   password='5c0dbf8cdc65467096032041b68f7d66'
# )

# r.set('foo', 'bar')
# print(r.get('foo'))

# from langchain.storage import UpstashRedisByteStore
# from upstash_redis import Redis

# URL = 'https://usw2-devoted-ladybird-30146.upstash.io'
# TOKEN = 'AXXCACQgOTUwNTA5MzAtZTViYy00ZTMwLTgwOGItNzM1MmY1ZjJlOWIwNWMwZGJmOGNkYzY1NDY3MDk2MDMyMDQxYjY4ZjdkNjY='

# redis_client = Redis(url=URL, token=TOKEN)
# store_pc = UpstashRedisByteStore(client=redis_client, ttl=None, namespace="test-ns")

In [14]:
# store_pc=InMemoryByteStore()

# root_path = Path.cwd() / "data"  # can also be a path set by a string

docs_temp=docs_out[:500]

In [15]:
id_key = "doc_id"
root_path = Path.cwd().parent / 'db/chromadb/parent-docs'
store_pc = LocalFileStore(root_path)
doc_ids = [str(uuid.uuid4()) for _ in docs_temp]

parent_retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store_pc,
    id_key=id_key,
)

In [16]:
sub_docs = []
for i, doc in enumerate(docs_temp):
    _id = doc_ids[i]
    _sub_docs = child_splitter.split_documents([doc])
    for _doc in _sub_docs:
        _doc.metadata[id_key] = _id
    sub_docs.extend(_sub_docs)

In [17]:
parent_retriever.vectorstore.add_documents(sub_docs)
parent_retriever.docstore.mset(list(zip(doc_ids, docs_temp)))

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [35]:
# Child retrieved chunks with size chunk_size
sub_docs = vectorstore.similarity_search("bearing preload")
print(sub_docs[0].page_content)
print(len(sub_docs[0].page_content))

Shimming and grinding procedures were confirmed to 
produce the desired preload, which was measured 
indirectly (by measuring the gap between the clamp and 
shaft/hub)  during assembly of the bear ings. Review of the 
clamp design analysis with a high- fidelity  Finite Element 
Analysis (FEA) model confirmed good correlation with the 
load vs. deflection curves seen in in the as -built assembly.
398


In [36]:
# Parent chunked documents with chunk_size*k_parent
retrieved_docs = retriever.get_relevant_documents("bearing preload")
print(retrieved_docs[0].page_content)
print(len(retrieved_docs[0].page_content))
print('tokens: '+str(num_tokens_from_string(retrieved_docs[0].page_content)))


325 the bearings was exonerated by measuring the hardness of a set of bearings from the same lot as the 
bearings in the EM unit . Tests resulted in an  average hardness of 57.6 on the Rockwell Hardness Scale C 
(HRC), which was considered in- family with the specified requirement of 58- 60 for the 440C stainless steel 
bearings .  
Bearing analysis via Orbis 3.0 indicated that an angular misalignment of 0.00028 in (7 µm) could introduce 
localized Maximum Mean Hertzian Contact Stress (MMHCS) greater than 335 ksi  (2310 MPa) . Both a 
coordinate measuring machine (CMM) and computed tomography  (CT) (Figure 11) scans were used to 
evaluate this condition. CMM measurements showed a 0.0004- in (10-µm) difference across the bearing 
diameter while computerized tomography ( CT) scans did not show any signs of misalignment. However, 
the CT scans only had a voxel resolution of 0.002 in (51 µm) , which is not fine enough to detect the potential 
misalignment. The CMM measurements could only b

## Try opening existing retriever and local store

In [41]:
store_pc_exists = LocalFileStore(root_path)

id_key = "doc_id"
parent_retriever_exists = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store_pc_exists,
    id_key=id_key,
)

In [42]:
# Child retrieved chunks with size chunk_size
sub_docs = parent_retriever_exists.vectorstore.similarity_search("bearing preload")
print(sub_docs[0].page_content)
print(len(sub_docs[0].page_content))

Shimming and grinding procedures were confirmed to 
produce the desired preload, which was measured 
indirectly (by measuring the gap between the clamp and 
shaft/hub)  during assembly of the bear ings. Review of the 
clamp design analysis with a high- fidelity  Finite Element 
Analysis (FEA) model confirmed good correlation with the 
load vs. deflection curves seen in in the as -built assembly.
398


In [43]:
# Parent chunked documents with chunk_size*k_parent
# Only works when you create the local vector store. So it works!
retrieved_docs = parent_retriever_exists.get_relevant_documents("bearing preload")
print(retrieved_docs[0].page_content)
print(len(retrieved_docs[0].page_content))

IndexError: list index out of range