In [1]:
import os
import openai
import sys
# Add the directory two levels up (relative to the working directory) to the import path.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
# Load the .env file located by find_dotenv(); the return value is discarded.
_ = load_dotenv(find_dotenv())

# Must run after load_dotenv: raises KeyError if OPENAI_API_KEY is still unset.
openai.api_key = os.environ['OPENAI_API_KEY']
  


In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
# Directory handed to Chroma as `persist_directory` when the vector store is opened.
persist_directory = 'docs/chroma/'

In [2]:
# Embedding model; reused later for the toy store and the SVM retriever.
embedding = OpenAIEmbeddings()

# Open the Chroma store at `persist_directory`, with `embedding` as its embedding function.
vectordb = Chroma(embedding_function=embedding, persist_directory=persist_directory)

In [3]:
# Sanity check: entry count of the underlying collection (read via the private `_collection` attribute).
print(vectordb._collection.count())

148


In [4]:
# Toy corpus: three short passages about Amanita phalloides, used to compare search strategies.
texts = [
    "The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).",
    "A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.",
    "A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.",
]

In [5]:
# Build a separate Chroma store from the toy `texts`; no persist_directory is passed here.
smalldb = Chroma.from_texts(texts, embedding=embedding)

In [6]:
# Query for the toy store; it mentions both "all-white" and "large fruiting bodies".
question = "Tell me about all-white mushrooms with large fruiting bodies"

In [7]:
# Plain similarity search limited to k=2 results; the cell displays the returned list.
smalldb.similarity_search(question, k=2)

[Document(page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.'),
 Document(page_content='The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).')]

In [8]:
# Max-marginal-relevance search for the same question, with k=2 and fetch_k=3.
# The recorded output's second result differs from the plain similarity search.
smalldb.max_marginal_relevance_search(
    question,
    k=2,
    fetch_k=3,
)

[Document(page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.'),
 Document(page_content='A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.')]

In [9]:
# Ask the store opened from `persist_directory` about YOLOX (similarity search, k=3).
question = "what did they say about yolox?"
docs_ss = vectordb.similarity_search(
    question,
    k=3,
)

In [10]:
# Preview the first 100 characters of the first similarity-search result.
docs_ss[0].page_content[:100]

'+3.3 AR C(#4−#8) w.r.t. ZebraPose [42].\n4.3. 2D Object Detection Results\nAs shown in Tab. 4, the YOL'

In [11]:
# Preview the first 100 characters of the second similarity-search result.
docs_ss[1].page_content[:100]

'# Method ...based on Year Data ...type AP CTime\n1 GDRNPPDet YOLOX 2022 RGB PBR+real 77.3 .081\n2 GDRN'

In [12]:
# Same question against the same store, using max-marginal-relevance search with k=3.
docs_mmr = vectordb.max_marginal_relevance_search(
    question,
    k=3,
)

In [13]:
# Preview the first 100 characters of the first MMR result.
docs_mmr[0].page_content[:100]

'+3.3 AR C(#4−#8) w.r.t. ZebraPose [42].\n4.3. 2D Object Detection Results\nAs shown in Tab. 4, the YOL'

In [14]:
# Preview the first 100 characters of the second MMR result.
docs_mmr[1].page_content[:100]

'Figure D.3: Qualitative Results on YCB-V [60]. For each image, we visualize the 6D poses by renderin'

In [15]:
# The question now names a specific paper; the next cell enforces that with an explicit metadata filter.
question = "what did they say about yolox in the second paper?"

In [16]:
# Similarity search restricted by a metadata filter on `source` (the second paper's path).
docs = vectordb.similarity_search(
    question,
    k=3,
    filter={"source":"docs/papers/paper_2.pdf"}
)

In [17]:
# Print each result's metadata to check which source and page it came from.
for d in docs:
    print(d.metadata)

{'page': 6, 'source': 'docs/papers/paper_2.pdf'}
{'page': 6, 'source': 'docs/papers/paper_2.pdf'}
{'page': 7, 'source': 'docs/papers/paper_2.pdf'}


In [18]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [19]:
# Metadata schema passed to SelfQueryRetriever.from_llm: one AttributeInfo per metadata field.
metadata_field_info = [
    # `source`: the description lists the three paper paths a value may take.
    AttributeInfo(
        name="source",
        description="The paper the chunk is from, should be one of `docs/papers/paper_1.pdf`, `docs/papers/paper_2.pdf`, or `docs/papers/paper_3.pdf`",
        type="string",
    ),
    # `page`: declared as an integer field.
    AttributeInfo(
        name="page",
        description="The page from the paper",
        type="integer",
    ),
]

In [25]:
document_content_description = "paper"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    verbose=True
)

In [26]:
# Natural-language question that names the paper; no explicit `filter` argument is supplied this time.
question = "what did they say about yolox in the third paper?"

In [27]:
# Run the question through the retriever built with SelfQueryRetriever.from_llm.
docs = retriever.get_relevant_documents(question)

In [28]:
# Print each result's metadata to see which source and page the retriever selected.
for d in docs:
    print(d.metadata)

In [29]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [30]:
def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Each entry is rendered as ``Document <n>:`` (numbered from 1), a blank
    line, and the document's ``page_content``. Consecutive entries are
    separated by a line of 100 dashes. Everything is written with a single
    ``print`` call, so an empty ``docs`` prints just a newline.

    Args:
        docs: iterable of objects exposing a string ``page_content`` attribute.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))


In [31]:
# Create the LLM and the LLMChainExtractor used as the document compressor.
# The vector store itself is not wrapped here; that happens where the
# ContextualCompressionRetriever is constructed.
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

In [32]:
# Wrap the vector store's default retriever, with `compressor` as the base compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=vectordb.as_retriever(),
    base_compressor=compressor,
)

In [33]:
# Query through the compression retriever and print the compressed documents.
question = "what did they say about yolox?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)



Document 1:

YOLOX [7] detector from GDRNPP has the top performance of 77.3 AP C. Mask R-CNN [11] from CosyPose only achieves 60.5 AP C(-16.8 AP C). YOLOX is relatively insensitive to the image domain, improving only +3.5 APC(#1−#2 in Tab. 4) when trained also on real images.
----------------------------------------------------------------------------------------------------
Document 2:

"1 GDRNPPDet YOLOX 2022 RGB PBR+real 77.3 .081", "2 GDRNPPDet YOLOX 2022 RGB PBR 73.8 .081"


In [34]:
# Rebuild the compression retriever; this time the base retriever is created with search_type "mmr".
compression_retriever = ContextualCompressionRetriever(
    base_retriever=vectordb.as_retriever(search_type="mmr"),
    base_compressor=compressor,
)

In [35]:
# Same question as before, now served by the retriever built with search_type "mmr".
question = "what did they say about yolox?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)



Document 1:

YOLOX [7] detector from GDRNPP has the top performance of 77.3 AP C. Mask R-CNN [11] from CosyPose only achieves 60.5 AP C(-16.8 AP C). YOLOX is relatively insensitive to the image domain, improving only +3.5 APC(#1−#2 in Tab. 4) when trained also on real images. Mask R-CNN yields +4.8 AP C(#6−#7) and FCOS [46] yields +5.4 AP C(#3−#4) in such a comparison.
----------------------------------------------------------------------------------------------------
Document 2:

LM [13], LM-O [3], YCB-V [60]


In [36]:
from langchain.retrievers import SVMRetriever
from langchain.retrievers import TFIDFRetriever
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [38]:
# Load the first paper and flatten its pages into one space-joined string
loader = PyPDFLoader("docs/papers/paper_1.pdf")
pages = loader.load()
all_page_text = [page.page_content for page in pages]
joined_page_text = " ".join(all_page_text)

# Split the joined text (chunk_size=1500, chunk_overlap=150)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150,
)
splits = text_splitter.split_text(joined_page_text)


In [39]:
# Build two retrievers directly over the text splits:
# the TF-IDF one takes only the texts, the SVM one also takes the embedding model.
tfidf_retriever = TFIDFRetriever.from_texts(splits)
svm_retriever = SVMRetriever.from_texts(splits, embedding)

In [40]:
# Query the SVM retriever and display its first result.
# Note: this retriever was built only from the splits of docs/papers/paper_1.pdf.
question = "What are major discussion in this paper?"
docs_svm=svm_retriever.get_relevant_documents(question)
docs_svm[0]

Document(page_content='ADD-SAUC of\nADD(-S)AUC of\nADD-SAUC of\nADD(-S)AUC of\nADD-SAUC of\nADD(-S)\n002master chef can 84.0 50.9 81.6 96.6 71.1 96.3 65.2 93.1 71.2 - -\n003cracker box 76.9 51.7 80.5 84.9 63.5 97.0 88.8 91.0 83.6 - -\n004sugar box 84.3 68.6 84.9 98.3 93.2 98.9 95.0 96.2 94.1 - -\n005tomato soup can 80.9 66.0 78.2 96.1 88.9 96.5 91.9 92.4 86.1 - -\n006mustard bottle 90.2 79.9 88.3 99.5 93.8 100.0 92.8 95.1 91.5 - -\n007tuna ﬁshcan 87.9 70.4 62.2 95.1 85.1 99.4 94.2 96.1 87.7 - -\n008pudding box 79.0 62.9 85.2 94.8 86.5 64.6 44.7 90.7 82.7 - -\n009gelatin box 87.1 75.2 88.7 95.3 88.5 97.1 92.5 94.3 91.9 - -\n010potted meat can 78.5 59.6 65.1 82.9 72.9 86.0 80.2 86.4 76.2 - -\n011banana 85.9 72.3 51.8 96.0 85.2 96.3 85.8 91.3 81.2 - -\n019pitcher base 76.8 52.5 91.2 98.8 94.3 99.9 98.5 94.6 90.1 - -\n021bleach cleanser 71.9 50.5 74.8 94.4 80.5 94.2 84.3 90.3 81.2 - -\n024bowl∗69.7 69.7 89.0 84.0 84.0 85.7 85.7 81.4 81.4 - -\n025mug 78.0 57.7 81.5 96.9 87.6 99.6 94.0 91.3 

In [41]:
# Query the TF-IDF retriever and display its first result.
# Note: this retriever was built only from the splits of docs/papers/paper_1.pdf,
# so it can only return chunks from that paper.
question = "what did they say about yolox?"
docs_tfidf=tfidf_retriever.get_relevant_documents(question)
docs_tfidf[0]

Document(page_content='ods generally perform well, they usually lack in accuracy\nwhen compared with approaches that instead rely on estab-\nlishing 2D-3D correspondences prior to estimating the 6D\npose [28, 15].\nDifferently, this latter class of methods usually involves\nsolving the 6D pose through a variant of the P nP/RANSAC\nalgorithm. While such a paradigm provides good estimates,\nit also suffers from several drawbacks. First, these methods\nare usually trained with a surrogate objective for correspon-\ndence regression, which does not necessarily reﬂect the ac-\ntual 6D pose error after optimization. In practice, two sets\nof correspondences can have the same average error while\ndescribing completely different poses. Second, these ap-\nproaches are not differentiable with respect to the estimated\n6D pose, which limits learning. For instance, these meth-\n1arXiv:2102.12145v3  [cs.CV]  9 Mar 2021 ods cannot be coupled with self-supervised learning from\nunlabeled real data [55