In [4]:
import os
import time

from langchain import PromptTemplate
from langchain import HuggingFaceHub, LLMChain
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser, PydanticOutputParser
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import PromptTemplate

from pinecone import Pinecone, ServerlessSpec


  from tqdm.autonotebook import tqdm


# Load documents and upload to vector db

In [5]:
# Credentials must come from the environment (shell export, .env loader, or a
# secrets manager) and never be written into the notebook itself.
# NOTE(review): the tokens that were previously hardcoded in this cell have been
# exposed in source and should be revoked and re-issued.
for required_var in ("HUGGINGFACEHUB_API_TOKEN", "PINECONE_API_KEY"):
    # Fail fast with an actionable message instead of a later, opaque auth error.
    if not os.environ.get(required_var):
        raise RuntimeError(
            f"Environment variable {required_var} is not set; "
            "export it before starting the notebook kernel."
        )

# Pinecone client used by the index-management cells below.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

In [3]:
# Step 1: Document loader -- pick up every PDF below ./data/ (recursive glob)
# and parse each one with PyMuPDFLoader.
loader = DirectoryLoader(
    './data/',
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
)
docs = loader.load()

# Step 2: Text splitter -- chunks of at most 1000 characters, no overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [4]:
# Spot-check one chunk (index 10): its text first, then its metadata.
print("Example content:\n")
sample_chunk = splits[10]
print(sample_chunk.page_content)

print("\nExample metadata:\n")
print(sample_chunk.metadata)

Example content:

mans watching complex image or video stimuli (e.g. [20],
[21]). New databases have emerged by following two trends,
1) increasing the number of images, and 2) introducing new
measurements to saliency by providing contextual annota-
tions (e.g. image categories, regional properties, etc.). To an-
notate large scale data, researchers have resorted to crowd-
sourcing schemes such as gaze tracking using webcams [19]
or mouse movements [22], [23] as alternatives to lab-based
eye trackers (Fig. 3). Deep supervised saliency models rely
heavily on these sufﬁciently large and well-labeled datasets.
Here, I review some of the most recent and inﬂuential image
and video datasets. The discussion of pros and cons of these
datasets is postponed to Section 4. For a review of ﬁxation
datasets pre-deep learning era please consult [24].
Image datasets. Three of the most popular image datasets
used for training and testing models are as follows.
• MIT300: This dataset is a collection of 

In [6]:
# Step 3: Embedding model
# Model card: https://huggingface.co/BAAI/bge-small-en

model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")                # run the encoder on CPU
encode_kwargs = dict(normalize_embeddings=True)  # request normalized vectors
embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)



In [6]:
index_name = "research-paper-index"

# Only create the index when it does not exist yet, so re-running this cell
# is harmless.
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]
if index_name not in existing_indexes:

    pc.create_index(
      name=index_name,
      dimension=384, # based on model dimensions
      metric="cosine",
      spec=ServerlessSpec(
          cloud='aws',
          region='us-east-1'
      )
    )

    # The next cell writes to the index straight away, so block here until
    # Pinecone reports the freshly created index as ready.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

In [7]:
# Push the chunks into the vector db, embedding them with `embedding_function`
docsearch = PineconeVectorStore.from_documents(
    splits,
    embedding_function,
    index_name=index_name,
)

# Open a handle on the index; the stats call is the cell's displayed value
index = pc.Index(index_name)
index.describe_index_stats()

# Extra texts could be appended afterwards, e.g.:
# docsearch.add_texts(["More text!"])


{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 283}},
 'total_vector_count': 283}

# Retrieve

In [1]:
# Ask the vector store for the two chunks closest to the query and show the
# text of the first one.
query = "Explain mask r-cnn"
docs = docsearch.similarity_search(
    query,
    k=2,
)
print(docs[0].page_content)

NameError: name 'docsearch' is not defined

In [17]:
# Same lookup through the retriever interface (top-2 similarity matches),
# printing each hit under a numbered heading.
retriever = docsearch.as_retriever(
    search_kwargs={"k": 2},
    search_type="similarity",
)
matched_docs = retriever.invoke(query)
for i, d in enumerate(matched_docs):
    print(f"\n## Document {i}\n", d.page_content, sep="\n")


## Document 0

3. Mask R-CNN
Mask R-CNN is conceptually simple: Faster R-CNN has
two outputs for each candidate object, a class label and a
bounding-box offset; to this we add a third branch that out-
puts the object mask. Mask R-CNN is thus a natural and in-
tuitive idea. But the additional mask output is distinct from
the class and box outputs, requiring extraction of much ﬁner
spatial layout of an object. Next, we introduce the key ele-
ments of Mask R-CNN, including pixel-to-pixel alignment,
which is the main missing piece of Fast/Faster R-CNN.
Faster R-CNN: We begin by brieﬂy reviewing the Faster
R-CNN detector [29]. Faster R-CNN consists of two stages.
The ﬁrst stage, called a Region Proposal Network (RPN),
proposes candidate object bounding boxes.
The second
stage, which is in essence Fast R-CNN [9], extracts features
using RoIPool from each candidate box and performs clas-
siﬁcation and bounding-box regression. The features used
by both stages can be shared for faster inferenc

# Generate

In [7]:
index_name = "research-paper-index"

# Attach to the index by name (no documents are passed in here) and expose it
# as a retriever that returns only the single best match.
docsearch = PineconeVectorStore(
    embedding=embedding_function,
    index_name=index_name,
)
retriever = docsearch.as_retriever(
    search_kwargs={"k": 1},
    search_type="similarity",
)


In [8]:
from langchain_community.llms import HuggingFaceHub

# Generator LLM served through the HuggingFace Hub: Llama-3 8B Instruct with a
# near-zero temperature, returning only the newly generated text.
chat_model = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    model_kwargs=dict(
        temperature=0.001,
        return_full_text=False,
    ),
)

# Alternative chat model, kept for reference:

# chat_model = HuggingFaceHub(
#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     task="text-generation",
#     model_kwargs={
#         "temperature": 0.1,
#         "return_full_text" : False
#     },
# )


In [9]:
# Define your desired data structure.
class QuestionAnswer(BaseModel):
    question: str = Field(description="question asked by user")
    answer: str = Field(description="answer from model")


In [10]:
# Build a JSON output parser from the QuestionAnswer schema and capture the
# format-instruction text it generates.
# NOTE(review): `parser` and `format_instructions` are both rebound by the
# StructuredOutputParser cell further down, so the values assigned here look
# superseded before the chains are built -- confirm which parser is intended.
parser = JsonOutputParser(pydantic_object=QuestionAnswer)
format_instructions = parser.get_format_instructions()

In [20]:
# Describe the two fields the model must return, then derive the parser and
# its format instructions from that description.
response_schemas = [
    ResponseSchema(name="question", description="user's question"),
    ResponseSchema(name="answer", description="answer from model"),
]
question_schema, answer_schema = response_schemas

parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = parser.get_format_instructions()

In [21]:
def format_docs(retrieved_docs):
    """Join the text of retrieved chunks into one context string.

    Without this step the retriever's raw result (a list of document objects)
    would be interpolated into the prompt via its repr, dragging metadata and
    constructor syntax into the ``{context}`` slot.

    Args:
        retrieved_docs: iterable of objects exposing ``page_content``, as
            returned by ``retriever``.

    Returns:
        The ``page_content`` of each item separated by a blank line; an empty
        string when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Baseline prompt: the model answers from its own knowledge only.
rag_template_without_context = """ Answer the question based on your understanding. 
Keep the answer short and concise. 
Respond "Unsure about answer" if not sure about the answer.

Question: {question}

{format_instructions}

"""

# RAG prompt: same instructions, plus the retrieved context.
rag_template_with_context = """ Answer the question based on the context below. 
Keep the answer short and concise. 
Respond "Unsure about answer" if not sure about the answer.

Context: {context}
Question: {question}

{format_instructions}

"""

# Both prompts share the same output-format instructions; compute them once
# from the active parser.
format_instructions = parser.get_format_instructions()

rag_prompt_without_context = PromptTemplate.from_template(
    template=rag_template_without_context,
    partial_variables={"format_instructions": format_instructions},
)

rag_prompt_with_context = PromptTemplate.from_template(
    template=rag_template_with_context,
    partial_variables={"format_instructions": format_instructions},
)

# question -> prompt -> LLM -> parsed dict
rag_chain_without_context = (
    {"question": RunnablePassthrough()}
    | rag_prompt_without_context
    | chat_model
    | parser
)

# question -> (retrieved context as plain text, question) -> prompt -> LLM -> parsed dict
rag_chain_with_context = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt_with_context
    | chat_model
    | parser
)

In [30]:
from langchain.globals import set_verbose, set_debug

# Switch off both LangChain global flags (verbose and debug) before invoking
# the chains.
set_verbose(False)
set_debug(False)

In [31]:
# Baseline run: the chain that has no retrieved context.
response_without_context = rag_chain_without_context.invoke(
    "What are the categories of attentional models"
)

print("Response without own data: \n", response_without_context, sep="\n")

Response without own data: 

{'question': 'What are the categories of attentional models', 'answer': 'The categories of attentional models include Saliency-based models, Object-based models, and Feature-based models.'}


In [32]:
# RAG run: same question, answered with the retrieved context in the prompt.
response_with_context = rag_chain_with_context.invoke(
    "What are the categories of attentional models"
)

print("Response with own data: \n", response_with_context, sep="\n")

Response with own data: 

{'question': 'What are the categories of attentional models', 'answer': 'task-agnostic approaches (i.e. finding the salient pieces of information, a.k.a bottom-up (BU) saliency [1]–[4]) and task-specific methods (i.e. finding'}
