In [58]:
import os 

from pinecone import Pinecone, ServerlessSpec

from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.globals import set_verbose, set_debug
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import JsonOutputParser

# Switch on LangChain's global verbose and debug flags; the step-by-step
# chain traces in the outputs further down presumably come from these.
set_verbose(True)
set_debug(True)

# Load own documents

In [2]:
# Pinecone client. The API key is read from the PINECONE_API_KEY environment
# variable; a missing variable raises KeyError here.
pc = Pinecone(
    api_key=os.environ["PINECONE_API_KEY"],
)

In [3]:
# Read every PDF found (recursively) under ./data/ using PyMuPDFLoader.
loader = DirectoryLoader(
    "./data/",
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
)
docs = loader.load()

# Cut the loaded documents into chunks of at most 1000 characters, no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(docs)

In [4]:
# Show the text and metadata of the second chunk as a spot check.
sample_chunk = splits[1]

print("Example content:\n")
print(sample_chunk.page_content)

print("\nExample metadata:\n")
print(sample_chunk.metadata)

Example content:

techniques and main modeling trends, and iii) datasets
and evaluation metrics in salient object detection.
We
also discuss open problems such as evaluation metrics
and dataset bias in model performance and suggest future
research directions.
Keywords Salient object detection, bottom-up saliency,
explicit saliency, visual attention, regions of
interest.
1
Introduction
Humans are able to detect visually distinctive, so called
salient, scene regions effortlessly and rapidly (i.e.,
pre-
attentive stage). These ﬁltered regions are then perceived
and processed in ﬁner details for the extraction of richer
high-level information (i.e., attentive stage). This capability
has long been studied by cognitive scientists and has
recently attracted a lot of interest in the computer vision
community mainly because it helps ﬁnd the objects or
1
MarkableAI. E-mail: ali@markable.ai.
2
TKLNDST, College of Computer Science, Nankai University.
E-mail: cmm@nankai.edu.cn
3
University of Massa

# Upload to vector db

In [5]:
# Embedding model: BGE small (English), run on CPU, with normalized embeddings.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)



In [6]:
# Create the Pinecone index on first use; skip creation when it already exists.
index_name = "research-paper-index"

# Names of all indexes currently visible to this client.
existing_indexes = []
for index_info in pc.list_indexes():
    existing_indexes.append(index_info["name"])

index_missing = index_name not in existing_indexes
if index_missing:
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=384,  # based on model output dimensions
        metric="cosine",
        spec=serverless_spec,
    )

In [7]:
# insert data to vector db
# Deterministic, position-based ids keep this cell idempotent: re-running it
# (or Restart & Run All) writes the same ids again instead of adding a second
# copy of every chunk under newly generated ids, so total_vector_count keeps
# matching len(splits).
# NOTE(review): vectors stored by earlier runs under other ids are not removed
# by this cell — clear the index once if it already holds such entries.
chunk_ids = [f"chunk-{i}" for i in range(len(splits))]
docsearch = PineconeVectorStore.from_documents(
    splits,
    embedding_function,
    index_name=index_name,
    ids=chunk_ids,
)

# view index stats (last expression, so the stats are displayed as cell output)
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 403}},
 'total_vector_count': 403}

In [8]:
# Sanity check: total_vector_count in the index stats should equal the number of chunks.
num_chunks = len(splits)
num_chunks

403

# Retrieve similar data given a query

Perform a similarity search to find relevant information in the vector database.

In [9]:
# Ask the vector store for the single closest chunk and show its text.
query = "Explain saliency detection"
docs = docsearch.similarity_search(query, k=1)
best_match = docs[0]
print(best_match.page_content)

To separate these two types of saliency models, in this
study we provide a precise deﬁnition and suggest an ap-
propriate treatment of salient object detection. Generally, a
salient object detection model should, ﬁrst detect the salient
attention-grabbing objects in a scene, and second, segment
the entire objects. Usually, the output of the model is a
saliency map where the intensity of each pixel represents
its probability of belonging to salient objects. From this
deﬁnition, we can see that this problem in its essence
is a ﬁgure/ground segmentation problem, and the goal is
to only segment the salient foreground object from the
background. Note that it slightly differs from the traditional
image segmentation problem that aims to partition an image
into perceptually coherent regions.
The value of salient object detection models lies in their
applications in many areas such as computer vision, graph-
ics, and robotics. For instance, these models have been suc-


In [11]:
# Wrap the vector store as a retriever (similarity search, top 1 result) so it
# can be plugged into a chain, then print what it returns for the same query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)
matched_docs = retriever.invoke(query)

for position in range(len(matched_docs)):
    print(f"\n## Document {position}\n")
    print(matched_docs[position].page_content)


## Document 0

To separate these two types of saliency models, in this
study we provide a precise deﬁnition and suggest an ap-
propriate treatment of salient object detection. Generally, a
salient object detection model should, ﬁrst detect the salient
attention-grabbing objects in a scene, and second, segment
the entire objects. Usually, the output of the model is a
saliency map where the intensity of each pixel represents
its probability of belonging to salient objects. From this
deﬁnition, we can see that this problem in its essence
is a ﬁgure/ground segmentation problem, and the goal is
to only segment the salient foreground object from the
background. Note that it slightly differs from the traditional
image segmentation problem that aims to partition an image
into perceptually coherent regions.
The value of salient object detection models lies in their
applications in many areas such as computer vision, graph-
ics, and robotics. For instance, these models have been suc-


# Generate a response

The result above is not suitable to show to the user as-is. We need a chat model to rewrite the retrieved content into a more presentable final answer.

In [13]:
# Open-source chat model accessed through HuggingFaceHub (text-generation task).
# NOTE(review): the recorded output of this cell is a deprecation warning —
# presumably for HuggingFaceHub; confirm and plan a migration.
# Alternative configuration kept for reference:
#   repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature 0.1,
#   same task and return_full_text=False.
generation_kwargs = {
    "temperature": 0.001,
    "return_full_text": False,
}

chat_model = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    model_kwargs=generation_kwargs,
)

  warn_deprecated(


## Define the output format

In [21]:
# Define the output data structure
# Schema handed to the JSON output parser: one question/answer pair.
# The Field descriptions below appear verbatim in the parser's format
# instructions, so they are part of the prompt sent to the model.
# NOTE(review): documented with comments rather than a class docstring, since
# pydantic may surface a docstring in the generated schema and thereby change
# the prompt — confirm before adding one.

class QuestionAnswer(BaseModel):
    question: str = Field(description="question asked by user")  # the question as posed by the user
    answer: str = Field(description="answer from model")  # the model's reply

In [22]:
# Parser that turns the model's text reply into a dict following QuestionAnswer.
parser = JsonOutputParser(pydantic_object=QuestionAnswer)
# Instruction text (including the JSON schema) to be embedded in the prompts.
format_instructions = parser.get_format_instructions()

# Show the instructions so the reader can see exactly what the model is told.
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"question": {"title": "Question", "description": "question asked by user", "type": "string"}, "answer": {"title": "Answer", "description": "answer from model", "type": "string"}}, "required": ["question", "answer"]}
```


## RAG without own data

In [39]:
# Prompt for the baseline run: the model answers from its own knowledge only,
# with no retrieved context. Built from explicit pieces so the trailing spaces
# and blank lines of the template are visible.
rag_template_without_context = (
    " Answer the question based on your understanding. \n"
    "Keep the answer short and concise. \n"
    'Respond "Unsure about answer" if not sure about the answer.\n'
    "\n"
    "Question: {question}\n"
    "\n"
    "{format_instructions}\n"
    "\n"
)

# {format_instructions} is filled in up front; {question} is supplied per call.
rag_prompt_without_context = PromptTemplate.from_template(
    template=rag_template_without_context,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# question -> prompt -> chat model -> JSON parser
question_only_input = RunnableParallel({"question": RunnablePassthrough()})
rag_chain_without_context = (
    question_only_input
    | rag_prompt_without_context
    | chat_model
    | parser
)

In [42]:
# Run the baseline chain (no retrieved context) on the test question.
question_without_context = "What are the categories of attentional models"
response_without_context = rag_chain_without_context.invoke(question_without_context)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<question>] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<question> > chain:RunnablePassthrough] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<question> > chain:RunnablePassthrough] [1ms] Exiting Chain run with output:
[0m{
  "output": "What are the categories of attentional models"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<question>] [2ms] Exiting Chain run with output:
[0m{
  "question": "What are the categories of attentional models"
}
[

In [41]:
# Header line, a blank line, then the parsed answer of the baseline chain.
print("Response without own data: \n", response_without_context, sep="\n")

Response without own data: 

{'question': 'What are the categories of attentional models', 'answer': 'There are three categories of attentional models: Sustained Attention, Selective Attention, and Alternating Attention.'}


## RAG with own data

In [57]:
# Prompt for the retrieval-augmented run: the model must answer from the
# supplied context. Built from explicit pieces so the trailing spaces and
# blank lines of the template are visible.
rag_template_with_context = (
    " Answer the question based on the context below. \n"
    "Keep the answer short and concise. \n"
    'Respond "Unsure about answer" if not sure about the answer.\n'
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "{format_instructions}\n"
    "\n"
)

# {format_instructions} is filled in up front; {context} and {question} are
# supplied per call.
rag_prompt_with_context = PromptTemplate.from_template(
    template=rag_template_with_context,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [44]:
def format_docs(docs):
    """Return the page_content of every item in ``docs``, joined by blank lines.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents separated by ``"\\n\\n"``; an empty
        string when ``docs`` is empty.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

In [46]:
# Generation step: expects a dict with "context" (retrieved documents) and
# "question". The documents are flattened to one string with format_docs, the
# prompt is filled, the chat model is called and its reply parsed as JSON.
rag_chain_with_context_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | rag_prompt_with_context
    | chat_model
    | parser
)

# Retrieval + generation: the retriever fills "context", the raw input is
# passed through as "question", and the generation step's result is added
# under "answer" so the sources are returned next to the answer.
# Fix: this used to reference `rag_chain_from_docs`, a name no visible cell
# defines (it only worked through leftover kernel state and raises NameError
# on a fresh kernel); it now uses the chain defined just above.
rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_with_context_from_docs)

In [47]:
# Run the retrieval-augmented chain on the same test question as the baseline.
question_with_context = "What are the categories of attentional models"
response_with_context_with_source = rag_chain_with_source.invoke(question_with_context)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnablePassthrough] Entering Chain run with input:
[0m{
  "input": "What are the categories of attentional models"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnablePassthrough] [0ms] Exiting Chain run with output:
[0m{
  "output": "What are the categories of attentional models"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] [5.18s] Exiting Chain run with output:
[0m[outputs]
[32;1m[1;3m[chain/star

[36;1m[1;3m[llm/end][0m [1m[chain:RunnableSequence > chain:RunnableAssign<answer> > chain:RunnableParallel<answer> > chain:RunnableSequence > llm:HuggingFaceHub] [1.58s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Here is the output for the given question:\n```\n{\n  \"question\": \"What are the categories of attentional models\",\n  \"answer\": \"task-agnostic approaches and task-speciﬁc methods\"\n}\n```  Answer: {\"question\": \"What are the categories of attentional models\", \"answer\": \"task-agnostic approaches and task-speciﬁc methods\"}  JSON output: {\"question\": \"What are the categories of attentional models\", \"answer\": \"",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<answer> > chain:RunnableParallel<answer> > chain:RunnableSequence > parser:JsonOutputParser] Entering Par

In [51]:
# Header line, a blank line, then the retrieved documents the answer was based on.
print(
    "Data from db that matched the query:\n",
    response_with_context_with_source.get("context"),
    sep="\n",
)

Data from db that matched the query:

[Document(page_content='what ways current models fail, how to remedy them, what can be learned from cognitive studies of attention, how explicit saliency\njudgments relate to ﬁxations, how to conduct fair model comparison, and what are the emerging applications of saliency models.\nIndex Terms—Visual saliency, eye movement prediction, attention, video saliency, benchmark, deep learning.\n!\n1\nINTRODUCTION\nV\nIsual attention enables humans to rapidly analyze com-\nplex scenes and devote their limited perceptual and\ncognitive resources to the most pertinent subsets of sensory\ndata. It acts as a shiftable information processing bottleneck,\nallowing only objects within a circumscribed region to reach\nhigher levels of processing and visual awareness [1].\nBroadly speaking, the literature on attentional models\ncan be split into two categories: task-agnostic approaches\n(i.e. ﬁnding the salient pieces of information, a.k.a bottom-up\n(BU) saliency 

In [56]:
# Parsed answer produced by the retrieval-augmented chain.
final_answer = response_with_context_with_source.get("answer")
print(final_answer)

{'question': 'What are the categories of attentional models', 'answer': 'task-agnostic approaches and task-speciﬁc methods'}
