In [12]:
## Retrieval-augmented generation (RAG)

import os
from dotenv import load_dotenv
# Load environment variables from a .env file, if one is present.
# NOTE(review): the recorded output of this cell is False — presumably no
# .env values were loaded; confirm whether this call is still needed.
load_dotenv()

False

In [13]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
# Load the contents of the local "data" directory as a list of documents.
documents = SimpleDirectoryReader(input_dir="data").load_data()

In [14]:
# Configure the global Settings object: an Ollama-served LLM for generation
# and a Hugging Face model for embeddings.
Settings.llm = Ollama(
    model="llama3.1",
    request_timeout=360.0,
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
)

In [15]:
# Display the loaded documents as this cell's output.
# NOTE(review): this dumps every Document in full, and the recorded output
# includes an absolute local file path in the metadata; consider showing a
# short summary instead before sharing the notebook.
documents

[Document(id_='3be125ba-ff7e-44f3-adaf-4710f56a5d17', embedding=None, metadata={'page_label': '1', 'file_name': 'somatosensory.pdf', 'file_path': '/home/navas/Documents/Projects/LLM/llamaindex_rag/data/somatosensory.pdf', 'file_type': 'application/pdf', 'file_size': 145349, 'creation_date': '2024-12-16', 'last_modified_date': '2024-12-16'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='This is a sample document to\nshowcase page-based formatting. It\ncontains a chapter from a Wikibook\ncalled Sensory Systems. None of the\ncontent has been changed in this\narticle, but some content has been\nremoved.\nAnatomy of the Somatosensory System\nFROM WIKIBOOKS1\nOur somatosensory system consists of se

In [16]:
# Build a vector index over the loaded documents (with progress bars enabled),
# then derive a default query engine from it.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)
query_engine = index.as_query_engine()

Parsing nodes: 100%|██████████| 4/4 [00:00<00:00, 1784.62it/s]
Generating embeddings: 100%|██████████| 4/4 [00:01<00:00,  3.09it/s]


In [17]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor

# Assemble a query engine by hand instead of using the index default:
# the retriever is configured with similarity_top_k=4 and the post-processor
# with similarity_cutoff=0.20.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=4,
)
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.20)

# Rebinds `query_engine`, replacing whatever engine it referred to before.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)

In [18]:
# Run a question through the current query engine and keep the result
# in `response`.
response = query_engine.query(
    "What is heading of the document?"
)

In [19]:
from llama_index.core.response.pprint_utils import pprint_response
# Pretty-print the answer along with its source nodes.
pprint_response(
    response,
    show_source=True,
)

Final Response: Anatomy of the Somatosensory System
______________________________________________________________________
Source Node 1/4
Node ID: 6d190f47-20db-4ccf-a2e1-547fd12cbd11
Similarity: 0.5196092060681182
Text: This is a sample document to showcase page-based formatting. It
contains a chapter from a Wikibook called Sensory Systems. None of the
content has been changed in this article, but some content has been
removed. Anatomy of the Somatosensory System FROM WIKIBOOKS1 Our
somatosensory system consists of sensors in the skin and sensors in
our muscles...
______________________________________________________________________
Source Node 2/4
Node ID: 2fbb21c4-d588-4062-8649-36e67588e9f8
Similarity: 0.4831473282380506
Text: Rapidly adapting Slowly adapting Surface receptor / small
receptive field Hair receptor, Meissner’s corpuscle: De- tect an
insect or a very fine vibration. Used for recognizing texture.
Merkel’s receptor: Used for spa- tial details, e.g. a round surface
edg

In [20]:
import os.path
from llama_index.core import StorageContext,load_index_from_storage

# Reuse a previously persisted index when one is available; otherwise build
# the index from the "data" directory and persist it for later runs.
PERSIST_DIR = "./storage"

# A directory that exists but is empty (for example, left behind by an
# interrupted run) contains nothing to load, so it is treated the same as a
# missing directory and the index is rebuilt.
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    # Load the existing index from disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Build the index from scratch and store it for later.
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# Create a query engine from the index. This rebinds `query_engine` to an
# engine created with no extra arguments, replacing any engine defined earlier.
query_engine = index.as_query_engine()
response = query_engine.query("What is Somatosensory System?")




In [21]:
# Print the response as plain text.
print(response)

The somatosensory system consists of sensors in the skin and muscles that provide information about temperature, pressure, surface texture, pain, muscle length, tension, and joint angles. It allows us to perceive and respond to various stimuli from our environment and body.
