In [2]:
# Dependencies: llama-index-core plus only the integrations this notebook uses.
# Install them once in the kernel's environment (e.g. `%pip install <package>`):
# pip install llama-index-core
# pip install llama-index-llms-openai
# pip install llama-index-llms-replicate
# pip install replicate
# pip install llama-index-embeddings-huggingface

In [1]:
import os
from getpass import getpass

from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.replicate import Replicate
from transformers import AutoTokenizer

# Replicate credentials: never hard-code the token in the notebook. Use the
# value already exported as REPLICATE_API_TOKEN, and only prompt for it
# (without echoing) when the variable is missing or empty.
if not os.environ.get("REPLICATE_API_TOKEN"):
    os.environ["REPLICATE_API_TOKEN"] = getpass("Replicate API token: ")

# set the LLM: Llama-2 7B chat on Replicate; the id after ':' is a version hash
llama2_7b_chat = "meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e"
Settings.llm = Replicate(
    model=llama2_7b_chat,
    temperature=0.01,
    additional_kwargs={"top_p": 1, "max_new_tokens": 300},
)

# set tokenizer to match LLM
Settings.tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
# set the embed model
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Load the files found in the "docs" directory and index them as vectors
reader = SimpleDirectoryReader("docs")
documents = reader.load_data()
index = VectorStoreIndex.from_documents(documents)

## Query the index; the bare last expression displays the returned Response
query_engine = index.as_query_engine()
query_engine.query("What is the topic of the document?")

Response(response=' Based on the provided context information, I can determine that the topic of the document is "Dumm y PDF file".', source_nodes=[NodeWithScore(node=TextNode(id_='be8f381a-e7bc-4c2c-8992-1db76f13f6c1', embedding=None, metadata={'page_label': '1', 'file_name': 'dummy.pdf', 'file_path': 'c:\\Users\\RicardoMontaner\\Documents\\Mio\\Code Python\\pyAiLib\\docs\\dummy.pdf', 'file_type': 'application/pdf', 'file_size': 13264, 'creation_date': '2024-03-11', 'last_modified_date': '2023-05-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1862ad9f-96de-4f6d-ad7c-616c508dd2f8', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'dummy.pdf', 'file_path': 'c:\\Users\\Ri

In [3]:
## Write the index to disk under ./storage (spelled out; same as the default)
index.storage_context.persist(persist_dir="./storage")

In [4]:
# To reload from disk:
# (StorageContext and load_index_from_storage come from the notebook's
# import cell, so all dependencies are visible in one place.)

# rebuild storage context from the files persisted under ./storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# load index
index = load_index_from_storage(storage_context)


In [5]:
# Build a query engine over the current `index` and ask a question in Spanish
question = "Que hay escrito?"
query_engine = index.as_query_engine()
query_engine.query(question)

Response(response=' As an AI language model, I can analyze the provided context information to determine what has been written in the "dummy.pdf" file located at "c:\\Users\\RicardoMontaner\\Documents\\Mio\\Code Python\\pyAiLib\\docs". Based on the information provided, there appears to be no text or content written inside the PDF file. Therefore, the answer to the query "Que hay escrito?" would be simply "Nothing."', source_nodes=[NodeWithScore(node=TextNode(id_='be8f381a-e7bc-4c2c-8992-1db76f13f6c1', embedding=None, metadata={'page_label': '1', 'file_name': 'dummy.pdf', 'file_path': 'c:\\Users\\RicardoMontaner\\Documents\\Mio\\Code Python\\pyAiLib\\docs\\dummy.pdf', 'file_type': 'application/pdf', 'file_size': 13264, 'creation_date': '2024-03-11', 'last_modified_date': '2023-05-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'crea