# LlamaIndex examples

In [1]:
import sys
# Add the parent directory to the module search path so the project-local
# `src` package (imported later as `src.vector_store`) can be resolved.
# The path is relative to the kernel's working directory; presumably the
# notebook lives in a direct subdirectory of the repository root — confirm.
sys.path.append('..')

## LLM initialization

In [2]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP

import os

# Load the local `neural-chat-7b-v3-3` GGUF model file through the LlamaCPP wrapper.
# NOTE(review): the model path is absolute and machine-specific; it has to be
# adapted before this cell can run on another host.
model = LlamaCPP(
    # model_path=os.path.join("/mnt/d/wsl/llamacpp/models", "neural-chat-7b-v3-3.Q5_K_M.gguf"),
    model_path=os.path.join("/root/LL-DM/models", "neural-chat-7b-v3-3.Q5_K_M.gguf"),

    temperature=0.1,
    max_new_tokens=512,
    # callback_manager=callback_manager,
    # verbose=True, # Verbose is required to pass to the callback manager
    # echo=True,
    # 32768 tokens: the same value the loader output reports as llama.context_length.
    context_window=1024*32,
    # Passed through to llama.cpp; -1 presumably offloads all layers to the GPU —
    # confirm against the llama-cpp-python documentation.
    model_kwargs={"n_gpu_layers": -1}
)


ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA RTX A5000, compute capability 8.6, VMM: yes
llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /root/LL-DM/models/neural-chat-7b-v3-3.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = intel_neural-chat-7b-v3-3
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:               

## Vector store

In [3]:
import fnmatch

def find_files_with_extension(root_dir, extension):
    """Recursively collect the files under ``root_dir`` that have ``extension``.

    Args:
        root_dir: Directory to walk; every sub-directory is visited.
        extension: File extension to match, with or without the leading dot
            (``"json"`` and ``".json"`` are treated the same).

    Returns:
        A sorted list of matching file paths, each formed by joining the
        directory the file was found in with its file name. The list is empty
        when nothing matches or ``root_dir`` does not exist.
    """
    # The pattern supplies the dot itself, so strip any dot the caller passed;
    # otherwise ".json" would turn into "*..json" and miss every normal file.
    suffix = extension.lstrip(".")
    pattern = f"*.{suffix}"

    file_list = []
    for root, _, files in os.walk(root_dir):
        for filename in fnmatch.filter(files, pattern):
            file_list.append(os.path.join(root, filename))

    # os.walk yields entries in arbitrary filesystem order; sort so that repeated
    # runs hand the files to downstream consumers in the same order.
    file_list.sort()
    return file_list

# Read every JSON file found under the SRD directory into llama_index documents.
# NOTE(review): this path is absolute and machine-specific, while the later
# AutoMergingSRDIndex cell points at "../srd" — consider unifying the two.
documents = SimpleDirectoryReader(
    input_files=find_files_with_extension("/root/LL-DM/srd", "json"),
).load_data()

# Embedding model identifier shared by every index built in this notebook.
# The "local:" prefix presumably makes llama_index run the Hugging Face model
# locally (the progress bars below show its files being downloaded) — confirm.
embed_model = "local:BAAI/bge-small-en-v1.5"

# Flat vector index built directly from the loaded documents.
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [4]:
# Query engine over the flat vector index; answers are generated by the local
# LlamaCPP model created above.
query_engine = index.as_query_engine(llm=model)

In [5]:
# Smoke-test the flat index with a question about the indexed SRD equipment data.
# The cell's value is the whole `Response` object, so the notebook displays its
# full repr (answer text plus all source nodes) rather than only the answer.
query_engine.query("What is the cost of a crossbow?")


llama_print_timings:        load time =   10773.87 ms
llama_print_timings:      sample time =      31.26 ms /    59 runs   (    0.53 ms per token,  1887.28 tokens per second)
llama_print_timings: prompt eval time =   11686.79 ms /   922 tokens (   12.68 ms per token,    78.89 tokens per second)
llama_print_timings:        eval time =    3214.00 ms /    58 runs   (   55.41 ms per token,    18.05 tokens per second)
llama_print_timings:       total time =   15167.93 ms /   980 tokens


Response(response="\nThe cost of a crossbow, hand, with the index 'crossbow-hand', has a cost of 75 gp, while the cost of a crossbow, light, with the index 'crossbow-light', has a cost of 25 gp.", source_nodes=[NodeWithScore(node=TextNode(id_='153a03a7-2b34-459a-b374-61af68f624b7', embedding=None, metadata={'file_path': '/root/LL-DM/srd/api/equipment/crossbow-hand.json', 'file_name': 'crossbow-hand.json', 'file_type': 'application/json', 'file_size': 1011, 'creation_date': '2024-02-17', 'last_modified_date': '2024-01-18', 'last_accessed_date': '2024-02-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='848e27e0-e985-48af-9d9b-fcb78c315f59', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/r

In [6]:
from llama_index.core.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)

In [7]:
# Hierarchical node parser with the library's default settings. The cells below
# use it to split the documents into a node hierarchy (leaf and root nodes are
# extracted from its output).
node_parser = HierarchicalNodeParser.from_defaults()

In [9]:
# Parse the SRD documents loaded earlier into the complete list of hierarchy nodes.
nodes = node_parser.get_nodes_from_documents(documents)

In [10]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes
# Pick out the two ends of the hierarchy. The leaf nodes are what gets embedded
# into the vector index below; the root nodes are computed here but are not
# referenced again in the cells visible in this section.
leaf_nodes = get_leaf_nodes(nodes)
root_nodes = get_root_nodes(nodes)

In [11]:
# Imports for the storage layer backing the hierarchical (auto-merging) index.
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
# NOTE(review): `OpenAI` is not referenced by any cell visible in this section.
from llama_index.llms.openai import OpenAI

# Document store that will hold every node of the hierarchy.
docstore = SimpleDocumentStore()

# Insert ALL nodes (not only the leaves) into the docstore; presumably the
# auto-merging retriever looks parent nodes up here when merging — confirm.
docstore.add_documents(nodes)

# Define the storage context around that docstore (will include a vector store by default too).
storage_context = StorageContext.from_defaults(docstore=docstore)

In [12]:
# Build the vector index from the leaf nodes only. The shared storage context
# ties it to the docstore that contains the full node hierarchy.
# NOTE(review): VectorStoreIndex is already imported in the LLM-initialization
# cell, so this import is redundant (harmless).
from llama_index.core import VectorStoreIndex

base_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
    # Same embedding model identifier as used for the flat index above.
    embed_model=embed_model,
    show_progress=True
)

In [13]:
from llama_index.core.retrievers import AutoMergingRetriever
# Plain vector retriever over the leaf-node index, asking for the 6 most similar nodes.
base_retriever = base_index.as_retriever(similarity_top_k=6)
# Wrap it in the auto-merging retriever, which also receives the storage context
# holding the full hierarchy. Presumably it replaces groups of retrieved leaves
# with their parent node — see the LlamaIndex AutoMergingRetriever docs for the
# exact rule. verbose=True enables its diagnostic output.
retriever = AutoMergingRetriever(base_retriever, storage_context, verbose=True)

In [14]:
from llama_index.core.query_engine import RetrieverQueryEngine
# Rebinds `query_engine` (previously the flat-index engine) to an engine backed
# by the auto-merging retriever, still answering with the local LlamaCPP model.
query_engine = RetrieverQueryEngine.from_args(retriever, llm=model)
# Left for comparison: the same kind of engine on the plain leaf retriever.
# base_query_engine = RetrieverQueryEngine.from_args(base_retriever)

In [16]:
# Ask the auto-merging engine a broader variant of the earlier crossbow question.
# As before, the cell displays the full `Response` repr, not just the answer text.
query_engine.query("What is the cost of a crossbow? List all costs available")

Llama.generate: prefix-match hit

llama_print_timings:        load time =   10773.87 ms
llama_print_timings:      sample time =      53.12 ms /    91 runs   (    0.58 ms per token,  1713.10 tokens per second)
llama_print_timings: prompt eval time =    1779.09 ms /   939 tokens (    1.89 ms per token,   527.80 tokens per second)
llama_print_timings:        eval time =    4972.25 ms /    90 runs   (   55.25 ms per token,    18.10 tokens per second)
llama_print_timings:       total time =    7170.39 ms /  1029 tokens


Response(response='\nThe costs of crossbows are as follows:\n1. Crossbow-Hand: Cost = 75 gp\n2. Crossbow-Light: Cost = 25 gp\n3. Crossbow-Heavy: Cost = 50 gp\n\nNote that these costs are for the crossbows themselves, and additional costs for bolts and other equipment may apply depending on the class and character build.', source_nodes=[NodeWithScore(node=TextNode(id_='018504d9-3e8c-42ad-864f-4015a188a68e', embedding=None, metadata={'file_path': '/root/LL-DM/srd/api/equipment/crossbow-hand.json', 'file_name': 'crossbow-hand.json', 'file_type': 'application/json', 'file_size': 1011, 'creation_date': '2024-02-17', 'last_modified_date': '2024-01-18', 'last_accessed_date': '2024-02-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: R

### Test persistence

In [17]:
# Write the leaf index's storage context to the "index/" directory, which is
# resolved relative to the kernel's working directory.
base_index.storage_context.persist("index/")

In [19]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the directory persisted above.
# Note: this rebinds `storage_context`, replacing the in-session context that
# the auto-merging retriever was constructed with.
storage_context = StorageContext.from_defaults(persist_dir="index/")

# Load the index back from storage. This rebinds `index`, which until now
# referred to the flat index built from the raw documents.
index = load_index_from_storage(storage_context, embed_model=embed_model, show_progress=True)

In [22]:
from src.vector_store import AutoMergingSRDIndex, SRDConfig

# Project-local index class from `src.vector_store`, given the LlamaCPP model and
# an SRD config pointing at "../srd". Per the debug log below, in this run it
# found an existing cache directory named `index` and loaded the index from it.
my_index = AutoMergingSRDIndex(model, SRDConfig("../srd"))

[32m2024-02-17 17:00:11.615[0m | [34m[1mDEBUG   [0m | [36msrc.vector_store[0m:[36m__init__[0m:[36m69[0m - [34m[1mCache directory index already exists[0m
[32m2024-02-17 17:00:11.617[0m | [34m[1mDEBUG   [0m | [36msrc.vector_store[0m:[36m__init__[0m:[36m70[0m - [34m[1mTrying to load existing index from index[0m


[32m2024-02-17 17:01:22.969[0m | [34m[1mDEBUG   [0m | [36msrc.vector_store[0m:[36m__init__[0m:[36m74[0m - [34m[1mIndex ready[0m


config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]