In [1]:
# logging setup
import logging
import sys
import os

# Send INFO-and-above log records to stdout so they show up in the cell output.
# basicConfig already attaches a stdout StreamHandler to the root logger; adding
# a second stdout handler made every record print twice (once formatted, once
# bare), so no extra handler is registered here.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# llama index asyncio config: apply the nest_asyncio patch so asyncio event
# loops can be nested (the notebook kernel already runs one)
import nest_asyncio
nest_asyncio.apply()

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.vector_stores.qdrant import QdrantVectorStore
import chromadb
import qdrant_client
import torch
import data_utils

  from .autonotebook import tqdm as notebook_tqdm


INFO:datasets:PyTorch version 2.3.0+cu118 available.
PyTorch version 2.3.0+cu118 available.


In [2]:
# Notebook configuration: the values a reader is most likely to change.
qdrant_dir = "./qdrant_db/"  # path handed to the Qdrant client
col_name = "md-llama-blogs"  # Qdrant collection name
embed_model = "models/bge-base-en-v1.5"  # model name/path given to HuggingFaceEmbedding
# Use the first CUDA device when one is available; otherwise fall back to CPU
# so the notebook still runs on machines without a GPU.
device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
docs_dir = "data/llama-blogs-md"  # passed to data_utils.load_md_documents as docs_dir
docs_metadata = "data/llama_blogs_metadata.json"  # passed as docs_metadata

In [3]:
# Display the global Settings object before anything is configured
# (every field is None in the output below).
Settings

_Settings(_llm=None, _embed_model=None, _callback_manager=None, _tokenizer=None, _node_parser=None, _prompt_helper=None, _transformations=None)

In [3]:
# Configure the LlamaIndex globals: a local HuggingFace embedding model on the
# selected device, and no LLM.
Settings.embed_model = HuggingFaceEmbedding(
    model_name=embed_model,
    device=device_map,
)
# Assigning None disables the LLM; the library reports that it uses MockLLM.
Settings.llm = None

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: models/bge-base-en-v1.5
Load pretrained SentenceTransformer: models/bge-base-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']
LLM is explicitly disabled. Using MockLLM.


In [4]:
# Open Qdrant against the directory given by `qdrant_dir`.
client = qdrant_client.QdrantClient(path=qdrant_dir)

In [5]:
# Wrap the Qdrant collection as a LlamaIndex vector store and expose it through
# a StorageContext.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=col_name,
    # Hybrid-search options, currently disabled:
    # enable_hybrid=True,
    # fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [6]:
# Display the StorageContext; the repr below shows the Qdrant store registered
# under the 'default' vector-store key.
storage_context

StorageContext(docstore=<llama_index.core.storage.docstore.simple_docstore.SimpleDocumentStore object at 0x7f65aa711450>, index_store=<llama_index.core.storage.index_store.simple_index_store.SimpleIndexStore object at 0x7f65aa711bd0>, vector_stores={'default': QdrantVectorStore(stores_text=True, is_embedding_query=True, flat_metadata=False, collection_name='md-llama-blogs', url=None, api_key=None, batch_size=64, parallel=1, max_retries=3, client_kwargs={}, enable_hybrid=False, index_doc_id=True, fastembed_sparse_model=None), 'image': SimpleVectorStore(stores_text=False, is_embedding_query=True, data=SimpleVectorStoreData(embedding_dict={}, text_id_to_ref_doc_id={}, metadata_dict={}))}, graph_store=<llama_index.core.graph_stores.simple.SimpleGraphStore object at 0x7f65aa711600>, property_graph_store=None)

In [9]:
# Load and chunk the markdown blog posts only when the collection has not been
# created yet. This cell used to be fully commented out, which left `nodes`
# undefined on a fresh kernel even though a later cell consumes it.
nodes = []  # stays empty when the collection already exists
if not client.collection_exists(col_name):
    # load documents
    nodes = data_utils.load_md_documents(
        docs_dir=docs_dir, docs_metadata=docs_metadata, return_nodes=True
    )

    print(f"Loaded {len(nodes)} documents")

Num documents: 166


Parsing documents: 100%|██████████| 166/166 [00:00<00:00, 8623.31it/s]
Parsing nodes: 100%|██████████| 166/166 [00:00<00:00, 637.88it/s]


Loaded 1221 documents


In [7]:
# Create a VectorStoreIndex on top of the existing Qdrant vector store.
# No embedding progress appears in the output below, so nothing is re-embedded
# by this call.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
    show_progress=True,
)
# Trailing expression: display the index repr.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7f65a3cecbe0>

In [10]:
from llama_index.core import QueryBundle
from llama_index.core.retrievers import VectorIndexRetriever

In [12]:
# Retriever over `index` that asks for the 5 most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)

In [13]:
# Sample question used to sanity-check retrieval.
query_bundle = QueryBundle(
    query_str=(
        "What are the two critical areas of RAG system performance that are assessed "
        'in the "Evaluating RAG with LlamaIndex" section of the OpenAI Cookbook?'
    )
)
query_bundle

QueryBundle(query_str='What are the two critical areas of RAG system performance that are assessed in the "Evaluating RAG with LlamaIndex" section of the OpenAI Cookbook?', image_path=None, custom_embedding_strs=None, embedding=None)

In [14]:
# Run the retriever on the sample query; the single "Batches" progress bar in
# the output is the query being embedded.
retrieved_nodes = retriever.retrieve(query_bundle)

Batches: 100%|██████████| 1/1 [00:00<00:00,  5.89it/s]


In [24]:
import json
# Pretty-print the metadata of every retrieved node, followed by a divider line.
divider = "==" * 40
for position, hit in enumerate(retrieved_nodes):
    metadata_json = json.dumps(hit.metadata, indent=2)
    print(f"Node {position}\n", metadata_json)
    print(divider)

Node 0
 {
  "filename": "openai-cookbook-evaluating-rag-systems-fe393c61fb93.md",
  "extension": ".md",
  "title": "OpenAI Cookbook: Evaluating RAG systems",
  "date": "Nov 28, 2023",
  "url": "https://www.llamaindex.ai/blog/openai-cookbook-evaluating-rag-systems-fe393c61fb93"
}
Node 1
 {
  "Header_1": " Introduction",
  "filename": "evaluating-the-ideal-chunk-size-for-a-rag-system-using-llamaindex-6207e5d3fec5.md",
  "extension": ".md",
  "title": "Evaluating the Ideal Chunk Size for a RAG System using LlamaIndex",
  "date": "Oct 5, 2023",
  "url": "https://www.llamaindex.ai/blog/evaluating-the-ideal-chunk-size-for-a-rag-system-using-llamaindex-6207e5d3fec5"
}
Node 2
 {
  "Header_1": " Results",
  "Header_2": " Comparing to OpenAI Models:",
  "filename": "nvidia-research-rag-with-long-context-llms-7d94d40090c4.md",
  "extension": ".md",
  "title": "NVIDIA Research: RAG with Long Context LLMs",
  "date": "Oct 22, 2023",
  "url": "https://www.llamaindex.ai/blog/nvidia-research-rag-with-

In [18]:
# Embed `nodes` and add them to the existing index.
# The previous version called build_index_from_nodes and assigned the result
# back to `index`; that method returns the index structure, not a
# VectorStoreIndex, so `index` stopped being usable as an index afterwards.
# It also passed a misspelled `show_prpgress` keyword. insert_nodes updates the
# index in place and leaves the `index` variable intact.
index.insert_nodes(nodes, show_progress=True)

Generating embeddings:   0%|          | 0/1221 [00:00<?, ?it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.58it/s][A
Generating embeddings:   1%|          | 10/1221 [00:00<00:16, 74.85it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.44it/s][A
Generating embeddings:   2%|▏         | 20/1221 [00:00<00:16, 72.75it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 14.03it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 13.60it/s]
Generating embeddings:   3%|▎         | 40/1221 [00:00<00:12, 92.27it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 13.68it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 14.61it/s]
Generating embeddings:   5%|▍         | 60/1221 [00:00<00:11, 100.50it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 15.21it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 14.83it/s]
Generating embeddings:   7%|▋         | 80/1221 [00:00<00:10, 108.25it/s]
Batches: 100%|██████

In [20]:
i

dict_keys([])

In [1]:
import indexing

  from .autonotebook import tqdm as notebook_tqdm


INFO:datasets:PyTorch version 2.3.0+cu118 available.
PyTorch version 2.3.0+cu118 available.


In [2]:
# Build the Qdrant-backed index through the project-local `indexing` module;
# the log below shows it loading 166 documents and embedding 1221 nodes.
# NOTE(review): `creat_qdrant_index` looks like a typo for `create_qdrant_index`,
# but the name is defined in the `indexing` module (not visible here), so any
# rename has to start there.
qdrant_index = indexing.creat_qdrant_index()

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: models/bge-base-en-v1.5
Load pretrained SentenceTransformer: models/bge-base-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']
LLM is explicitly disabled. Using MockLLM.
Creating index from documents store
Num documents: 166


Parsing documents: 100%|██████████| 166/166 [00:00<00:00, 11274.83it/s]
Parsing nodes: 100%|██████████| 166/166 [00:00<00:00, 943.83it/s] 


Loaded 1221 documents


Generating embeddings:   0%|          | 0/1221 [00:00<?, ?it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.27it/s][A
Generating embeddings:   1%|          | 10/1221 [00:00<00:57, 20.93it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 10.52it/s]
Generating embeddings:   2%|▏         | 20/1221 [00:00<00:31, 37.73it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.45it/s][A
Generating embeddings:   2%|▏         | 30/1221 [00:00<00:24, 48.37it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 11.24it/s]
Generating embeddings:   3%|▎         | 40/1221 [00:00<00:20, 58.15it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 11.32it/s]
Generating embeddings:   4%|▍         | 50/1221 [00:00<00:17, 66.69it/s]
Batches:   0%|          | 0/1 [00:00<?, ?it/s][A
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.24it/s][A
Generating embeddings:   5%|▍         | 60/1221 [00:01<00:17, 67.65it/s]
Batches: 

Payload indexes have no effect in the local Qdrant. Please use server Qdrant if you need payload indexes.


In [3]:
# Inspect the index bookkeeping struct. nodes_dict is empty in the output below —
# presumably because the nodes are kept in Qdrant rather than in the struct;
# TODO confirm.
qdrant_index.index_struct

IndexDict(index_id='384e2440-a326-416a-98c2-404e571d24c5', summary=None, nodes_dict={}, doc_id_dict={}, embeddings_dict={})

In [5]:
# Fetch nodes back from the index's vector store (called without arguments;
# 1221 nodes come back according to the len() check further down).
test_nodes = qdrant_index.vector_store.get_nodes()

In [6]:
# Debugging aid: list the attributes of the result. The listing below contains
# only the standard list methods, i.e. get_nodes() returned a plain list.
dir(test_nodes)

['__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [7]:
# Number of nodes returned by the vector store (matches the 1221 loaded above).
len(test_nodes)

1221

In [8]:
# Metadata of the first returned node: header, source filename, extension,
# title, date and url.
test_nodes[0].metadata

{'Header_1': ' References',
 'filename': 'mastering-pdfs-extracting-sections-headings-paragraphs-and-tables-with-cutting-edge-parser-faea18870125.md',
 'extension': '.md',
 'title': 'Mastering PDFs: Extracting Sections, Headings, Paragraphs, and Tables with Cutting-Edge Parser',
 'date': 'Oct 18, 2023',
 'url': 'https://www.llamaindex.ai/blog/mastering-pdfs-extracting-sections-headings-paragraphs-and-tables-with-cutting-edge-parser-faea18870125'}

In [10]:
# No output was recorded for this cell, so the expression evaluated to None —
# presumably get_nodes() does not return the stored vectors; TODO confirm.
test_nodes[0].embedding

In [2]:
import qdrant_client

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Re-open the Qdrant database stored under "qdrant_db" (fresh kernel session).
client = qdrant_client.QdrantClient(path="qdrant_db")

In [8]:
# WARNING: destructive. Drops the "md-llama-blogs" collection from the
# database; the call returned True in the recorded output.
client.delete_collection("md-llama-blogs")

True

In [10]:
# List the collections present in the database; "md-llama-blogs" is not among
# them in the output below.
client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='llamaindex-blogs'), CollectionDescription(name='llamaindex-blogs-hybrid-search')])