# Summary

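This notebook demonstrates how to use LlamaIndex, first for a simple LLM request/response and then for a small retrieval-augmented generation (RAG) application.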

Before running the cells below, run `hf auth login` in a terminal to authenticate with your Hugging Face account.

In [18]:
from pprint import pprint

import chromadb
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.readers.web import SimpleWebPageReader
from llama_index.vector_stores.chroma import ChromaVectorStore

# Simple Request/Response

In [30]:
# LLM client backed by the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct", provider="auto"
)

# Send one prompt and print the completion that comes back.
greeting_response = llm.complete("Hello, how are you?")
print(greeting_response)

Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


# RAG Application

### Load data into a list of Document objects

In [19]:
# Alternative source: SimpleDirectoryReader reads every document in a directory
# and selects the appropriate reader for each file type.
# reader = SimpleDirectoryReader(input_dir="rag_data/")
# documents = reader.load_data()

# Here we read from a web page instead, asking the reader to convert the HTML to text.
page_urls = ["https://www.sciencedaily.com/releases/2025/08/250814094648.htm"]
web_reader = SimpleWebPageReader(html_to_text=True)
documents = web_reader.load_data(page_urls)

documents

[Document(id_='fee78ac7-20ae-47fc-8719-da11e6c122f4', embedding=None, metadata={'url': 'https://www.sciencedaily.com/releases/2025/08/250814094648.htm'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Skip to main content\n\n[![ScienceDaily](/images/sd-logo.png)](/ "ScienceDaily")\n\n* * *\n\nYour source for the latest research news\n\nFollow: [_Facebook_](https://www.facebook.com/sciencedaily "Facebook")\n[_X/Twitter_](https://twitter.com/sciencedaily "X/Twitter") Subscribe: [_RSS\nFeeds_](/newsfeeds.htm "RSS Feeds")\n[_Newsletter_](https://sciencedaily.substack.com "Newsletter")\n\n**New!** Sign up for our free **[email\nnewsletter](https://sciencedaily.substack.com/#)**.\n\nScience News\n\nfrom research organizations\n\n* * *\n\n# Scientists may have found the tiny DNA switch that made us human\n\nDate:\n\n    August 15, 2025\n

### Create and store embeddings

* Embeddings are numeric vector representations of text that capture its meaning in a form that can be compared efficiently.
  * *Embedding models* are trained specifically to turn text into numeric vectors (“embeddings”) so you can do similarity search, clustering, RAG retrieval, etc.
  * `HuggingFaceEmbedding` is LlamaIndex’s adapter for those models. It downloads a model from the Hugging Face Hub and runs it locally (CPU/GPU) to produce embeddings you can feed into a VectorStoreIndex (or any retriever).
* We store the embeddings in a vector database.

In [20]:
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("test")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        # Break down documents into manageable chunks by splitting them at natural sentence boundaries.
        SentenceSplitter(chunk_overlap=0),
        # Here we load the embedding model which will convert each chunk into numerical embeddings
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents)

nodes

[TextNode(id_='e5a0b36e-dfc5-4e20-b4b8-0d171d8b2edb', embedding=[-0.02571648173034191, -0.0024829525500535965, -0.03258051723241806, 0.03769024461507797, -0.000275850499747321, 0.03315267339348793, 0.047367163002491, 0.05615035817027092, -0.0034032512921839952, -0.010688920505344868, 0.034561749547719955, -0.024456001818180084, -0.01561195868998766, -0.007417514454573393, 0.033689241856336594, -0.009616528637707233, -0.11560506373643875, -0.06252361834049225, -0.07642742991447449, 0.014012221246957779, 0.02804940938949585, -0.025953015312552452, -0.01577821746468544, 0.01930089108645916, -0.0252955611795187, -0.018631616607308388, -0.041194137185811996, -0.03288094326853752, 0.020273856818675995, -0.21717272698879242, 0.052838657051324844, 0.035397130995988846, 0.044349413365125656, -0.02673940174281597, -0.026671020314097404, -0.019386230036616325, 0.02276168204843998, -0.0010265741730108857, -0.016939407214522362, 0.05921785533428192, 0.0416545569896698, -0.0289162565022707, 0.000401

In [21]:
# Build the index on top of the existing vector store, using the same embedding
# model name that the ingestion pipeline used.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# Alternatively, the index can be created from the documents directly.
# That does not persist the embeddings anywhere, so they would have to be
# recreated every time.
# index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    provider="auto",
)

# `as_query_engine` creates a query engine for single question-answer
# interactions that return a written response.
# The different response modes are listed here:
# https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes/
query_engine = index.as_query_engine(llm=llm, response_mode="compact")

answer = query_engine.query("Tell me about the tiny DNA switch that made us human.")
pprint(answer)

Response(response='Scientists have identified a small section of DNA called '
                  'HAR123 that may explain the unique features of the human '
                  'brain. Unlike a gene, HAR123 acts as a "volume control" for '
                  'brain development, influencing which genes are activated, '
                  'how much they are activated, and when. This molecular '
                  'enhancer is crucial in the development of neural progenitor '
                  'cells, which give rise to neurons and glial cells. HAR123 '
                  'also affects the ratio of these cell types, contributing to '
                  'cognitive flexibility, the ability to adapt and learn. The '
                  'human version of HAR123 differs from the chimpanzee '
                  'version, possibly accounting for some of the significant '
                  'changes in human brain development. This discovery could '
                  'provide insights into neurodevelopmental