# Install dependencies

In [1]:
!pip install llama-index datasets llama-index-callbacks-arize-phoenix arize-phoenix llama-index-vector-stores-chroma llama-index-llms-ollama llama-index-embeddings-ollama -U -q

# Create a QueryEngine for RAG (retrieval-augmented generation)

In [2]:
from datasets import load_dataset
from pathlib import Path
import chromadb
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
import nest_asyncio

# Apply nest_asyncio for async operations: the notebook already runs an event
# loop, and this patch lets library code start its own loop inside it.
nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm


## Setting up the persona database

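We will use personas from the `dvilasuero/finepersonas-v0.1-tiny` dataset, which contains 5K personas. Each persona is written to its own text file in the `data/` directory so that it can be loaded as a separate document later.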

In [None]:
# Load and prepare the persona dataset
from datasets import load_dataset
from pathlib import Path

# Fetch the "train" split of the persona dataset.
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

# Write each persona to data/persona_<i>.txt.
# The encoding is set explicitly: without it the platform default is used,
# which can fail on or garble non-ASCII persona text (e.g. on Windows).
data_dir = Path("data")
data_dir.mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    (data_dir / f"persona_{i}.txt").write_text(persona["persona"], encoding="utf-8")

Generating train split: 100%|██████████| 5000/5000 [00:00<00:00, 81453.23 examples/s]


## Loading and embedding persona documents

In [3]:
# Load documents: read the files under data/ into `documents`.
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Sanity check on the load; the recorded run reports 5000 documents,
# matching the 5000 examples in the dataset split.
print(f"Loaded {len(documents)} documents")

Loaded 5000 documents


In [3]:
# Build an ingestion pipeline: split documents, then embed the chunks with Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        OllamaEmbedding(model_name="nomic-embed-text"),
    ]
)

print("Running ingestion pipeline to create nodes and embeddings...")
nodes = await pipeline.arun(documents=documents[:10])
print(f"Created {len(nodes)} nodes.")

Running ingestion pipeline to create nodes and embeddings...
Created 10 nodes.


In [4]:
# Display the raw node list. The repr includes every node's full embedding
# vector, so the rendered output is very long.
nodes

[TextNode(id_='96d04737-a481-4056-8182-b98b46536af2', embedding=[0.7358105182647705, 0.6447570323944092, -3.856809616088867, -1.709150791168213, 0.1662992537021637, -0.08267813920974731, -0.1591942310333252, 0.08301569521427155, -0.03844783082604408, 0.21667028963565826, -0.3623208999633789, -0.10040002316236496, 1.869287133216858, 0.657587468624115, 0.6075468063354492, -0.7229706048965454, 0.4035753309726715, -0.2514852285385132, -0.04281019791960716, -0.2706449329853058, -0.9325107336044312, -0.7095361351966858, 0.3756970167160034, 0.9279154539108276, 2.2368645668029785, -0.08914770185947418, 0.9490557312965393, -0.0628930926322937, -0.4907386898994446, 0.1252397745847702, 0.08967325091362, -0.003197386860847473, -0.7729629278182983, -0.01118624210357666, 0.21016518771648407, -0.10985015332698822, 0.9046072363853455, 0.7183942794799805, 0.8404996395111084, 0.8908463716506958, 1.2704576253890991, 0.20221911370754242, -0.6074115037918091, -1.5774147510528564, 0.19644758105278015, -0.35

## Storing and indexing documents

In [4]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

print("Setting up ChromaDB vector store...")
db = chromadb.PersistentClient(path="./alfred_chroma_db_ollama") # Use a different path to avoid conflicts
chroma_collection = db.get_or_create_collection(name="alfred_ollama")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        OllamaEmbedding(model_name="nomic-embed-text"),
    ],
    vector_store=vector_store,
)

# Run the pipeline again to store the documents. It will use the cache.
print("Storing nodes in ChromaDB...")
await pipeline.arun(documents=documents[:100])
print("Nodes stored successfully.")

Setting up ChromaDB vector store...
Storing nodes in ChromaDB...
Nodes stored successfully.


In [5]:
# Create a VectorStoreIndex from the existing vector store
from llama_index.core import VectorStoreIndex

# The index is given the same embedding model name ("nomic-embed-text") that
# the ingestion pipeline used to embed the stored nodes.
embed_model = OllamaEmbedding(model_name="nomic-embed-text")
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, embed_model=embed_model)

## Querying the index

In [7]:
from llama_index.llms.ollama import Ollama
import nest_asyncio

# Needed to run the query engine in some environments (nested event loops).
nest_asyncio.apply()

# Local Llama 3 model served by Ollama; the timeout is raised because local
# generation can be slow.
llm = Ollama(model="llama3:8b", request_timeout=120.0)

# Query engine over the index, answering with the local LLM in
# "tree_summarize" response mode.
query_engine = index.as_query_engine(llm=llm, response_mode="tree_summarize")

question = "Respond using a persona that describes author and travel experiences?"

print("\nQuerying the index with the local Ollama LLM...")
response = query_engine.query(question)
# One print call, two lines of output: the label, then the response text.
print("Response:", response, sep="\n")


Querying the index with the local Ollama LLM...
Response:
I'd be delighted to share my adventures with you! As an author who writes biographical stories on influential historical figures, I've had the privilege of traveling to various parts of the world, exploring the places where these pioneers once lived and worked. From the sun-kissed hills of Tuscany to the bustling streets of New York City, each destination has its own unique charm and story to tell. When I'm not scribbling away in my notebook or conducting interviews with historians and experts, you can find me on a plane, bound for the next exciting chapter in my narrative journey!


## Evaluation and observability

In [8]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# Evaluate the `response` produced by the query cell; no new query is run here.
# NOTE(review): presumably the evaluator judges whether the answer is supported
# by the retrieved source context carried on `response` — confirm.
print("\nEvaluating response faithfulness with the local Ollama LLM...")
# The same local LLM that generated the answer is used as the judge.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)
# `passing` is the evaluator's overall verdict for this response.
print(f"Faithfulness evaluation passed: {eval_result.passing}")


Evaluating response faithfulness with the local Ollama LLM...
Faithfulness evaluation passed: True
