In [1]:
%%capture
!pip install llama-index==0.10.37 llama-index-embeddings-cohere qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 llama-index-llms-cohere==0.2.0 llama-index-llms-ollama

In [2]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

nest_asyncio.apply()

load_dotenv()

sys.path.append('../helpers')

from utils import setup_llm, setup_embed_model, setup_vector_store

In [3]:
CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter your Cohere API key: ")

In [4]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or getpass("Enter your OpenAI API key: ")

In [5]:
QDRANT_URL = os.environ['QDRANT_URL'] or getpass("Enter your Qdrant URL:")

In [6]:
QDRANT_API_KEY = os.environ['QDRANT_API_KEY'] or  getpass("Enter your Qdrant API Key:")

In [7]:
from llama_index.core.settings import Settings
from utils import setup_llm, setup_embed_model, setup_vector_store

COLLECTION_NAME = "words-of-the-senpai-naive"

# Use Ollama instead of Cohere for consistency
setup_llm(
    provider="ollama", 
    model="", 
    api_key=""
    )

setup_embed_model(provider="cohere", api_key=CO_API_KEY)

# Debug: Verify models are set correctly
print(f"LLM type: {type(Settings.llm)}")
print(f"Embedding model type: {type(Settings.embed_model)}")

vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)

LLM type: <class 'llama_index.llms.ollama.base.Ollama'>
Embedding model type: <class 'llama_index.embeddings.cohere.base.CohereEmbedding'>


In [8]:
Settings.llm

Ollama(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x17a1e9310>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x16e8bc2c0>, completion_to_prompt=<function default_completion_to_prompt at 0x175f996c0>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, base_url='http://localhost:11434', model='qwen2.5:7b', temperature=None, context_window=8000, request_timeout=120.0, prompt_key='prompt', json_mode=False, additional_kwargs={}, is_function_calling_model=True, keep_alive=None, thinking=None)

In [9]:
Settings.embed_model

CohereEmbedding(model_name='embed-english-v3.0', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x17a1e9310>, num_workers=None, embeddings_cache=None, api_key='D837umjWnWmrAcfabry3it8bn3ZZs74TOLi73Zqp', base_url=None, truncate='END', input_type=None, embedding_type='float')

# Load Docstore

In [10]:
from utils import get_documents_from_docstore

documents = get_documents_from_docstore("../data/words-of-the-senpais")

In [11]:
documents[0].__dict__

{'id_': '7a6ec0a9-fc20-4eee-8665-1cef10deb231',
 'embedding': None,
 'metadata': {'page_number': 0,
  'file_name': '../data/almanack_of_naval_ravikant.pdf',
  'title': 'The Almanack of Naval Ravikant',
  'author': 'Naval Ravikant'},
 'excluded_embed_metadata_keys': [],
 'excluded_llm_metadata_keys': [],
 'relationships': {},
 'metadata_template': '{key}: {value}',
 'metadata_separator': '\n',
 'text_resource': MediaResource(embeddings=None, data=None, text='UNDERSTAND HOW WEALTH IS CREATED I like to think that if I lost all my money and you dropped me on a random street in any English-speaking country, within five or ten years Id be wealthy again because its just a skillset Ive developed that anyone can develop. Its not really about hard work. You can work in a restaurant eighty hours a week, and youre not going to get rich. Getting rich is about knowing what to do, who to do it with, and when to do it. It is much more about understanding than purely hard work. Yes, hard work matters, 

# Ingest to VectorDatabase

In [13]:
from llama_index.core.constants import DEFAULT_CHUNK_SIZE
from llama_index.core.node_parser.text import SentenceSplitter
from llama_index.core import StorageContext
from utils import ingest

print(f"This is the chunk size: {DEFAULT_CHUNK_SIZE}")

# Fix typo: transforms not tranforms
transforms = [
    SentenceSplitter(chunk_size=DEFAULT_CHUNK_SIZE), 
    Settings.embed_model
    ]

nodes = ingest(
    transformations=transforms,
    documents=documents,
    vector_store=vector_store,
)

print(f"Processed {len(nodes)} nodes")

This is the chunk size: 1024


TooManyRequestsError: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '837eec61c0845d9f05b281de52510868', 'x-endpoint-monthly-call-limit': '1000', 'x-trial-endpoint-call-limit': '100', 'x-trial-endpoint-call-remaining': '63', 'date': 'Thu, 06 Nov 2025 13:04:33 GMT', 'x-envoy-upstream-service-time': '11', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': 'a6354110-682f-4f89-ae69-97c34bccb43a', 'message': 'trial token rate limit exceeded, limit is 100000 tokens per minute'}

# Build index over vector database


In [14]:
from utils import create_index, create_query_engine

storage_context = StorageContext.from_defaults(
    vector_store=vector_store
    )

index = create_index(
    from_where="vector_store",
    embed_model=Settings.embed_model, 
    vector_store=vector_store, 
    storage_context=storage_context
    )

query_engine = create_query_engine(
    index=index, 
    mode="query",
    similarity_top_k=3,
    return_sources=True
    )

# Query Workflows (Updated from deprecated QueryPipeline)

In [15]:
from llama_index.core.workflow import Workflow, step, StartEvent, StopEvent
import asyncio

class QueryEvent(StartEvent):
    input: str

class ResponseEvent(StopEvent):
    result: str

class QueryWorkflow(Workflow):
    def __init__(self, query_engine, **kwargs):
        super().__init__(**kwargs)
        self.query_engine = query_engine
    
    @step
    async def run_query(self, ev: QueryEvent) -> ResponseEvent:
        response = self.query_engine.query(ev.input)
        return ResponseEvent(result=response)

# Create workflow instance
workflow = QueryWorkflow(query_engine=query_engine, verbose=True)

  class ResponseEvent(StopEvent):


# Query

In [16]:
async def run_workflow_query(input_text):
    response = await workflow.run(input=input_text)
    return response

response_1 = asyncio.run(run_workflow_query("What mental models, principles or philosophies are most valuable for making important life decisions and navigating an uncertain world?"))

Running step run_query


UnexpectedResponse: Unexpected Response: 404 (Not Found)
Raw response content:
b'{"status":{"error":"Not found: Collection `words-of-the-senpai-naive` doesn\'t exist!"},"time":2.74e-6}'

In [18]:
print(response_1)

NameError: name 'response_1' is not defined

In [None]:
# Access source nodes from the response
if hasattr(response_1, 'source_nodes') and response_1.source_nodes:
    print("Source node metadata:")
    print(response_1.source_nodes[0].__dict__)
else:
    print("No source nodes available")

In [None]:
response_2 = asyncio.run(run_workflow_query("What is the essence of a life well-lived, and what are the key components of a good life that everyone should strive for?"))
print(response_2)

In [None]:
response_3 = asyncio.run(run_workflow_query("What role does luck play in achieving outsized success and wealth creation? Is capitalizing on luck a skill that can be cultivated?"))
print(response_3)

In [None]:
response_4 = asyncio.run(run_workflow_query("What are some commonalities between Naval Ravikant and Nassim Taleb's thinking on luck?"))
print(response_4)