In [3]:
import os

from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.vector_stores.milvus import MilvusVectorStore
from pymilvus import MilvusClient

# Load the .env file BEFORE anything reads the environment. In the previous
# ordering os.getenv("GEMINI_API_KEY") was evaluated first, so on a fresh
# kernel the key was None unless it was already set in the process environment.
env_loaded = load_dotenv()

# Embedding model used by LlamaIndex, served over HTTP at the given base URL.
Settings.embed_model = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url="http://localhost:11434",
)

# LLM used by LlamaIndex; the API key is read from the environment loaded above.
Settings.llm = GoogleGenAI(
    model="gemini-2.5-flash",
    api_key=os.getenv("GEMINI_API_KEY"),
)

# Last expression of the cell: display the value returned by load_dotenv().
env_loaded

True

In [17]:
# Directory that holds the sample files. It can be overridden through the
# SAMPLES_DIR environment variable; the default is the original location, so
# behaviour is unchanged when the variable is not set.
samples_dir = os.getenv(
    "SAMPLES_DIR",
    r"C:\Users\ijona\Desktop\DocumentIngestionPipeline-AllCognix\samples",
)

# Files to ingest, relative to samples_dir (order preserved from the original list).
sample_file_names = ["uber_2021.pdf", "paul_graham_essay.txt", "RAFT.pdf"]

# Read every listed file into a list of Document objects.
docs = SimpleDirectoryReader(
    input_files=[os.path.join(samples_dir, name) for name in sample_file_names]
).load_data()

In [18]:
# Report how many Document objects the reader produced.
doc_count = len(docs)
print("Total Documents:", doc_count)

Total Documents: 320


In [32]:
# ID of the document to inspect.
# NOTE(review): this looks like an auto-generated UUID; it may differ every
# time the documents are reloaded -- confirm before relying on it after a restart.
docid = "a50316fc-e832-4a41-856c-4b5612acae4f"

# Always (re)bind `resp`: the first document whose doc_id matches, or None.
# The previous loop left `resp` unbound (or stale from an earlier run) when
# nothing matched, so the next cell could silently show the wrong document.
resp = next((doc for doc in docs if doc.doc_id == docid), None)
if resp is None:
    print(f"No document with doc_id {docid!r} among {len(docs)} loaded documents")

In [33]:
# Bare last expression: the notebook renders the repr of the Document bound to `resp`.
resp

Document(id_='a50316fc-e832-4a41-856c-4b5612acae4f', embedding=None, metadata={'page_label': '3', 'file_name': 'uber_2021.pdf', 'file_path': 'C:\\Users\\ijona\\Desktop\\DocumentIngestionPipeline-AllCognix\\samples\\uber_2021.pdf', 'file_type': 'application/pdf', 'file_size': 1880483, 'creation_date': '2026-01-10', 'last_modified_date': '2026-01-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='UBER TECHNOLOGIES, INC.\nTABLE OF CONTENTSPages Special Note Regarding Forward-Looking Statements2 PART IItem 1.Business 4 Item 1A.Risk Factors 11 Item 1B.Unresolved Staff Comments 46 Item 2.Properties 46 Item 3.Legal Proceedings 46 Item 4.Mine S

In [31]:
# Show the identifier of the first loaded document.
first_doc = docs[0]
print("Document ID:", first_doc.doc_id)

Document ID: 4b353064-5145-4f60-8026-1e79c017d15f


In [20]:
# Milvus connection settings are read from the environment.
milvus_uri = os.getenv("MILVUS_ENDPOINT")
milvus_token = os.getenv("MILVUS_TOKEN")
milvus_collection = os.getenv("MILVUS_COLLECTION")

# Notes on the arguments below:
#   dim=768        -> vector size for the collection (presumably the output
#                     size of the configured embedding model -- TODO confirm)
#   overwrite=True -> drop the collection if it already exists
#   enable_sparse=True (not passed) would enable the default full-text
#                     search using BM25
vector_store = MilvusVectorStore(
    uri=milvus_uri,
    token=milvus_token,
    collection_name=milvus_collection,
    dim=768,
    overwrite=True,
)

In [21]:
# Point LlamaIndex's storage layer at the Milvus vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

# Build the index from the loaded documents, with progress output enabled.
index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
    show_progress=True,
)



Parsing nodes: 100%|██████████| 320/320 [00:01<00:00, 267.97it/s]
2026-01-12 09:46:45,691 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:46,372 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:47,140 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:48,036 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:48,954 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:49,994 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:50,976 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:52,006 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:46:52,950 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01

In [22]:
# Create a chat engine on top of the index; "best" is the chat mode requested.
chat_engine = index.as_chat_engine(chat_mode="best")

In [23]:
response = await chat_engine.achat(
    "Explain how obsolete customs shaped both venture capital practices and online essay publishing, "
    "and then connect this insight to the naming, branding, and funding model of Y Combinator. "
    "In your answer, cite at least three distinct examples from different numbered sections that "
    "together support the argument."
)
print(response.response)


2026-01-12 09:47:56,664 - INFO - Condensed question: Explain how obsolete customs shaped both venture capital practices and online essay publishing, and then connect this insight to the naming, branding, and funding model of Y Combinator. In your answer, cite at least three distinct examples from different numbered sections that together support the argument.
2026-01-12 09:47:56,825 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2026-01-12 09:47:57,752 - INFO - AFC is enabled with max remote calls: 10.


Obsolete customs, which are practices that persist long after the original constraints that caused them have disappeared, significantly shaped both venture capital practices and online essay publishing. This insight directly influenced the naming, branding, and funding model of Y Combinator.

**Obsolete Customs in Venture Capital and Online Essay Publishing:**

*   **General Principle:** The text highlights a general lesson that "Customs continue to constrain you long after the restrictions that caused them have disappeared" [12]. This means that practices established under old conditions often remain, even when those conditions no longer apply.
*   **Online Essay Publishing:** In the realm of essay writing, customs were initially shaped by the "constraints of the print era" [12]. However, with the advent of the internet, "publishing online means you treat the online version as the (or at least a) primary version," which was a departure from simply putting content online in the 90s [11

In [27]:
# Query Milvus directly to check how many entities the collection holds.
# `MilvusClient` and `os` come from the import cell at the top of the
# notebook, so this cell no longer carries its own (duplicate) imports.
client = MilvusClient(
    uri=os.getenv("MILVUS_ENDPOINT"),
    token=os.getenv("MILVUS_TOKEN"),
)

# Count entities in the collection: an empty filter with the count(*)
# output field asks for a single aggregate row.
res = client.query(
    collection_name=os.getenv("MILVUS_COLLECTION"),
    filter="",
    output_fields=["count(*)"],
)

# The count is read from the first result row under the "count(*)" key.
# NOTE(review): this counts stored entities, which need not equal the number
# of loaded documents -- presumably documents are split into chunks; confirm.
count = int(res[0]["count(*)"])
print("Total entities:", count)

Total entities: 437
