In [2]:
import asyncio
import nest_asyncio
import os
import inspect
import logging
import csv

from lightrag import LightRAG, QueryParam
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
from lightrag.utils import EmbeddingFunc
from lightrag.kg.shared_storage import initialize_pipeline_status

# Patch asyncio before any asyncio.run() call below. This code is a notebook
# cell, where an event loop is presumably already running; nest_asyncio is
# meant to permit nested loop use in that case (confirm against its docs).
nest_asyncio.apply()

# Directory handed to LightRAG as its working_dir in initialize_rag().
WORKING_DIR = "./dickens"

logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(WORKING_DIR, exist_ok=True)


async def initialize_rag():
    """Create a LightRAG instance backed by a local Ollama server and initialize it.

    The instance stores its data under ``WORKING_DIR``, uses
    ``ollama_model_complete`` as the LLM function and ``ollama_embed`` (with
    the ``nomic-embed-text`` model) for embeddings. After construction the
    storages and the pipeline status are initialized.

    Returns:
        The initialized ``LightRAG`` instance.
    """
    ollama_host = "http://localhost:11434"
    context_window = 32768

    def embed(texts):
        # Hand back ollama_embed's result without awaiting it here, so the
        # caller receives exactly what a plain pass-through call would give.
        return ollama_embed(texts, embed_model="nomic-embed-text", host=ollama_host)

    embedding = EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=embed,
    )
    llm_kwargs = {
        "host": ollama_host,
        "options": {"num_ctx": context_window},
    }

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=ollama_model_complete,
        llm_model_name="deepseek-r1:1.5b1a",
        llm_model_max_async=4,
        llm_model_max_token_size=context_window,
        llm_model_kwargs=llm_kwargs,
        embedding_func=embedding,
    )

    # Both initialization steps must finish before the instance is used.
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag


async def print_stream(stream):
    """Print every item of an async iterable as soon as it arrives.

    Items are written back-to-back (no separator, no trailing newline) and
    stdout is flushed after each one so partial output is visible at once.

    Args:
        stream: An async iterable yielding printable pieces of a response.
    """
    async for piece in stream:
        print(piece, end="", flush=True)


def _format_csv_rows(csv_path):
    """Render the 'Topic' and 'Key Concepts/Themes' columns of a CSV file as text.

    Args:
        csv_path: Path of a UTF-8 CSV file with a header row containing the
            columns ``Topic`` and ``Key Concepts/Themes``.

    Returns:
        One string with a two-line entry per row, each followed by a blank
        line. Empty when the file has no data rows.

    Raises:
        KeyError: If a required column is missing from the header.
        OSError: If the file cannot be opened.
    """
    # newline="" is what the csv module documentation requires so that
    # newlines embedded in quoted fields are interpreted correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
        # join() is evaluated inside the with-block, so the file is still
        # open while the rows are consumed.
        return "".join(
            f"Topic: {row['Topic']}\n"
            f"Key Concepts/Themes: {row['Key Concepts/Themes']}\n\n"
            for row in csv.DictReader(csvfile)
        )


def main(csv_path="./42a.csv"):
    """Index a CSV file with LightRAG and run the demo queries against it.

    Steps: initialize the RAG instance, insert the formatted CSV contents,
    print the answer of one question per query mode (naive, local, global,
    hybrid), then repeat the hybrid question with ``stream=True`` and print
    the response, chunk by chunk when it is an async generator.

    Args:
        csv_path: CSV file to index; defaults to the original hard-coded path.
    """
    # Initialize RAG instance
    rag = asyncio.run(initialize_rag())

    # Insert the CSV data into the RAG system
    rag.insert(_format_csv_rows(csv_path))

    summary_question = (
        "Can you summarize how each topic combines concepts of "
        "self-expression, beauty, and resource management?"
    )

    # (heading label, query mode, question) for each demo query, in print order.
    demo_queries = (
        (
            "Naive",
            "naive",
            "What are the main topics and their associated themes?",
        ),
        (
            "Local",
            "local",
            "Which topic emphasizes the discipline of sensory focus and "
            "aesthetic enhancement?",
        ),
        (
            "Global",
            "global",
            "How do the themes reflect ideas of personal growth and "
            "moderation across different topics?",
        ),
        ("Hybrid", "hybrid", summary_question),
    )

    for label, mode, question in demo_queries:
        print(f"\n{label} Search:")
        print(rag.query(question, param=QueryParam(mode=mode)))

    # Stream response for one of the queries
    resp = rag.query(
        summary_question,
        param=QueryParam(mode="hybrid", stream=True),
    )

    # With stream=True the result is not always an async generator here, so
    # fall back to printing it directly when it is not.
    if inspect.isasyncgen(resp):
        asyncio.run(print_stream(resp))
    else:
        print(resp)


# Run the demo only when this code executes as the top-level module, not when
# it is imported.
if __name__ == "__main__":
    main()


INFO: Process 65859 Shared-Data already initialized (multiprocess=False)
INFO:Load (0, 768) data
INFO:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './dickens/vdb_entities.json'} 0 data
INFO:Load (0, 768) data
INFO:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './dickens/vdb_relationships.json'} 0 data
INFO:Load (4, 768) data
INFO:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './dickens/vdb_chunks.json'} 4 data
INFO: Process 65859 storage namespace already initialized: [full_docs]
INFO: Process 65859 storage namespace already initialized: [text_chunks]
INFO: Process 65859 storage namespace already initialized: [llm_response_cache]
INFO: Process 65859 storage namespace already initialized: [doc_status]
INFO: Process 65859 storage namespace already initialized: [full_docs]
INFO: Process 65859 storage namespace already initialized: [text_chunks]
INFO: Process 65859 storage namespace already initialized: [llm_response_cache]
INFO: Proce


Naive Search:
<think>
Okay, I need to figure out how to respond to this query about the document chunks provided. The user has asked for "What are the main topics and their associated themes?" So, they want me to identify the key areas and related concepts from each topic chunk.

First, let me look at each topic and see what they cover. Starting with Topic 22: Everyone has a void—existential suffering, emotional dissatisfaction, universal struggles, empathy, inner fulfillment. The theme here seems to be about feeling a hole or emptiness that's hard to escape but not always worth fighting for.

Moving on to Topic 23: Down with meditation—any form helpful? It mentions almost every meditation form is useful. This ties into self-improvement and the idea that meditating in different ways can help, so I should highlight various forms of meditation.

Topic 24 talks about gotta git up to git down—concentration for awareness, productivity, mental discipline. It's about practical strategies for