In [None]:
import asyncio
import nest_asyncio

nest_asyncio.apply()
import os
import inspect
import logging
from lightrag import LightRAG, QueryParam
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
from lightrag.utils import EmbeddingFunc
from lightrag.kg.shared_storage import initialize_pipeline_status

# Directory where LightRAG keeps its working files for this demo.
WORKING_DIR = "./dickens"

logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

# Create the working directory if needed. `exist_ok=True` replaces the
# check-then-create pattern, so re-running the cell (or a concurrent run) cannot
# fail between the existence check and the mkdir call. If the path exists but is
# a regular file, this raises FileExistsError instead of silently continuing.
os.makedirs(WORKING_DIR, exist_ok=True)


async def initialize_rag():
    """Create a LightRAG instance wired to an Ollama server and initialize it.

    The LLM ("gemma2:2b") and the embedding model ("nomic-embed-text") are both
    reached through the Ollama endpoint at http://localhost:11434.

    Returns:
        The LightRAG instance, after its storages and the shared pipeline
        status have been initialized.
    """
    ollama_host = "http://localhost:11434"
    context_size = 32768

    # Embedding wrapper: 768-dimensional vectors, inputs of up to 8192 tokens.
    embedder = EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embed(
            texts, embed_model="nomic-embed-text", host=ollama_host
        ),
    )

    # The same context size is passed to LightRAG and to Ollama's `num_ctx`.
    llm_kwargs = {
        "host": ollama_host,
        "options": {"num_ctx": context_size},
    }

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=ollama_model_complete,
        llm_model_name="gemma2:2b",
        llm_model_max_async=4,
        llm_model_max_token_size=context_size,
        llm_model_kwargs=llm_kwargs,
        embedding_func=embedder,
    )

    # Both initialization steps are awaited before the instance is handed back.
    await rag.initialize_storages()
    await initialize_pipeline_status()

    return rag


async def print_stream(stream):
    """Print every item of an async iterable as it arrives.

    Items are written back-to-back with no separator or trailing newline, and
    stdout is flushed after each one so partial output shows up immediately.

    Args:
        stream: An async iterable yielding printable chunks.
    """
    async for piece in stream:
        print(piece, end="", flush=True)


def main():
    """Run the demo: build the RAG instance, index the book, and query it.

    The same question is asked in the naive, local, global and hybrid modes,
    then once more in hybrid mode with streaming enabled.
    """
    question = "What are the top themes in this story?"

    # (heading printed before the answer, QueryParam mode) for each search.
    searches = (
        ("\nNaive Search:", "naive"),
        ("\nLocal Search:", "local"),
        ("\nGlobal Search:", "global"),
        ("\nHybrid Search:", "hybrid"),
    )

    # Initialize RAG instance
    rag = asyncio.run(initialize_rag())

    # Insert example text
    with open("./book.txt", "r", encoding="utf-8") as book:
        rag.insert(book.read())

    # Test different query modes
    for heading, mode in searches:
        print(heading)
        print(rag.query(question, param=QueryParam(mode=mode)))

    # stream response
    streamed = rag.query(
        question,
        param=QueryParam(mode="hybrid", stream=True),
    )

    # The result is only consumed as a stream when it is an async generator;
    # anything else is printed as-is.
    if not inspect.isasyncgen(streamed):
        print(streamed)
        return

    asyncio.run(print_stream(streamed))

# Entry-point guard: run the demo only when this module is executed directly,
# not when it is imported by other code.
if __name__ == "__main__":
    main()

INFO: Process 53267 Shared-Data created for Single Process


Collecting graspologic
  Downloading graspologic-3.4.1-py3-none-any.whl.metadata (5.8 kB)
Collecting POT<0.10,>=0.9 (from graspologic)
  Downloading POT-0.9.5-cp312-cp312-macosx_11_0_arm64.whl.metadata (34 kB)
Collecting anytree<3.0.0,>=2.12.1 (from graspologic)
  Downloading anytree-2.12.1-py3-none-any.whl.metadata (8.1 kB)
Collecting beartype<0.19.0,>=0.18.5 (from graspologic)
  Downloading beartype-0.18.5-py3-none-any.whl.metadata (30 kB)
Collecting graspologic-native<2.0.0,>=1.2.1 (from graspologic)
  Downloading graspologic_native-1.2.3-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (2.6 kB)
Collecting hyppo<0.5.0,>=0.4.0 (from graspologic)
  Downloading hyppo-0.4.0-py3-none-any.whl.metadata (1.7 kB)
Collecting scipy==1.12.0 (from graspologic)
  Downloading scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (217 kB)
Collecting seaborn<0.14.0,>=0.13.2 (from graspologic)
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collec