# LightRAG application for Q&A on the Long AtR Guide

## Imports

In [None]:
import os
import nest_asyncio
from dotenv import load_dotenv
from IPython.display import display, Markdown, update_display

import pdfplumber
from lightrag import LightRAG, QueryParam
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import setup_logger, EmbeddingFunc


# Load variables from a local .env file into the process environment; API keys
# such as GROQ_API_KEY are read from the environment further down.
load_dotenv()
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop while LightRAG's synchronous wrappers start their own — confirm.
nest_asyncio.apply()

# Configure the "lightrag" logger to emit records at INFO level.
setup_logger("lightrag", level="INFO")

## 1. Load PDF

In [None]:
# Working directory handed to LightRAG, and the PDF that gets indexed.
RAG_DIR = "./rag_data"
pdf_path = "./AtR_guide.pdf"

# exist_ok=True makes re-running the cell a no-op without a separate existence
# check, and makedirs also creates missing parents if RAG_DIR becomes nested.
os.makedirs(RAG_DIR, exist_ok=True)

In [None]:
# Concatenate the text of every page, terminating each page with a newline.
# extract_text() can come back empty (None in some pdfplumber versions) for
# pages without a text layer, so fall back to "" instead of failing on
# `None + "\n"`.
with pdfplumber.open(pdf_path) as pdf:
    pdf_text = "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)

## 2. LightRAG initialization

### Use of OpenAI

In [None]:
async def initialize_rag():
    """Create a LightRAG instance that uses OpenAI models and initialize it.

    The LLM function is `gpt_4o_mini_complete`; embeddings come from
    `openai_embed` with the "text-embedding-3-large" model. `RAG_DIR` is used
    as the working directory.

    Returns:
        The LightRAG instance, after its storages and the pipeline status have
        been initialized.
    """

    def embed_texts(texts):
        # Pin the embedding model; the remaining openai_embed defaults apply.
        return openai_embed(texts, model="text-embedding-3-large")

    embedder = EmbeddingFunc(
        embedding_dim=3072,
        max_token_size=8192,
        func=embed_texts,
    )

    rag = LightRAG(
        working_dir=RAG_DIR,
        chunk_token_size=1200,
        chunk_overlap_token_size=200,
        llm_model_func=gpt_4o_mini_complete,
        llm_model_name="gpt-4o-mini",
        llm_model_max_async=4,
        llm_model_max_token_size=32768,
        embedding_func=embedder,
    )

    # Both initialization steps are awaited before the instance is handed out.
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag

### Use of Groq

In [None]:
from lightrag.llm.openai import openai_complete_if_cache


async def groq_complete(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete `prompt` with a Llama 4 Maverick model via Groq's OpenAI-compatible API.

    Args:
        prompt: The user prompt to complete.
        system_prompt: Optional system prompt.
        history_messages: Optional list of earlier chat messages. Defaults to
            None (treated as an empty list) so that no list object is shared
            between calls.
        keyword_extraction: Accepted but not forwarded to the completion call.
        **kwargs: Passed through unchanged to `openai_complete_if_cache`.

    Returns:
        Whatever `openai_complete_if_cache` returns for the request.
    """
    return await openai_complete_if_cache(
        "meta-llama/llama-4-maverick-17b-128e-instruct",
        prompt,
        system_prompt=system_prompt,
        # Fresh list per call instead of a mutable default shared across calls.
        history_messages=[] if history_messages is None else history_messages,
        # The key is read from the environment at call time.
        api_key=os.getenv("GROQ_API_KEY"),
        base_url="https://api.groq.com/openai/v1",
        **kwargs
    )


async def initialize_rag():  # noqa
    """Create a LightRAG instance that uses Groq for the LLM and initialize it.

    This redefines `initialize_rag`, so running this cell replaces the
    OpenAI-based definition. The LLM function is `groq_complete`; embeddings
    still come from `openai_embed` with the "text-embedding-3-large" model.
    `RAG_DIR` is used as the working directory.

    Returns:
        The LightRAG instance, after its storages and the pipeline status have
        been initialized.
    """

    def embed_texts(texts):
        # Pin the embedding model; the remaining openai_embed defaults apply.
        return openai_embed(texts, model="text-embedding-3-large")

    embedder = EmbeddingFunc(
        embedding_dim=3072,
        max_token_size=8192,
        func=embed_texts,
    )

    rag = LightRAG(
        working_dir=RAG_DIR,
        chunk_token_size=1200,
        chunk_overlap_token_size=200,
        llm_model_func=groq_complete,
        llm_model_name="meta-llama/llama-4-maverick-17b-128e-instruct",
        llm_model_max_async=4,
        llm_model_max_token_size=32768,
        embedding_func=embedder,
    )

    # Both initialization steps are awaited before the instance is handed out.
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag

## 3. Index Creation

> **IMPORTANT NOTE:** DON'T run the following cell, unless you want to re-create the index, according to your desired configurations in the `initialize_rag` function. 
> 
> This index creation is the "heart" of LightRAG's functionality, since it is responsible for creating the vector and graph databases. In particular, after its execution, a bunch of json files are created in the directory defined by the `RAG_DIR` variable.
>
> Depending on the models and/or providers chosen in step 2, the time of execution will vary. In particular, running this cell:
>
> 1. is (generally) time-consuming, since the provided pdf document is quite large.
>
> 2. can be costly depending on the chosen models.

In [None]:
async def create_index():
    """
    Build the index from `pdf_text` and return the RAG instance.

    Creates a LightRAG instance via `initialize_rag()` and inserts the
    extracted PDF text into it. Any exception is caught and printed instead of
    being propagated to the caller.

    Returns:
        The RAG instance, or None if `initialize_rag()` itself failed.
    """
    rag = None
    try:
        rag = await initialize_rag()
        await rag.ainsert(pdf_text)
        return rag

    except Exception as e:
        # Best-effort: report the failure and fall through to the final return.
        print(f"An error occurred: {e}")
    finally:
        # Runs on both the success and the failure path, so the instance handed
        # back to the caller has already had finalize_storages() called on it.
        # NOTE(review): confirm the instance is still usable for queries after this.
        if rag:
            await rag.finalize_storages()
    # Reached only after an exception: rag is None, or an instance whose
    # insert did not complete.
    return rag


# Top-level await (notebook cell); the result is reused by the inference cells.
rag_instance = await create_index()

## 4. Inference

During inference, the most important parameters to configure can be divided into 2 categories:

1. The ones that are related to the retrieval of the most relevant data. These parameters are crucial and the most important among them are:

   * ***MODE*** (defaults to `hybrid`), which specifies how the most relevant data are retrieved.

   * ***TOP_K*** (defaults to `60`), which specifies the number of top items to retrieve. 

2. The ones that are related to the processing of the retrieved data and the generation of the final response. Here the most important parameters are the following:

   * ***USER_PROMPT*** (defaults to `None`), which specifies the prompt to be used during inference.

   * ***MODEL_FUNC_OVERRIDE*** (defaults to the model used during index creation), which specifies the model to be used during inference, i.e. an optional override of the LLM model function for this specific query.

   * *[OPTIONAL]* For adapting the final response's layout, just configure the ***response_type*** parameter according to the docs of the `QueryParam` class.

In [None]:
## Uncomment the following lines to write the response to a markdown file

# QA_DIR = "./QAs"
# filename = os.path.join(QA_DIR, "sample_QAs.md")

# if not os.path.exists(QA_DIR):
#     os.mkdir(QA_DIR)


# def append_qa_to_markdown(question: str, answer: str, filename: str) -> None:
#     """Appends a question and the corrsponding answer to a markdown file."""
#     global QA_COUNTER

#     with open(filename, "a", encoding="utf-8") as f:
#         f.write(
#             f"# Question {QA_COUNTER}:\n## {question}\n\n # *Answer:*\n{answer}\n\n"
#         )

#     QA_COUNTER += 1

In [None]:
# Value passed as `model_func` to QueryParam by the query cell; None means no
# override is supplied.
MODEL_FUNC_OVERRIDE = None

# Prompt passed as `user_prompt` to QueryParam. The trailing backslashes join
# the wrapped source lines into single lines of prompt text.
# NOTE(review): the text contains typos ("tranditions", "belows"); it is left
# untouched here because it is runtime text sent to the model.
USER_PROMPT = """You are a deeply realized individual, meaning you have been through all 7 stages, \
including the corresponding experiences and realizations described in the Awakening to Reality \
(AtR) guide, a book you wrote aimed for helping individuals discover their true selves. Having a \
background in Buddhism, you have a deep understanding of the relevant old texts of those spiritual \
tranditions as well. You are going to be asked questions from people who are interested in your \
proposed path to awakening, and who are either new or more experienced in your practices or \
further down the path as far as the depth and clarity of their realization is concerned. In your \
responses, please follow the instructions belows:
    1. Answer only based on the information in the Awakening to Reality (AtR) guide.
    2. Try your best to use the words and their associated meanings, as they are used in the guide.
    3. Do not create any new information or fantasize about anything that is not in the guide.
    4. If the question is not related to the Awakening to Reality (AtR) guide, do not answer, \
    explaining that you are only interested in questions relevant to the Awakening to Reality \
    (AtR) guide.
"""


async def display_lightrag_response(
    question, user_prompt, mode_val, top_k_val, model_func_to_use, current_rag_instance
):
    """Query a LightRAG instance and render the answer live in a Markdown output.

    Args:
        question: The question to ask.
        user_prompt: Prompt passed to QueryParam as `user_prompt`.
        mode_val: Retrieval mode passed to QueryParam as `mode`.
        top_k_val: Number of top items passed to QueryParam as `top_k`.
        model_func_to_use: LLM function passed to QueryParam as `model_func`
            (None for no override).
        current_rag_instance: The LightRAG instance to query.

    Returns:
        The complete response text, or None if an error occurred. Errors are
        rendered in the cell output instead of being raised.
    """
    display_id = "lightrag_stream_output"

    try:
        # Await the async API directly instead of calling the blocking sync
        # wrapper from inside a coroutine.
        result = await current_rag_instance.aquery(
            question,
            param=QueryParam(
                mode=mode_val,
                top_k=top_k_val,
                model_func=model_func_to_use,
                stream=True,
                user_prompt=user_prompt,
                response_type="Bullet Points",
            ),
        )

        # Initial display (blank); later updates address it through display_id.
        display(Markdown(""), display_id=display_id)

        if hasattr(result, "__aiter__"):
            # Streaming result: append chunks and re-render as they arrive.
            full_response = ""
            async for chunk in result:
                full_response += chunk
                update_display(Markdown(full_response), display_id=display_id)
        else:
            # Even with stream=True the result may be a plain value rather than
            # an async iterator; render it in one go instead of failing in
            # `async for`.
            full_response = "" if result is None else str(result)
            update_display(Markdown(full_response), display_id=display_id)

        return full_response

    except Exception as e:
        # Notebook boundary: show the error in the output rather than raising.
        display(Markdown(f"**Error during streaming:** `{type(e).__name__}: {e}`"))
        return None

In [None]:
# Run this cell to clear the cache (if needed).
# Requires `rag_instance` to exist, i.e. the index-creation cell must have run.
# NOTE(review): presumably this drops cached LLM responses so that the next
# query is recomputed — confirm against the LightRAG documentation.
await rag_instance.aclear_cache()

In [None]:
# Retrieval settings, forwarded to QueryParam as `mode` and `top_k`.
MODE = 'hybrid'
TOP_K = 60


# Placeholder: replace the Ellipsis with the question string before running.
QUESTION = ...  # your question here

# Renders the answer in the cell output and keeps the full text in `response`
# (None if the helper reported an error).
response = await display_lightrag_response(
    QUESTION, USER_PROMPT, MODE, TOP_K, MODEL_FUNC_OVERRIDE, rag_instance
)


## Uncomment the following lines to write the response to a markdown file

# QA_COUNTER = 1  # counts the number of QA pairs written to markdown
# append_qa_to_markdown(QUESTION, response, filename)