In [1]:
# Reload edited project modules automatically before each cell executes
# (autoreload mode 2), so changes to the package are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
from io import StringIO

import pandas as pd
from llama_index.core.workflow import Context, JsonSerializer

from charles_dicken_qa_chatbot.workflow import RAGFlow
from charles_dicken_qa_chatbot.constants import (
    OPIK_BASE_URL,
    OPIK_PROJ_NAME,
    LLM_MODEL,
    COLLECTION_NAME,
    QDRANT_HOST,
    QDRANT_PORT,
    REDIS_HOST,
    REDIS_PORT,
)

# Initialization

In [3]:
# Collect every connection/model setting in one mapping so the constructor call
# stays short and the configuration can be inspected on its own.
ragflow_settings = {
    "opik_host": OPIK_BASE_URL,
    "opik_project_name": OPIK_PROJ_NAME,
    "llm_model_name": LLM_MODEL,
    "collection_name": COLLECTION_NAME,
    "qdrant_host": QDRANT_HOST,
    "qdrant_port": QDRANT_PORT,
    "redis_host": REDIS_HOST,
    "redis_port": REDIS_PORT,
    "timeout": 180,
}
workflow = RAGFlow(**ragflow_settings)

# Context object passed to every `workflow.run(...)` call below; it is also
# where results such as the retriever results table are read back from.
ctx = Context(workflow)

OPIK: Configuration saved to file: /Users/hmnguyen1067/.opik.config
OPIK: Configuration completed successfully. Traces will be logged to 'Default Project' by default. To change the destination project, see: https://www.comet.com/docs/opik/tracing/log_traces#configuring-the-project-name


### Ingestion

In [4]:
# Ingest the documents in the source CSV (path is relative to the notebook's
# working directory). The returned `nodes` are reused by both evaluation
# cells further down.
source_path = "../data/test.csv"
nodes = await workflow.run(source_path=source_path, ctx=ctx)

OPIK: Started logging traces to the "charles-dicken-qa-2025-09-30" project at http://localhost:5173/api/v1/session/redirect/projects/?trace_id=01999b00-d5c4-7212-b61b-5a03459eac66&path=aHR0cDovL2xvY2FsaG9zdDo1MTczL2FwaS8=.


Number of documents extracted: 2
Number of chunks is: 125


### Retriever evaluation

In [5]:
await workflow.run(
    qa_nodes=nodes,
    similarity_top_k=3,
    num_questions_per_chunk=1,
    sample_percentage=0.05,
    ctx=ctx,
)
retriever_results_table = await ctx.store.get("retriever_results_table")
pd.read_json(retriever_results_table)

  0%|          | 0/6 [00:00<?, ?it/s]OPIK: Started logging traces to the "charles-dicken-qa-2025-09-30" project at http://localhost:5173/api/v1/session/redirect/projects/?trace_id=01999b01-7d6e-7467-b468-06d9abd8a55f&path=aHR0cDovL2xvY2FsaG9zdDo1MTczL2FwaS8=.
100%|██████████| 6/6 [00:56<00:00,  9.44s/it]
  pd.read_json(retriever_results_table)


Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,Embedding Retriever,0.833333,0.75,0.277778,0.833333,0.75,0.771822
1,BM25 Retriever,0.666667,0.666667,0.222222,0.666667,0.666667,0.666667
2,Embedding + BM25 Retriever + Reranker,0.833333,0.833333,0.388889,0.833333,0.833333,0.833333


In [6]:
# Response-quality evaluation run (`opik=True`) over the ingested nodes; the
# per-metric score statistics are printed as cell output and the run's return
# value is kept in `response_eval_result`.
# NOTE(review): `sample_percentage=0.05` presumably samples ~5% of `nodes` —
# confirm against RAGFlow.
response_eval_result = await workflow.run(
    opik_nodes=nodes,
    num_questions_per_chunk=1,
    sample_percentage=0.05,
    opik=True,
    ctx=ctx,
)

Evaluation:   0%|          | 0/5 [00:00<?, ?it/s]

hallucination_metric: ScoreStatistics(mean=0.16999999999999998, max=0.6, min=0.0, values=[0.6, 0.0, 0.25, 0.0, 0.0], std=0.2636285265292813)
UsefulnessMetric: ScoreStatistics(mean=0.768, max=0.9, min=0.6, values=[0.68, 0.6, 0.8, 0.86, 0.9], std=0.12537942414925984)
answer_relevance_metric: ScoreStatistics(mean=0.9219999999999999, max=0.95, min=0.85, values=[0.85, 0.95, 0.94, 0.92, 0.95], std=0.042071367935925245)
context_precision_metric: ScoreStatistics(mean=0.744, max=1.0, min=0.0, values=[0.0, 0.92, 0.8, 1.0, 1.0], std=0.42388677733564656)
context_recall_metric: ScoreStatistics(mean=0.792, max=1.0, min=0.2, values=[0.2, 0.93, 0.85, 0.98, 1.0], std=0.3359613072959444)


In [7]:
# Ask a question through the workflow, reusing the context populated above.
response = await workflow.run(
    query="What is 'A Christmas Carol' novel's main theme?", ctx=ctx
)

In [8]:
# Display only the answer text of the response.
response.response

'The central theme is the transformation and redemption of Ebenezer Scrooge, from selfishness to generosity, often seen as a call to charitable care for the poor.'

### Save states

In [9]:
import redis
import json

In [10]:
# Redis client used below to save and reload the serialized workflow context.
# decode_responses=True makes redis-py return `str` values instead of raw bytes,
# which is what `json.loads` is fed later on.
redis_client = redis.Redis(
    host=REDIS_HOST, port=REDIS_PORT, db=0, decode_responses=True
)

In [11]:
# Serialize the workflow context to a dict using the JSON serializer.
ctx_dict = ctx.to_dict(serializer=JsonSerializer())

In [12]:
# Persist the serialized context in Redis under the key "ctx" as a JSON string.
redis_client.set("ctx", json.dumps(ctx_dict))

True

# Load from saved state

In [13]:
# Read the serialized context back from Redis (stored under the key "ctx").
# `get` returns None when the key does not exist; fail with an explicit message
# instead of letting `json.loads(None)` raise an opaque TypeError.
saved_ctx_json = redis_client.get("ctx")
if saved_ctx_json is None:
    raise KeyError(
        "No saved workflow context found in Redis under key 'ctx'; "
        "run the 'Save states' cells first."
    )
loaded_ctx_dict = json.loads(saved_ctx_json)

In [14]:
# Build a second, independent RAGFlow instance using the same settings as the
# first `workflow`, to check that the saved context can be restored onto it.
restored_workflow_settings = {
    "opik_host": OPIK_BASE_URL,
    "opik_project_name": OPIK_PROJ_NAME,
    "llm_model_name": LLM_MODEL,
    "collection_name": COLLECTION_NAME,
    "qdrant_host": QDRANT_HOST,
    "qdrant_port": QDRANT_PORT,
    "redis_host": REDIS_HOST,
    "redis_port": REDIS_PORT,
    "timeout": 180,
}
aworkflow = RAGFlow(**restored_workflow_settings)

# Rebuild the Context from the dict loaded from Redis, bound to the new
# workflow instance and deserialized with the same JSON serializer.
json_serializer = JsonSerializer()
restored_ctx = Context.from_dict(
    aworkflow,
    loaded_ctx_dict,
    serializer=json_serializer,
)

OPIK: Existing Opik clients will not use updated values for "url", "api_key", "workspace".
OPIK: Configuration saved to file: /Users/hmnguyen1067/.opik.config
OPIK: Configuration completed successfully. Traces will be logged to 'Default Project' by default. To change the destination project, see: https://www.comet.com/docs/opik/tracing/log_traces#configuring-the-project-name


In [15]:
# Run the workflow once with `initialize_ctx=True` on the restored context
# before issuing any queries.
# NOTE(review): presumably this re-creates runtime objects that are not part of
# the serialized context — confirm against RAGFlow.
await aworkflow.run(initialize_ctx=True, ctx=restored_ctx)

In [16]:
# Same question that was asked before the context was saved, now answered by
# the new workflow instance using the restored context.
query_str = "What is 'A Christmas Carol' novel's main theme?"
response = await aworkflow.run(query=query_str, ctx=restored_ctx)

OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 999d49f0-0ccb-4c02-99eb-06416f0807c3, event_type: CBEventType.CHUNKING, event_id: c7525cef-f54c-4468-8080-f0cfcf2fed2c.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 999d49f0-0ccb-4c02-99eb-06416f0807c3, event_type: CBEventType.CHUNKING, event_id: e487d872-9b4f-4379-afcd-ccbf264d63ca.


In [17]:
# Display the full Response object (answer text plus the retrieved source_nodes).
response

Response(response='The central theme is Scrooge’s transformation and redemption—from selfishness to compassion—and the accompanying call for charity toward the poor.', source_nodes=[NodeWithScore(node=TextNode(id_='de4fdb7e-c2a1-40e5-9dc1-ef23a0900c1b', embedding=None, metadata={'title': 'A Christmas Carol', 'gutenberg_id': 46, 'source': 'wikipedia', 'excerpt_keywords': 'Keywords: Scrooge, transformation, redemption, poverty, Want, Ignorance, TinyTim, ChristmasCarol, allegory, charity'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d20b7466-bde9-4537-9f75-4fe8076ae449', node_type='4', metadata={'title': 'A Christmas Carol', 'gutenberg_id': 46, 'source': 'wikipedia'}, hash='51e7df426437562abc8015ebfcdf0c52d9a01e69f51edc2e4b07373cdd9e7b2e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='e913ddb5-85e6-4e93-9957-9a90aece7ccc', node_type='1', metadata={'title': 'A Christmas Carol', 'guten

In [18]:
# Ask a different question through the restored context.
response = await aworkflow.run(
    query="What is the book stored in database?", ctx=restored_ctx
)

OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 3c032355-0f9e-4bae-ba16-378c795b3858, event_type: CBEventType.CHUNKING, event_id: 3c6f6139-88fb-4582-a1fb-765837c92334.
OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 3c032355-0f9e-4bae-ba16-378c795b3858, event_type: CBEventType.CHUNKING, event_id: 8074abe9-1ce6-41be-a2ed-06884725680f.


In [19]:
# Display only the answer text of the response.
response.response

'A Christmas Carol.'

In [20]:
# Same query, but without passing `ctx`. The result is not assigned, so the
# notebook displays the full Response repr as the cell output.
await aworkflow.run(query="What is the book stored in database?")

OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 1ea274a7-a8f5-4685-ab22-dc9160c31d0a, event_type: CBEventType.CHUNKING, event_id: 0b809fbf-2f72-4f19-9a9e-20999fc31c3a.
OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: 1ea274a7-a8f5-4685-ab22-dc9160c31d0a, event_type: CBEventType.CHUNKING, event_id: 9c09e6c0-4cc8-42b1-8a2b-2568310e1d26.


Response(response='A Christmas Carol.', source_nodes=[NodeWithScore(node=TextNode(id_='67cd4471-6b14-4806-b221-830ee7dc08b0', embedding=None, metadata={'title': 'A Christmas Carol', 'gutenberg_id': 46, 'source': 'book', 'excerpt_keywords': 'Scrooge, Spirit, Ali Baba, Valentine, Orson, Parrot, Damascus, Genii, Christmas, Sultan'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='02ee8923-958a-4409-92a3-73b3b725b7a4', node_type='4', metadata={'title': 'A Christmas Carol', 'gutenberg_id': 46, 'source': 'book'}, hash='71c58145a6f84f632ef4e5a660a243315a8603b3a60d0064f3f52293665d02dd'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='b46c6d03-bad2-4c02-964b-54325bc883c7', node_type='1', metadata={'title': 'A Christmas Carol', 'gutenberg_id': 46, 'source': 'book'}, hash='e3183cbb1b0f76920e5c9cafd21cb3116d681184f8f2ef4ac7e776a13c6eb265'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='9a6d

In [21]:
# Ask the main-theme question again through the restored context.
response = await aworkflow.run(
    query="What is 'A Christmas Carol' novel's main theme?", ctx=restored_ctx
)

OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: a7ea5fe2-c54e-4f1d-a037-74adc8a44a90, event_type: CBEventType.CHUNKING, event_id: 5c798a0c-a91f-44dd-9345-93dbb2f8c446.
OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: a7ea5fe2-c54e-4f1d-a037-74adc8a44a90, event_type: CBEventType.CHUNKING, event_id: 8523c7cf-e9bd-4f45-bf4c-3df73b06d586.


In [22]:
# Display only the answer text of the response.
response.response

'The main theme is the transformation and redemption of Ebenezer Scrooge—from selfishness to generosity—emphasizing charity toward the poor and compassion for others, sometimes read as a Christian allegory or a secular humanitarian message.'

In [23]:
# Repeat the identical main-theme query once more on the restored context.
# NOTE(review): the result of this run is never displayed in the notebook;
# presumably a repeat-run check — confirm the intent or remove the cell.
response = await aworkflow.run(
    query="What is 'A Christmas Carol' novel's main theme?", ctx=restored_ctx
)

OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: c975d6c0-146d-4243-a192-1d0273fbda74, event_type: CBEventType.CHUNKING, event_id: 692a3879-b8f9-465e-95a4-017e165c6333.
OPIK: No trace data found in context for event start. This is likely due to the fact that the trace is not started properly. The parent_id: c975d6c0-146d-4243-a192-1d0273fbda74, event_type: CBEventType.CHUNKING, event_id: 87638864-d93a-45d6-b2c3-4ad297844024.
