#### How to run this notebook
For simplicity, just run all cells
- Open a browser and go to http://localhost:7863
- Since the actual KG data is too large to upload to GitHub, I have created a smaller kg_test with only 50 nodes (the first 2 chapters from the Norwegian index).

In [None]:
from src.model import get_llamaindex_model, get_llamaindex_model_mini, get_huggingface_embedding_model
from llama_index.core import Settings
from src import get_azure_openai_model, get_azure_openai_chat_model, get_azure_openai_mini_model
from src.parser import markdownParser
from src import build_rag_workflow

# Instantiate the "mini" LlamaIndex LLM; it is registered as the global
# default LLM on llama_index's Settings below.
llm = get_llamaindex_model_mini()

# Second LlamaIndex LLM handle. NOTE(review): not referenced anywhere else in
# the visible cells — confirm whether it is still needed.
llm2 = get_llamaindex_model()

# Register the embedding model and the mini LLM on llama_index's global
# Settings object.
embed_model = get_huggingface_embedding_model()
Settings.embed_model = embed_model
Settings.llm = llm

# Chat model that is later handed to build_rag_interface() and forwarded to
# the RAG workflow under the "llm" input key.
model = get_azure_openai_chat_model()


# Parse the contents of ./kgdata/ into nodes; these are later passed to the
# RAG workflow under the "nodes" input key.
nodes = markdownParser(input_dir="./kgdata/")
print(f"Processed {len(nodes)} nodes.")

: 

#### Final UI

In [None]:
import gradio as gr
import pprint
from io import StringIO
import sys
import concurrent.futures
import threading
import time

def build_rag_interface(build_rag_workflowv1, model, nodes):
    """
    Create a Gradio interface that runs the RAG workflow with three retrievers.

    Every submitted question is run concurrently through the 'vector_store',
    'knowledge_graph' and 'hybrid' retriever types. Each result tab is updated
    as soon as its own retriever finishes.

    Args:
        build_rag_workflowv1: Zero-argument callable that builds the RAG
            workflow. The returned object must expose ``stream(inputs)``.
        model: The language model, passed to the workflow as ``"llm"``.
        nodes: Processed nodes from markdownParser, passed as ``"nodes"``.

    Returns:
        The constructed ``gr.Blocks`` interface (not launched).
    """
    # Single source of truth for the form defaults, so that the "Clear" button
    # restores exactly the values the form shows on first load.
    default_workflow_type = "fast"
    default_persist_path = "./kgstore_test_50"

    # Order matters: it defines the order of the three yielded outputs.
    retriever_types = ('vector_store', 'knowledge_graph', 'hybrid')

    def display_name(retriever_type):
        """Return a human-readable title, e.g. 'knowledge_graph' -> 'Knowledge Graph'."""
        return retriever_type.replace('_', ' ').title()

    def run_single_retriever(question, retriever_type, workflow_type, load_persist_path):
        """
        Run the workflow for a single retriever type and return the answer text.

        Only the generated answer is rendered; retrieved documents are not
        included in the returned text.

        Raises:
            RuntimeError: If the workflow stream produced no (truthy) final value.
            Exception: Any error raised while building or streaming the
                workflow is logged and re-raised so the caller can report the
                run as failed instead of completed.
        """
        print(f"Starting {retriever_type} retriever...")
        try:
            # Build a fresh workflow for this run
            app = build_rag_workflowv1()

            inputs = {
                "question": question,
                "llm": model,
                "retriever_type": retriever_type,
                "load_persist": load_persist_path,
                "nodes": nodes,
                "workflow_type": workflow_type,
            }

            # Keep only the last value emitted by the stream
            final_value = None
            for output in app.stream(inputs):
                for value in output.values():
                    final_value = value

            if not final_value:
                raise RuntimeError(f"No response generated for {retriever_type}")

            response = final_value.get('generation', 'No response generated')
        except Exception as e:
            print(f"Error in {retriever_type}: {e}")
            raise

        print(f"Completed {retriever_type} retriever")
        return f"**✅ RESPONSE:**\n{response}"

    def process_single_result(retriever_type, question, workflow_type, load_persist_path):
        """
        Time one retriever run and format its outcome for display.

        Returns:
            Tuple ``(retriever_type, markdown_text, elapsed_seconds, success)``.
        """
        thread_start = time.time()
        try:
            result = run_single_retriever(question, retriever_type, workflow_type, load_persist_path)
        except Exception as e:
            exec_time = time.time() - thread_start
            error_result = (
                f"❌ **FAILED in {exec_time:.2f}s** | **{display_name(retriever_type)}**"
                f"\n\n**Error:** {e}"
            )
            return retriever_type, error_result, exec_time, False

        exec_time = time.time() - thread_start
        enhanced_result = (
            f"⚡ **Completed in {exec_time:.2f}s** | **{display_name(retriever_type)}**"
            f"\n\n{result}"
        )
        return retriever_type, enhanced_result, exec_time, True

    def process_question_streaming(question, workflow_type, load_persist_path):
        """
        Run all retrievers concurrently and stream updates to the three tabs.

        Yields:
            Tuples ``(vector_store_text, knowledge_graph_text, hybrid_text)``;
            one initially, one after each retriever finishes, and a final one.
        """
        if not question or not question.strip():
            prompt = "Please enter a question."
            yield prompt, prompt, prompt
            return

        print(f"🚀 Starting concurrent streaming for: {question[:50]}...")

        # Initialize with processing messages
        results = {
            rt: f"🔄 **{rt.replace('_', ' ').upper()}** - Processing... Please wait."
            for rt in retriever_types
        }

        def snapshot():
            """Return the current per-retriever texts in output order."""
            return tuple(results[rt] for rt in retriever_types)

        # Yield initial state immediately
        yield snapshot()

        # `results` is only mutated here, in the generator's own thread; the
        # worker threads just return values, so no lock is needed.
        completed_count = 0
        total = len(retriever_types)

        with concurrent.futures.ThreadPoolExecutor(max_workers=total) as executor:
            futures = {
                executor.submit(
                    process_single_result, rt, question, workflow_type, load_persist_path
                ): rt
                for rt in retriever_types
            }

            # Process results as they complete and yield immediately
            for future in concurrent.futures.as_completed(futures):
                retriever_type = futures[future]
                completed_count += 1
                try:
                    _, result, exec_time, success = future.result()
                except Exception as e:
                    # Replace the placeholder so the tab does not stay stuck
                    # on "Processing..." when the worker itself blew up.
                    print(f"❌ Future execution error: {str(e)}")
                    results[retriever_type] = (
                        f"❌ **FAILED** | **{display_name(retriever_type)}**"
                        f"\n\n**Error:** {e}"
                    )
                else:
                    results[retriever_type] = result
                    if success:
                        print(f"✅ {retriever_type} completed in {exec_time:.2f}s ({completed_count}/{total})")
                    else:
                        print(f"❌ {retriever_type} failed in {exec_time:.2f}s ({completed_count}/{total})")

                yield snapshot()

        # Final yield of the complete state once the executor has shut down
        yield snapshot()

    # Create Gradio interface
    with gr.Blocks(title="RAG Workflow Interface", theme=gr.themes.Soft()) as interface:

        gr.Markdown("# Medical emergency RAG Workflow Interface")
        gr.Markdown("This is a Retrieval-Augmented Generation (RAG) interface for medical emergencies. This interface allows you to ask questions related to medical emergencies and get responses using a combination of retrieval and generation techniques.")
        gr.Markdown("**⚡ Concurrent Processing:** When you click 'Process Question', the system will automatically run all three retriever types (Vector Store, Knowledge Graph, and Hybrid) **in parallel** for faster results.")

        with gr.Row():
            with gr.Column(scale=2):
                # Input section
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="Enter your question here...",
                    lines=3,
                    value="how to revive a person who is unconscious"
                )

                with gr.Row():
                    workflow_type = gr.Dropdown(
                        choices=["fast", "deep"],
                        value=default_workflow_type,
                        label="Workflow Type"
                    )

                load_persist_path = gr.Textbox(
                    label="Knowledge Store Path",
                    value=default_persist_path,
                    placeholder="Path to knowledge store"
                )

                submit_btn = gr.Button("Process Question", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

            with gr.Column(scale=3):
                # Output section
                gr.Markdown("### Results")

                with gr.Tabs():
                    with gr.TabItem("Vector Store"):
                        vector_output = gr.Markdown(
                            label="Vector Store Results",
                            min_height=200,
                            max_height=500,
                            show_copy_button=True
                        )

                    with gr.TabItem("Knowledge Graph"):
                        kg_output = gr.Markdown(
                            label="Knowledge Graph Results",
                            min_height=200,
                            max_height=500,
                            show_copy_button=True
                        )

                    with gr.TabItem("Hybrid"):
                        hybrid_output = gr.Markdown(
                            label="Hybrid Results",
                            min_height=200,
                            max_height=500,
                            show_copy_button=True
                        )

        # Event handlers
        submit_btn.click(
            fn=process_question_streaming,
            inputs=[question_input, workflow_type, load_persist_path],
            outputs=[vector_output, kg_output, hybrid_output]
        )

        # Clear empties the question and results and restores the form defaults
        clear_btn.click(
            fn=lambda: ("", "", "", "", default_workflow_type, default_persist_path),
            outputs=[question_input, vector_output, kg_output, hybrid_output, workflow_type, load_persist_path]
        )

        # Example questions
        gr.Markdown("### Example Questions")
        example_questions = [
            "how to revive a person who is unconscious",
            "what are the steps for CPR?",
            "how to treat a burn injury?",
            "what to do in case of choking?",
            "how to help a person injured in motorbike accident?",
            "what are the symptoms of a heart attack?",
        ]

        for example in example_questions:
            # Bind `example` as a default argument to avoid late-binding closures
            gr.Button(example, size="sm").click(
                fn=lambda x=example: x,
                outputs=question_input
            )

    return interface

# Usage example (you would call this in your main script):

# Assuming you have your components ready:
# - build_rag_workflowv1: your workflow builder function
# - model: your language model
# - nodes: your processed nodes

# Build the Gradio UI from the workflow builder, chat model and parsed nodes
# defined earlier in this notebook.
interface = build_rag_interface(build_rag_workflow, model, nodes)

# Launch the interface
if __name__ == "__main__":
    interface.launch(
        server_name="0.0.0.0",  # Bind to all network interfaces (allows external access)
        server_port=7863,       # Custom port; Gradio's own default is 7860
        share=False,            # Set to True for public sharing
        debug=True              # Enable debug mode
    )

* Running on local URL:  http://0.0.0.0:7863
* To create a public link, set `share=True` in `launch()`.


🚀 Starting concurrent streaming for: how to help a person injured in motorbike accident...
Starting vector_store retriever...
Starting knowledge_graph retriever...
Starting hybrid retriever...
--- RETRIEVE ---
Using original question: how to help a person injured in motorbike accident?
node 3: ## emergency response
--- RETRIEVE ---
Using original question: how to help a person injured in motorbike accident?
node 3: ## emergency response
🔄 Loading persisted knowledge graph from ./kgstore
--- RETRIEVE ---
Using original question: how to help a person injured in motorbike accident?
node 3: ## emergency response


Task was destroyed but it is pending!
task: <Task pending name='Task-523' coro=<Queue.start_processing() running at c:\Users\newac\OneDrive\Desktop\Master\.venv\Lib\site-packages\gradio\queueing.py:308> wait_for=<Future finished result=None>>
Task was destroyed but it is pending!
task: <Task pending name='Task-514' coro=<_delete_state() running at c:\Users\newac\OneDrive\Desktop\Master\.venv\Lib\site-packages\gradio\route_utils.py:935> wait_for=<Future pending cb=[Task.__wakeup()]>>
Task was destroyed but it is pending!
task: <Task pending name='Task-524' coro=<Queue.start_progress_updates() running at c:\Users\newac\OneDrive\Desktop\Master\.venv\Lib\site-packages\gradio\queueing.py:358> wait_for=<Future pending cb=[Task.__wakeup()]>>


🔄 Loading persisted knowledge graph from ./kgstore
Retrieved 30 documents
--- CHECK DOCUMENT RELEVANCE TO QUESTION ---
--- FAST WORKFLOW ---
--- ASSESS GRADED DOCUMENTS ---
Grade: , Number of documents: 30
--- DECISION: DOCUMENTS ARE RELEVANT, GENERATE ANSWER ---
--- GENERATE ---
✅ Successfully loaded persisted knowledge graph!


  0%|          | 0/2 [00:00<?, ?it/s]

✅ Successfully loaded persisted knowledge graph!


 50%|█████     | 1/2 [00:01<00:01,  1.80s/it]

Generated answer: To help a person injured in a motorbike accident, follow these steps:

1. **Ensure Safety**: Make su...
--- CHECK HALLUCINATIONS ---
--- FAST WORKFLOW ---
--- DECISION: GENERATION IS GROUNDED AND USEFUL ---


100%|██████████| 2/2 [00:02<00:00,  1.44s/it]

Completed vector_store retriever
✅ vector_store completed in 38.35s (1/3)
Vector Store Retrieved 30 documents
Knowledge Graph Retrieved 19 documents
Retrieved 49 documents



100%|██████████| 2/2 [00:04<00:00,  2.32s/it]


--- CHECK DOCUMENT RELEVANCE TO QUESTION ---
--- FAST WORKFLOW ---
Retrieved 19 documents
--- ASSESS GRADED DOCUMENTS ---
Grade: , Number of documents: 49
--- DECISION: DOCUMENTS ARE RELEVANT, GENERATE ANSWER ---
--- CHECK DOCUMENT RELEVANCE TO QUESTION ---
--- FAST WORKFLOW ---
--- ASSESS GRADED DOCUMENTS ---
Grade: , Number of documents: 19
--- DECISION: DOCUMENTS ARE RELEVANT, GENERATE ANSWER ---
--- GENERATE ---
--- GENERATE ---
Generated answer: To help a person injured in a motorbike accident, follow these steps:

1. **Ensure Safety**: Make su...
--- CHECK HALLUCINATIONS ---
--- FAST WORKFLOW ---
--- DECISION: GENERATION IS GROUNDED AND USEFUL ---
Completed hybrid retriever
✅ hybrid completed in 46.22s (2/3)
Generated answer: I don't know....
--- CHECK HALLUCINATIONS ---
--- FAST WORKFLOW ---
--- DECISION: GENERATION IS GROUNDED AND USEFUL ---
Completed knowledge_graph retriever
✅ knowledge_graph completed in 96.20s (3/3)
Keyboard interruption in main thread... closing server.
