<a href="https://colab.research.google.com/github/itsmerajesh4990/AIpracticeandtraining/blob/main/your_firstRAG_enec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import openai
from google.colab import userdata

# Retrieve the OpenAI API key from Google Colab secrets.
# The secret is looked up under the name 'openai'; the key is assigned to the
# `openai` module so it never has to be written into the notebook itself.
openai.api_key = userdata.get('openai')


In [None]:
pip install llama-index



In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Load the files found in the "data" directory, build a VectorStoreIndex from
# them, and ask one question through the index's default query engine.
documents = SimpleDirectoryReader(input_dir="data").load_data()
index = VectorStoreIndex.from_documents(documents=documents)
query_engine = index.as_query_engine()
response = query_engine.query("what is this document about")
print(str(response))

The document provides a personal narrative of the author's experiences transitioning from academia to art school, specifically focusing on his time at the Rhode Island School of Design (RISD) and the Accademia di Belle Arti in Florence. It also touches on his struggles and observations during his academic pursuits and early career in painting.


In [None]:
import time
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Start timer. perf_counter() is a monotonic, high-resolution clock intended
# for measuring intervals; time.time() follows the wall clock, which can be
# adjusted while the cell is running and skew the reported duration.
start_time = time.perf_counter()

# Load and index documents
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents=documents)

# Query the index
query_engine = index.as_query_engine()
response = query_engine.query("who is mentioned about in document")
print(response)

# End timer and print duration (covers loading, indexing and the query)
end_time = time.perf_counter()
print(f"\nExecution Time: {end_time - start_time:.2f} seconds")


Paul Graham, Maria Daniels, Jessica Livingston, Robert, Trevor, Julian

Execution Time: 21.22 seconds


In [None]:
import os
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Build the index once and persist it; later runs load it from disk instead.
# NOTE: only the existence of PERSIST_DIR is checked, so a stored index is
# reused even if the files under "data" have changed since it was written.
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # load the documents and create the index
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

# Either way we can now query the index.
# The query engine is built once, on top of an explicit retriever that is
# configured with similarity_top_k=2 (the previous default engine created via
# index.as_query_engine() was overwritten before use and has been dropped).
retriever = VectorIndexRetriever(index=index, similarity_top_k=2)
query_engine = RetrieverQueryEngine(retriever=retriever)

response = query_engine.query("Who all mentioned in the doc?")
print(response)


Paul Graham, Maria Daniels, Jessica Livingston, Robert Morris, Trevor Blackwell, Kevin Hale, Sam Altman.


In [None]:
import os
import time
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Start timer for index setup. perf_counter() is a monotonic clock meant for
# measuring intervals, so the durations below cannot be distorted by wall-clock
# adjustments the way time.time() differences can.
start_time = time.perf_counter()

# check if storage already exists
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # load the documents and create the index
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

setup_duration = time.perf_counter() - start_time
print(f"Index setup time: {setup_duration:.2f} seconds")

# Start timer for query (includes building the retriever and query engine)
query_start_time = time.perf_counter()

# Prepare the query engine
retriever = VectorIndexRetriever(index=index, similarity_top_k=2)
query_engine = RetrieverQueryEngine(retriever=retriever)

# Execute query
response = query_engine.query("Who all mentioned in the doc?")
print(response)

query_duration = time.perf_counter() - query_start_time
print(f"Query time: {query_duration:.2f} seconds")


Index setup time: 0.25 seconds
Paul Graham, Maria Daniels, Jessica Livingston, Robert Morris, Trevor, Kevin Hale, Sam Altman.
Query time: 1.55 seconds


In [None]:
pip install gradio



In [None]:
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import os

# Read the contents of the "data" directory, index them, and expose the
# default query engine that query_document() uses.
documents = SimpleDirectoryReader(input_dir="data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Query function
def query_document(query: str) -> str:
    """Answer ``query`` with the module-level ``query_engine``.

    Args:
        query: The question text entered by the user.

    Returns:
        The engine's response converted to ``str``, or a short prompt asking
        for input when ``query`` is empty or contains only whitespace.
    """
    # A blank submission carries no question; reply locally instead of
    # forwarding it to the query engine.
    if not query or not query.strip():
        return "Please enter a question."
    response = query_engine.query(query)
    return str(response)

# Define app UI using Blocks
def launch_interface():
    """Build the Gradio Blocks UI and launch it.

    The page has a header row (an optional logo column and a title column)
    and an interaction row with the query textbox and submit button in the
    first column and the response textbox in the second. Clicking the button
    passes the query text to ``query_document`` and writes the returned
    string into the response textbox.
    """
    with gr.Blocks(title="RAG Application Using LlamaIndex") as demo:
        with gr.Row():
            with gr.Column(scale=1, min_width=150):
                # The logo is optional: the component is only created when
                # the image file is present.
                if os.path.exists("data/dds_logo.jpg"):
                    # No `scale` argument: Gradio documents `scale` as an
                    # integer and warns about fractional values such as 0.5.
                    gr.Image(
                        value="data/dds_logo.jpg",
                        show_label=False,
                        container=False,
                    )
            with gr.Column(scale=4):
                gr.Markdown(
                    """
                    <div style='text-align: center;'>
                        <h2>RAG Application with LlamaIndex</h2>
                        <p>Ask questions based on your uploaded documents using Retrieval-Augmented Generation (RAG).</p>
                    </div>
                    """,
                    elem_id="header"
                )

        with gr.Row():
            with gr.Column():
                query_input = gr.Textbox(
                    lines=2,
                    placeholder="Type your question here...",
                    label="Your Query"
                )
                submit_btn = gr.Button("Submit")

            with gr.Column():
                response_output = gr.Textbox(
                    lines=10,
                    label="Response"
                )

        # Wire the button: query text in, answer text out.
        submit_btn.click(query_document, inputs=query_input, outputs=response_output)

    demo.launch()

# Launch the app
if __name__ == "__main__":
    launch_interface()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://987ec42202a62530f6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import os

# Index everything under "data" and keep the default query engine around
# for query_document().
documents = SimpleDirectoryReader(input_dir="data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Query function
def query_document(query: str) -> str:
    """Answer ``query`` with the module-level ``query_engine``.

    Args:
        query: The question text entered by the user.

    Returns:
        The engine's response converted to ``str``, or a short prompt asking
        for input when ``query`` is empty or contains only whitespace.
    """
    # A blank submission carries no question; reply locally instead of
    # forwarding it to the query engine.
    if not query or not query.strip():
        return "Please enter a question."
    response = query_engine.query(query)
    return str(response)

# Define app UI using Blocks
def launch_interface():
    """Build the Gradio Blocks UI and launch it.

    The page has a header row (an optional logo column and a Markdown title
    column) and an interaction row with the query textbox and submit button
    in the first column and the response textbox in the second. Clicking the
    button passes the query text to ``query_document`` and writes the
    returned string into the response textbox.
    """
    with gr.Blocks(title="RAG Application Using LlamaIndex") as demo:
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
                # The logo is optional: the component is only created when
                # the image file is present.
                if os.path.exists("data/logo.png"):
                    gr.Image("data/logo.png", label="Logo", show_label=False)
            with gr.Column(scale=4):
                # The heading emoji was stored as mis-decoded UTF-8 bytes
                # ("ðŸ“„"); it is restored to the intended character here.
                gr.Markdown(
                    """
                    ## 📄 RAG Application with LlamaIndex
                    Ask questions based on your uploaded documents using Retrieval-Augmented Generation (RAG).
                    """)

        with gr.Row():
            with gr.Column():
                query_input = gr.Textbox(
                    lines=2,
                    placeholder="Type your question here...",
                    label="Your Query"
                )
                submit_btn = gr.Button("Submit")

            with gr.Column():
                response_output = gr.Textbox(
                    lines=10,
                    label="Response"
                )

        # Wire the button: query text in, answer text out.
        submit_btn.click(query_document, inputs=query_input, outputs=response_output)

    demo.launch()

# Launch the app
if __name__ == "__main__":
    launch_interface()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b56ff19ba7465355d3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import os

# Load the "data" directory into a VectorStoreIndex and create the default
# query engine that query_document() relies on.
documents = SimpleDirectoryReader(input_dir="data").load_data()
index = VectorStoreIndex.from_documents(documents=documents)
query_engine = index.as_query_engine()

# Suggested starter questions for the app
recommended_questions: list[str] = [
    "What does the Travel Infinity insurance cover for international travel?",
    "What is considered an emergency in-patient treatment under this policy?",
    "How are maternity expenses handled in the Travel Infinity plan?",
    "What exclusions are listed in the Travel Infinity insurance policy?",
    "What is the claim process for lost checked-in baggage?",
]

# Query function
def query_document(query: str) -> str:
    """Answer ``query`` with the module-level ``query_engine``.

    Args:
        query: The question text entered by the user.

    Returns:
        The engine's response converted to ``str``, or a short prompt asking
        for input when ``query`` is empty or contains only whitespace.
    """
    # A blank submission carries no question; reply locally instead of
    # forwarding it to the query engine.
    if not query or not query.strip():
        return "Please enter a question."
    response = query_engine.query(query)
    return str(response)

# Interface with professional layout
def launch_interface():
    """Build the Gradio Blocks UI and launch it.

    The header row holds an optional logo and an HTML header that lists every
    entry of the module-level ``recommended_questions``. The interaction row
    holds the query textbox and submit button in the first column and the
    response textbox in the second. Clicking the button passes the query
    text to ``query_document`` and writes the returned string into the
    response textbox.
    """
    # Render the suggestions from `recommended_questions` so the list shown
    # in the header cannot drift from the one defined in code (the former
    # hard-coded HTML listed only four of the five questions).
    question_items = "\n".join(
        f"<li>{question}</li>" for question in recommended_questions
    )
    # The pin emoji was stored as mis-decoded UTF-8 bytes ("ðŸ“Œ"); it is
    # restored to the intended character here.
    header_html = "\n".join(
        [
            "<div style='text-align: center;'>",
            "<h2>RAG Application with LlamaIndex</h2>",
            "<p>Ask questions based on your uploaded insurance documents.</p>",
            "<h4>📌 Recommended Questions:</h4>",
            "<ul>",
            question_items,
            "</ul>",
            "</div>",
        ]
    )

    with gr.Blocks(title="RAG Application Using LlamaIndex") as demo:
        # Header Row: Logo + Recommended Questions
        with gr.Row():
            with gr.Column(scale=1):
                # The logo is optional: the component is only created when
                # the image file is present.
                if os.path.exists("data/logo.png"):
                    # No `scale` argument: Gradio documents `scale` as an
                    # integer and warns about fractional values such as 0.5.
                    gr.Image(value="data/logo.png", show_label=False, container=False)
            with gr.Column(scale=4):
                gr.Markdown(header_html, elem_id="header")

        # Interaction Row: Input and Output
        with gr.Row():
            with gr.Column():
                query_input = gr.Textbox(
                    lines=2,
                    placeholder="Type your question here...",
                    label="Your Query"
                )
                submit_btn = gr.Button("Submit")
            with gr.Column():
                response_output = gr.Textbox(lines=10, label="Response")

        # Wire the button: query text in, answer text out.
        submit_btn.click(query_document, inputs=query_input, outputs=response_output)

    demo.launch()

if __name__ == "__main__":
    launch_interface()




It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ebb9aadce4ee989848.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import os

# Build the vector index over the "data" directory and create the default
# query engine shared by the handlers below.
documents = SimpleDirectoryReader(input_dir="data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Starter questions offered in the dropdown (chosen to match the document content)
recommended_questions: list[str] = [
    "What is covered under Emergency In-patient medical treatment?",
    "What is the meaning of pre-existing disease in the policy?",
    "What are the exclusions for outpatient treatment?",
    "What benefits are included in maternity coverage?",
    "What is the process for trip cancellation claim?",
]

# Query function
def query_document(query: str) -> str:
    """Answer ``query`` with the module-level ``query_engine``.

    Args:
        query: The question text entered by the user.

    Returns:
        The engine's response converted to ``str``, or a short prompt asking
        for input when ``query`` is empty or contains only whitespace.
    """
    # A blank submission carries no question; reply locally instead of
    # forwarding it to the query engine.
    if not query or not query.strip():
        return "Please enter a question."
    response = query_engine.query(query)
    return str(response)

# Dropdown handler
def handle_dropdown_selection(selected_question):
    """Answer the question picked in the dropdown.

    Args:
        selected_question: The dropdown's current value; may be ``None`` or
            empty when nothing is selected.

    Returns:
        The string returned by ``query_document`` for the selection, or an
        empty string when there is no selection to answer.
    """
    # With no selection there is nothing to ask; return an empty response
    # instead of forwarding None/"" to the query engine.
    if not selected_question:
        return ""
    return query_document(selected_question)

# Gradio interface with two rows
def launch_interface():
    """Build the Gradio Blocks UI and launch it.

    After the header row (optional logo plus title), the page offers two
    ways to ask a question, each with its own response textbox:

    * a dropdown populated from the module-level ``recommended_questions``,
      handled by ``handle_dropdown_selection`` whenever its value changes;
    * a free-text box with a submit button, handled by ``query_document``.
    """
    with gr.Blocks(title="RAG Application Using LlamaIndex") as demo:
        with gr.Row():
            with gr.Column(scale=1):
                # The logo is optional: the component is only created when
                # the image file is present.
                if os.path.exists("data/logo.png"):
                    # No `scale` argument: Gradio documents `scale` as an
                    # integer and warns about fractional values such as 0.5.
                    gr.Image("data/logo.png", show_label=False, container=False)
            with gr.Column(scale=4):
                gr.Markdown(
                    """
                    <div style='text-align: center;'>
                        <h2>RAG Application with LlamaIndex</h2>
                        <p>Ask questions about the documents. You can start with a suggested question or enter your own below.</p>
                    </div>
                    """,
                    elem_id="header"
                )

        with gr.Row():
            with gr.Column():
                # The magnifier emoji was stored as mis-decoded UTF-8 bytes
                # ("ðŸ”Ž"); it is restored to the intended character here.
                gr.Markdown("### 🔎 Recommended Questions")
                question_dropdown = gr.Dropdown(
                    choices=recommended_questions,
                    label="Choose a question",
                    interactive=True
                )
            with gr.Column():
                rag_response = gr.Textbox(label="Response", lines=10)

        with gr.Row():
            with gr.Column():
                user_query = gr.Textbox(label="Or type your own question", placeholder="Enter your query...")
                submit_btn = gr.Button("Submit")
            with gr.Column():
                manual_response = gr.Textbox(label="Response", lines=10)

        # Logic wiring: each input path writes to its own response box.
        question_dropdown.change(handle_dropdown_selection, inputs=question_dropdown, outputs=rag_response)
        submit_btn.click(query_document, inputs=user_query, outputs=manual_response)

    demo.launch()

# Run the app
if __name__ == "__main__":
    launch_interface()




It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8c3c7f7d331a1fdb26.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
