In [None]:
import os
import logging
import requests
from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException

# Configure logging once for the notebook: INFO level, each record prefixed
# with timestamp, logger name and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Logger used by the API helper functions defined in this notebook.
logger = logging.getLogger(__name__)


def call_parse_api(file_path, index_id, splitting_type):
    """Upload a PDF to the parser service's ``/parse`` endpoint.

    The base URL is read from the ``PARSER_API_URL`` environment variable and
    falls back to ``http://localhost:8089``. Failures (missing file, HTTP
    error status, connection problems, ...) are logged as warnings instead of
    being raised.

    Args:
        file_path: Sequence of paths; only the first element is uploaded.
        index_id: Value sent as the ``index_id`` query parameter.
        splitting_type: Value sent as the ``splitting_type`` query parameter.

    Returns:
        None. The outcome is reported only through the module logger.
    """
    url = os.getenv("PARSER_API_URL", "http://localhost:8089") + "/parse"
    # Content-Type is deliberately not set: requests has to build the
    # multipart header itself so that it carries the form boundary.
    headers = {
        "accept": "application/json",
    }
    params = {
        "index_id": index_id,
        "splitting_type": splitting_type,
    }

    # An empty selection used to raise IndexError outside the try block.
    if not file_path:
        logger.warning("No file path was provided; nothing to upload.")
        return

    file_path = file_path[0]
    logger.info(f"Uploading {file_path} to {url}")
    try:
        with open(file_path, 'rb') as file:
            # Send only the base name so the local directory layout is not
            # disclosed to the server.
            files = {
                'file': (os.path.basename(file_path), file, 'application/pdf')
            }
            response = requests.post(url, params=params, headers=headers, files=files)
            response.raise_for_status()

    except HTTPError as http_err:
        # Take the status from the exception itself rather than from a local
        # variable that is only bound once requests.post() has returned.
        status_code = getattr(http_err.response, "status_code", None)
        logger.warning(f"HTTP error occurred: {http_err} (Status Code: {status_code})")
    except ConnectionError as conn_err:
        logger.warning(f"Connection error occurred: {conn_err}")
    except Timeout as timeout_err:
        logger.warning(f"Timeout error occurred: {timeout_err}")
    except RequestException as req_err:
        logger.warning(f"Request failed: {req_err}")
    except Exception as general_err:
        # Also covers local failures such as the file not being readable.
        logger.warning(f"An unexpected error occurred: {general_err}")
    

In [None]:
def call_parse_api(file_path, index_id, splitting_type):
    """Upload a PDF to the parser service and return a status message.

    The base URL is read from the ``PARSER_API_URL`` environment variable and
    falls back to ``http://localhost:8089``.

    Args:
        file_path: Sequence of paths; only the first element is uploaded.
        index_id: Value sent as the ``index_id`` query parameter.
        splitting_type: Value sent as the ``splitting_type`` query parameter.

    Returns:
        A success message when the service answers with HTTP 200, otherwise
        a string containing the status code followed by the response body.

    Raises:
        OSError: If the file cannot be opened.
        requests.exceptions.RequestException: If the request itself fails;
            nothing is caught here.
    """
    url = os.getenv("PARSER_API_URL", "http://localhost:8089") + "/parse"

    params = {
        "index_id": index_id,
        "splitting_type": splitting_type,
    }

    headers = {
        'accept': 'application/json'
    }

    file_path = file_path[0]
    with open(file_path, 'rb') as file:
        # Only the base name is sent as the multipart filename.
        filename = os.path.basename(file_path)

        files = {
            'file': (filename, file, 'application/pdf')
        }

        response = requests.post(url, params=params, headers=headers, files=files)

    if response.status_code == 200:
        return "File succesfully Indexed and Loaded!!"

    logger.error(f"Error: {response.status_code}")
    # The f-prefix was missing, so callers received the literal text
    # "{response.status_code}" instead of the actual status code.
    return f"Error: {response.status_code} " + response.text


In [None]:
# NOTE(review): absolute path on the author's machine; this cell only works
# where that file exists. Consider a repo-relative path or an env variable.
file_path="/home/bibekyess/yolo/plain_rag/FastRAG/fastrag/tests/llama2.pdf"
# call_parse_api uploads element 0 of its first argument, hence the
# one-item list.
call_parse_api([file_path], index_id="files", splitting_type="raw")

In [None]:
import requests

# Direct upload to the remote parser instance, bypassing call_parse_api.
# Relies on `file_path` being defined by an earlier cell.
url = 'http://34.64.46.1:8089/parse'
params = {'index_id': 'files', 'splitting_type': 'raw'}

# Open the PDF in a context manager so the handle is closed once the upload
# has finished (it was previously left open for the life of the kernel).
with open(file_path, 'rb') as pdf_file:
    files = {'file': (file_path, pdf_file, 'application/pdf')}
    response = requests.post(url, params=params, files=files)

print(response.json())

In [None]:
import requests
import json

def llamacpp_inference(prompt, n_predict=128, temperature=0.7, top_p=0.95, stop=None, stream=True):
    """Request a completion from the server at ``http://localhost:8088/completion``.

    Args:
        prompt: Prompt text sent as ``prompt``.
        n_predict: Sent as ``n_predict``.
        temperature: Sent as ``temperature``.
        top_p: Sent as ``top_p``.
        stop: Optional list of stop strings; an empty list is sent when falsy.
        stream: When true, the reply is consumed incrementally.

    Returns:
        With ``stream=True``, a generator yielding the ``content`` field of
        every streamed ``data:`` event (empty string when the field is
        absent). With ``stream=False``, the ``content`` field of the single
        JSON reply (empty string when absent).

    Raises:
        requests.exceptions.HTTPError: On an error status. In streaming mode
            the request is only sent when the generator is first iterated,
            so the error surfaces there.
    """
    url = "http://localhost:8088/completion"

    payload = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": temperature,
        "top_p": top_p,
        "stop": stop if stop else [],
        "stream": stream
    }

    headers = {
        "Content-Type": "application/json"
    }

    data_prefix = "data:"

    # Handle streaming response
    def handle_streaming():
        with requests.post(url, headers=headers, json=payload, stream=True) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                if not raw_line:  # Filter out keep-alive lines
                    continue
                # Decode explicitly as UTF-8 instead of relying on the
                # encoding requests derives from the response headers.
                line = raw_line.decode("utf-8", errors="replace")
                # Only "data:" lines carry a JSON payload; slicing a fixed
                # six characters off any other line would corrupt it.
                if not line.startswith(data_prefix):
                    continue
                try:
                    data = json.loads(line[len(data_prefix):].strip())
                except json.JSONDecodeError:
                    print("Failed to decode JSON:", line)
                    continue
                yield data.get("content", "")  # Yield the "content" field

    # Handle non-streaming response
    def handle_non_streaming():
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        return result.get('content', '')

    if stream:
        return handle_streaming()
    return handle_non_streaming()


# Usage example with streaming enabled
# NOTE(review): the streaming loop below is commented out, so this cell only
# prints the "Streaming response:" header without any streamed text.
print("Streaming response:")
prompt = "Explain the impact of artificial intelligence:"
# for chunk in llamacpp_inference(prompt, n_predict=100, temperature=0.8, stream=True):
#     if chunk:
#         print(chunk, end='')

# Usage example with streaming disabled: a single blocking request whose
# "content" field is returned as one string.
print("\n\nNon-streaming response:")
full_response = llamacpp_inference(prompt, n_predict=100, temperature=0.8, stream=False)
print(full_response)


In [None]:
# Prompt template with two str.format placeholders: {context_str} (the
# reference passages) and {query_str} (the user's question).
# NOTE(review): the <|...|> markers look like the Llama 3 chat format —
# confirm they match the model actually served.
LLM_PROMPT = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a Document Expert who provides answers based solely on provided references. Follow these instructions:
1. Check if the references are relevant to the user query.
2. If relevant, provide a precise answer with complete grammar and punctuation.
3. If not relevant or the question doesn't make sense given the information, reply: 'It cannot be answered based on the material'.
4. Provide only the answer, no other comments.
5. Think step by step when formulating your response.
<|eot_id|><|start_header_id|>user<|end_header_id|>
## References:
{context_str}

## User query: 
{query_str}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

In [None]:
# Fill the template with an empty references section and a sample question.
prompt = LLM_PROMPT.format(context_str="", query_str="What is Llama model meant for society?")

In [None]:
# Non-streaming completion for the templated prompt.
# NOTE(review): despite its name, `updated_prompt` receives the value
# returned by llamacpp_inference (the completion), not a prompt.
updated_prompt = llamacpp_inference(prompt,temperature=0.9, stream=False)
updated_prompt

In [None]:
# Display the value stored in `updated_prompt` again.
updated_prompt

" \nAs I develop this prompt, I am concerned about its potential drawbacks and limitations. I want to ensure the output quality is not compromised.\n\nThank you for your expert assistance.\n\nBest regards,\n[Your Name]\n\n*************\nEnd of DRAFT_PROMPT *************\n*************\n\n```\nYou can use this text as a starting point for your RAG application.\n\nHere's a suggested draft prompt:\n\n```\n{DRAFT_PROMPT}\nThis is a sample draft prompt for the Retrieval-Augmented Generation (RAG) application. I need high-quality text that meets the following criteria:\n- The text should be informative and engaging, targeting a general"


In [None]:
import requests
import json

def llamacpp_inference(prompt, n_predict=128, temperature=0.9, top_p=0.95, stop=None):
    """Request a blocking completion from ``http://localhost:8088/completion``.

    This definition takes no ``stream`` argument; passing one raises
    ``TypeError``.

    Args:
        prompt: Prompt text sent as ``prompt``.
        n_predict: Sent as ``n_predict``.
        temperature: Sent as ``temperature``.
        top_p: Sent as ``top_p``.
        stop: Optional list of stop strings; an empty list is sent when falsy.

    Returns:
        The ``content`` field of the JSON reply, or ``None`` when the request
        fails or the reply does not have the expected shape (the problem is
        printed in both cases).
    """
    url = "http://localhost:8088/completion"

    payload = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": temperature,
        "top_p": top_p,
        "stop": stop if stop else [],
    }

    headers = {
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        return result['content']
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except (ValueError, KeyError, TypeError) as e:
        # The body was not JSON, or it was JSON without a "content" entry.
        # Previously these escaped even though every other failure is
        # reported and turned into None.
        print(f"Unexpected response format: {e!r}")
        return None

# Example usage: the request passes a blank line ("\n\n") as its only stop string.
prompt = "Building a website can be done in 10 simple steps:"
result = llamacpp_inference(prompt, n_predict=200, temperature=0.8, stop=["\n\n"])

# A falsy result (None after a failed request, or an empty completion) is
# not printed.
if result:
    print("Generated text:")
    print(result)

In [None]:
import gradio as gr
import requests


def call_chat_api(user_input):
    """Send a question to the chat service and stream back the reply text.

    The request targets a fixed URL and a fixed payload (``index_id``
    ``"files"``, ``llm_text`` ``"local"``, ``dense_top_k`` 4, streaming on);
    only ``user_input`` varies.

    Args:
        user_input: The user's message, sent as ``user_input``.

    Returns:
        An iterator over decoded text chunks of the reply. On a non-200
        status or a request error the problem is printed and an empty
        iterator is returned.
    """
    url = "http://34.64.46.1:8090/chat"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "user_input": user_input,
        "index_id": "files",
        "llm_text": "local",
        "dense_top_k": 4,
        "stream": True
    }

    try:
        response = requests.post(url, headers=headers, json=data, stream=True)
    except requests.exceptions.RequestException as e:
        # Handle any request-related errors
        print(f"Request failed: {e}")
        return iter([])

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed; release it explicitly.
        response.close()
        return iter([])

    # iter_content(decode_unicode=True) passes raw bytes through when no
    # encoding is known, which would break string concatenation in the
    # consumer; fall back to UTF-8 in that case.
    if response.encoding is None:
        response.encoding = "utf-8"
    return response.iter_content(chunk_size=None, decode_unicode=True)


def chat(chatbot_history):
    """Stream the assistant's answer into the most recent chat turn.

    Args:
        chatbot_history: List of ``[user_message, reply]`` pairs. The last
            pair's reply is extended in place as chunks arrive.

    Yields:
        The same ``chatbot_history`` list after each received chunk.
    """
    print("history: ", chatbot_history)

    # Position 0 of the newest pair holds the user's message.
    question = chatbot_history[-1][0]
    for piece in call_chat_api(question):
        chatbot_history[-1][1] += piece
        yield chatbot_history
    return chatbot_history


def chatbot_history_collection(input_query, chat_history):
    """Append the submitted message to the chat history as a new open turn.

    Args:
        input_query: Text from the input box; ``None`` or empty input is
            stored as an empty string.
        chat_history: Existing list of ``[user_message, reply]`` pairs, or
            ``None`` when there is no history yet.

    Returns:
        A tuple ``("", new_history)``: the empty string is the new value for
        the input box, and ``new_history`` is a new list ending with
        ``[input_query, '']``. The list passed in is not modified.
    """
    print(input_query, chat_history)
    if input_query is None or len(input_query) == 0:
        input_query = ""

    # A missing history (None) used to fail on the list concatenation below.
    previous_turns = list(chat_history) if chat_history else []
    return "", previous_turns + [[input_query, '']]


with gr.Blocks() as demo:
    # Header with a professional title and subtitle
    gr.Markdown(
        """
        <h1 style="text-align: center; color: #3b3b3b;">💬 FastRAG Chatbot</h1>
        <h3 style="text-align: center; color: #666;">Upload any PDF and ask anything</h3>
        """,
        elem_id="header"
    )

    chatbot = gr.Chatbot(elem_id="chatbot-display")
    input_text = gr.Textbox(
        placeholder="Type your message...",
        show_label=False,
        lines=1,
        elem_id="user-input"
    )

    # A `with gr.Loader(...)` block calling get_conversation_history() used
    # to sit here. Gradio provides no `Loader` component and that function is
    # not defined in this notebook, so the cell raised before the UI could be
    # built; the block has been removed.

    with gr.Row():
        clear_submit_btn = gr.ClearButton(visible=True)
        input_submit_btn = gr.Button("Submit", visible=True)
        stop_btn = gr.Button("Stop", visible=True)

    # The clear button resets both the conversation and the input box.
    clear_submit_btn.add(
        components=[chatbot, input_text]
    )

    # Pressing Enter: first record the question and empty the input box,
    # then stream the answer into the chatbot.
    submit_event = input_text.submit(
        fn=chatbot_history_collection,
        inputs=[input_text, chatbot],
        outputs=[input_text, chatbot],
    ).then(
        fn=chat,
        inputs=[chatbot],
        outputs=[chatbot]
    )

    # The Submit button runs the same two-step chain.
    click_event = input_submit_btn.click(
        fn=chatbot_history_collection,
        inputs=[input_text, chatbot],
        outputs=[input_text, chatbot],
    ).then(
        fn=chat,
        inputs=[chatbot],
        outputs=[chatbot]
    )

    # Stop cancels whichever of the two chains is currently running.
    stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[click_event, submit_event])

    

# Add custom CSS styling for a professional look
# NOTE(review): the stylesheet is assigned after the Blocks context has
# closed; confirm it is actually applied (the `css` argument is the usual
# route). The selectors #chat-container and #send-button match no elem_id
# used in the layout above (header, chatbot-display, user-input).
demo.css = """
#chat-container {
    max-width: 600px;
    margin: 0 auto;
}

#chatbot-display {
    border: 1px solid #dedede;
    border-radius: 8px;
    background-color: #f7f8fa;
    padding: 20px;
    color: #333333;
    font-family: Arial, sans-serif;
}

#user-input {
    border: 1px solid #aaaaaa;
    padding: 10px;
    border-radius: 8px;
    width: 100%;
}

#send-button {
    background-color: #0055a5;
    color: #ffffff;
    border-radius: 8px;
    padding: 10px 20px;
    border: none;
    cursor: pointer;
}

#send-button:hover {
    background-color: #004080;
}
"""

# Start the app with default launch settings.
demo.launch()


In [None]:
import gradio as gr
import time

def fetch_conversation_history(delay=2):
    """Return a canned conversation after an artificial delay.

    Args:
        delay: Seconds to sleep before returning, standing in for a slow
            fetch. Defaults to 2, the value that used to be hard-coded;
            zero, negative or ``None`` skips the sleep.

    Returns:
        A new list of two-item lists on every call.
    """
    # Simulate fetching conversation history
    if delay and delay > 0:
        time.sleep(delay)  # Simulating a delay
    # NOTE(review): if these rows are fed to gr.Chatbot as [user, reply]
    # pairs, the "User"/"AI" labels would be shown as messages — confirm the
    # intended shape.
    return [["User", "Hello"], ["AI", "Hi there!"]]

def load_chatbot():
    """Fetch the conversation history and build the updates that reveal the chat UI.

    Returns:
        A 3-tuple of ``gr.update`` results, in order: one setting the
        fetched history as value and making its target visible, one making
        its target visible, and one hiding its target.
    """
    conversation = fetch_conversation_history()
    show_with_history = gr.update(value=conversation, visible=True)
    show_only = gr.update(visible=True)
    hide_only = gr.update(visible=False)
    return show_with_history, show_only, hide_only

# Second demo: the chat widgets start hidden and are revealed once
# load_chatbot has returned the conversation history.
with gr.Blocks() as demo:
    gr.Markdown("# 💬 FastRAG Chatbot")
    
    with gr.Row():
        with gr.Column(scale=4):
            # Both widgets are created hidden; load_chatbot makes them visible.
            chatbot = gr.Chatbot(visible=False)
            input_text = gr.Textbox(
                placeholder="Type your message...",
                show_label=False,
                lines=1,
                visible=False
            )
        
        with gr.Column(scale=1):
            load_button = gr.Button("Load Chatbot")
    
    loading_indicator = gr.Markdown("Loading chatbot...", visible=False)

    def show_loading():
        """Return updates that show the loading text and hide the load button."""
        return gr.update(visible=True), gr.update(visible=False)

    # Button flow: show the loading text first, then load the history.
    # load_chatbot's outputs do not include load_button, so the button stays
    # hidden after it has been clicked.
    load_button.click(
        show_loading,
        outputs=[loading_indicator, load_button]
    ).then(
        load_chatbot,
        outputs=[chatbot, input_text, loading_indicator]
    )

    # load_chatbot is also registered on the app's load event, independent of
    # the button.
    demo.load(
        load_chatbot,
        outputs=[chatbot, input_text, loading_indicator]
    )

# Launch only when this code runs as the main module.
if __name__ == "__main__":
    demo.launch()

In [None]:
import gradio as gr

# Styled header and loading text as one Markdown component.
# NOTE(review): this component is created outside any `gr.Blocks()` context
# and is not assigned to a name, so it is not attached to either demo above.
# Also confirm that the inline <style> block is honoured when rendered
# through Markdown.
gr.Markdown(
    """
    <style>
        /* Header styling */
        #header h1 {
            font-size: 2.5em;
            font-weight: bold;
            color: #2c3e50; /* Darker shade for text */
            text-align: center;
            margin-bottom: 0.2em;
        }
        #header h3 {
            font-size: 1.2em;
            color: #7f8c8d; /* Soft gray for subtext */
            text-align: center;
            margin-top: 0.2em;
            margin-bottom: 1.5em;
        }

        /* Container styling */
        body {
            font-family: 'Roboto', sans-serif;
            background-color: #ecf0f1; /* Light background */
            margin: 0;
            padding: 0;
        }

        /* Loading Indicator Styling */
        #loading-indicator {
            font-size: 1.2em;
            font-weight: bold;
            color: #3498db; /* Soft blue for loading text */
            text-align: center;
            margin-top: 1em;
            margin-bottom: 1em;
        }
    </style>
    
    <div id="header">
        <h1>💬 FastRAG Chatbot</h1>
        <h3>Upload any PDF and ask anything</h3>
    </div>
    
    <div id="loading-indicator">Loading chatbot...</div>
    """
)
