In [None]:
import requests
import json

def llamacpp_inference(prompt, n_predict=128, temperature=0.7, top_p=0.95, stop=None, stream=True):
    """Request a completion from the llama.cpp server's ``/completion`` endpoint.

    Args:
        prompt: Text sent as the ``prompt`` field of the request.
        n_predict: Sent as ``n_predict``.
        temperature: Sent as ``temperature``.
        top_p: Sent as ``top_p``.
        stop: Optional list of stop strings; ``None`` or empty is sent as ``[]``.
        stream: When true, return a generator that yields the ``content``
            field of each streamed event. When false, return the ``content``
            field of the single JSON response as one value.

    Returns:
        A lazy generator of content pieces (``stream=True``) or the content
        of the full response (``stream=False``). In streaming mode the HTTP
        request is only sent once the generator is first iterated.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with an HTTP error status.
    """
    url = "http://localhost:8088/completion"

    payload = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": temperature,
        "top_p": top_p,
        "stop": stop if stop else [],
        "stream": stream,
    }

    headers = {
        "Content-Type": "application/json"
    }

    data_prefix = "data:"

    # Handle streaming response
    def handle_streaming():
        with requests.post(url, headers=headers, json=payload, stream=True) as response:
            response.raise_for_status()
            # Iterate raw bytes and decode each line as UTF-8 ourselves:
            # letting requests decode would use the charset guessed from the
            # headers (ISO-8859-1 for text/* without a charset), and splitting
            # decoded text also breaks lines on Unicode line separators.
            for raw_line in response.iter_lines():
                if not raw_line:  # Filter out keep-alive lines
                    continue
                line = raw_line.decode("utf-8", errors="replace")
                # Strip the "data:" field name only when it is really there,
                # instead of blindly dropping the first six characters.
                if line.startswith(data_prefix):
                    body = line[len(data_prefix):].strip()
                else:
                    body = line.strip()
                try:
                    data = json.loads(body)
                except json.JSONDecodeError:
                    print("Failed to decode JSON:", line)
                    continue
                # Only JSON objects carry a "content" field.
                if isinstance(data, dict):
                    yield data.get("content", "")

    # Handle non-streaming response
    def handle_non_streaming():
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        return result.get('content', '')

    if stream:
        return handle_streaming()
    return handle_non_streaming()


# Usage example with streaming enabled.
# NOTE: the streaming loop below is commented out, so only the header line
# is printed for this part.
print("Streaming response:")
prompt = "Explain the impact of artificial intelligence:"
# for chunk in llamacpp_inference(prompt, n_predict=100, temperature=0.8, stream=True):
#     if chunk:
#         print(chunk, end='')

# Usage example with streaming disabled: a single call returns the
# response's "content" value, which is printed in one go.
print("\n\nNon-streaming response:")
full_response = llamacpp_inference(prompt, n_predict=100, temperature=0.8, stream=False)
print(full_response)


In [None]:
import requests
import json

def llamacpp_inference(prompt, n_predict=128, temperature=0.9, top_p=0.95, stop=None):
    """Request a non-streaming completion from the llama.cpp server.

    Args:
        prompt: Text sent as the ``prompt`` field of the request.
        n_predict: Sent as ``n_predict``.
        temperature: Sent as ``temperature``.
        top_p: Sent as ``top_p``.
        stop: Optional list of stop strings; ``None`` or empty is sent as ``[]``.

    Returns:
        The ``content`` field of the JSON response, or ``None`` when the
        request fails, the server answers with an HTTP error status, or the
        body is not a JSON object containing ``content``. Every failure is
        reported on stdout rather than raised.
    """
    url = "http://localhost:8088/completion"

    payload = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": temperature,
        "top_p": top_p,
        "stop": stop if stop else [],
    }

    headers = {
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers invalid JSON on requests versions whose decode
        # error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None

    # A well-formed reply is a JSON object with a "content" key; anything
    # else follows the same print-and-return-None contract instead of
    # escaping as KeyError/TypeError.
    if not isinstance(result, dict) or "content" not in result:
        print("An error occurred: response has no 'content' field")
        return None
    return result["content"]

# Example usage: generate up to 200 tokens and stop at the first blank line.
prompt = "Building a website can be done in 10 simple steps:"
result = llamacpp_inference(prompt, n_predict=200, temperature=0.8, stop=["\n\n"])

# llamacpp_inference returns None on a request error, so only print a
# truthy (non-empty) result.
if result:
    print("Generated text:")
    print(result)

In [None]:
import gradio as gr
import requests


def call_chat_api(user_input):
    """POST the user's message to the chat service and stream back the reply.

    Args:
        user_input: Text sent as the ``user_input`` field of the request.

    Returns:
        An iterator over decoded text chunks of the response body. An empty
        iterator is returned (and the problem printed) when the request
        fails or the server answers with a status other than 200.
    """
    url = "http://34.64.46.1:8090/chat"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "user_input": user_input,
        "index_id": "files",
        "llm_text": "local",
        "dense_top_k": 4,
        "stream": True
    }

    try:
        response = requests.post(url, headers=headers, json=data, stream=True)
    except requests.exceptions.RequestException as e:
        # Handle any request-related errors
        print(f"Request failed: {e}")
        return iter([])

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        # The body is never read on this path, so release the connection.
        response.close()
        return iter([])

    # Without an explicit charset, requests either guesses ISO-8859-1 (for
    # text/* types) or leaves the encoding unset, in which case
    # decode_unicode=True yields raw bytes. Default to UTF-8 so callers
    # always receive correctly decoded str chunks.
    content_type = response.headers.get("Content-Type", "")
    if "charset" not in content_type.lower():
        response.encoding = "utf-8"
    return response.iter_content(chunk_size=None, decode_unicode=True)


def chat(chatbot_history):
    """Stream the reply for the newest turn into the chat history.

    The last entry of ``chatbot_history`` is a ``[user_message, reply]``
    pair. The user message is sent through ``call_chat_api`` and every
    chunk that comes back is appended to the reply slot, yielding the
    whole history after each chunk.
    """
    print("history: ", chatbot_history)

    latest_turn = chatbot_history[-1]
    question = latest_turn[0]  # slot 0 holds the user's message
    for piece in call_chat_api(question):
        latest_turn[1] += piece  # slot 1 accumulates the reply
        yield chatbot_history
    return chatbot_history


def chatbot_history_collection(input_query, chat_history):
    """Append a new user turn with an empty reply slot to the history.

    Args:
        input_query: Text from the input box; ``None`` or empty is stored
            as ``""``.
        chat_history: Existing list of ``[user_message, reply]`` pairs;
            ``None`` is treated as an empty history.

    Returns:
        A tuple ``("", new_history)``: the empty string is the new value for
        the input box, and ``new_history`` is a new list ending with
        ``[input_query, '']``. The list passed in is not modified.
    """
    print(input_query, chat_history)
    if not input_query:
        input_query = ""
    # Treat a missing history like an empty one, just as a missing query is
    # treated like an empty string, instead of failing on ``None + list``.
    previous_turns = [] if chat_history is None else list(chat_history)

    return "", previous_turns + [[input_query, '']]


# Build the chat UI: header, chat display, input box and control buttons,
# then wire the submit/click/stop events.
with gr.Blocks() as demo:
    # Header with a professional title and subtitle
    gr.Markdown(
        """
        <h1 style="text-align: center; color: #3b3b3b;">💬 FastRAG Chatbot</h1>
        <h3 style="text-align: center; color: #666;">Upload any PDF and ask anything</h3>
        """,
        elem_id="header"
    )

    # elem_id values are the hooks for the "#..." selectors in the stylesheet.
    chatbot = gr.Chatbot(elem_id="chatbot-display")
    input_text = gr.Textbox(
        placeholder="Type your message...",
        show_label=False,
        lines=1,
        elem_id="user-input"
    )

    with gr.Row():
        clear_submit_btn = gr.ClearButton(visible=True)
        # Give the Submit button the id that the "#send-button" CSS rules
        # target; without it those rules never apply to any component.
        input_submit_btn = gr.Button("Submit", visible=True, elem_id="send-button")
        stop_btn = gr.Button("Stop", visible=True)

    # The clear button resets both the conversation and the input box.
    clear_submit_btn.add(
        components=[chatbot, input_text]
    )

    # Pressing Enter in the textbox: first record the user turn (and clear
    # the box), then stream the reply into the chat display.
    submit_event = input_text.submit(
        fn=chatbot_history_collection,
        inputs=[input_text, chatbot],
        outputs=[input_text, chatbot],
    ).then(
        fn=chat,
        inputs=[chatbot],
        outputs=[chatbot]
    )

    # Clicking Submit runs the same two-step chain as pressing Enter.
    click_event = input_submit_btn.click(
        fn=chatbot_history_collection,
        inputs=[input_text, chatbot],
        outputs=[input_text, chatbot],
    ).then(
        fn=chat,
        inputs=[chatbot],
        outputs=[chatbot]
    )

    # Stop runs no function of its own; it only cancels the two chains above.
    stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[click_event, submit_event])

    

# Add custom CSS styling for a professional look.
# Every selector below is an element id ("#..."), so a rule only affects a
# component that was created with the matching elem_id.
# NOTE(review): the stylesheet is assigned after the Blocks context has
# closed; presumably it is picked up when the app is launched -- confirm
# against the installed Gradio version.
demo.css = """
#chat-container {
    max-width: 600px;
    margin: 0 auto;
}

#chatbot-display {
    border: 1px solid #dedede;
    border-radius: 8px;
    background-color: #f7f8fa;
    padding: 20px;
    color: #333333;
    font-family: Arial, sans-serif;
}

#user-input {
    border: 1px solid #aaaaaa;
    padding: 10px;
    border-radius: 8px;
    width: 100%;
}

#send-button {
    background-color: #0055a5;
    color: #ffffff;
    border-radius: 8px;
    padding: 10px 20px;
    border: none;
    cursor: pointer;
}

#send-button:hover {
    background-color: #004080;
}
"""

# Start the Gradio app with default launch settings.
demo.launch()
