# Pronova LLM Run Model #
## Use this notebook to do the following ##
- Run the current model on a query
- Start a Flask API server that accepts a query and returns a response

In [5]:
# Load required libraries
import os
from qdrant_client import QdrantClient
from qdrant_client.http import models
from openai import OpenAI
from dotenv import load_dotenv
from IPython.display import Markdown, display

# Load environment variables from .env file
load_dotenv()



True

### Setup Qdrant connection ###

In [6]:
# Read the Qdrant credentials from the environment (populated by load_dotenv()).
# The variable names are looked up exactly as spelled here ('Qdrant_API_KEY',
# 'Qdrant_URL'), so the .env file must use the same mixed-case spelling.
Qdrant_api_key = os.getenv('Qdrant_API_KEY')
if not Qdrant_api_key:
    raise ValueError("No Qdrant API key found in environment variables")
Qdrant_url = os.getenv('Qdrant_URL')
if not Qdrant_url:
    raise ValueError("No Qdrant URL found in environment variables")


# Initialize Qdrant client
try:
    Qclient = QdrantClient(
        url=Qdrant_url,
        api_key=Qdrant_api_key
    )
    # Building the client object does not by itself show that the URL is
    # reachable or the API key is accepted, so make one cheap request before
    # reporting success. A bad URL/key then fails here instead of on the
    # first search issued by the API server.
    Qclient.get_collections()
    print("Successfully connected to Qdrant")
except Exception as e:
    print(f"Failed to connect to Qdrant: {e}")
    raise

Successfully connected to Qdrant


### Setup OpenAI connection ###

In [7]:
# Read the OpenAI API key from the environment and fail fast when it is absent
OpenAI_api_key = os.environ.get('OPENAI_API_KEY')
if not OpenAI_api_key:
    raise ValueError("No OpenAI API key found in environment variables")

# Record the key as an attribute on the OpenAI client class.
# NOTE(review): the functions in this notebook build their clients with a bare
# OpenAI() call; confirm whether this class attribute is what authenticates
# them, or whether the client picks the key up from the environment itself.
setattr(OpenAI, "api_key", OpenAI_api_key)

### Get an OpenAI embedding from a text segment (Function) ###

In [8]:
def get_embedding(text):
    """Return the OpenAI embedding vector for ``text``.

    A new ``OpenAI`` client is created on every call and asked for a
    "text-embedding-ada-002" embedding of ``text``. The embedding of the
    first (only) item in the response is returned.
    """
    embedding_response = OpenAI().embeddings.create(
        model="text-embedding-ada-002",
        input=text,
    )
    first_item = embedding_response.data[0]
    return first_item.embedding

### Retrieve similar chunks from query (Function) ###

In [9]:
def retrieve_relevant_chunks(collection_name, query, top_k=10):
    """Find the stored chunks most similar to ``query``.

    The query is embedded with ``get_embedding`` and used as the search
    vector against ``collection_name`` on the module-level ``Qclient``.

    Args:
        collection_name: Name of the Qdrant collection to search.
        query: Query text to embed and search with.
        top_k: Maximum number of hits requested from Qdrant.

    Returns:
        A ``(contexts, files)`` tuple of parallel lists: the "text" payload
        of every hit, and its "source_file" payload (``None`` when a hit has
        no such key).

    Raises:
        KeyError: If a hit's payload has no "text" entry.
    """
    hits = Qclient.search(
        collection_name=collection_name,
        query_vector=get_embedding(query),
        limit=top_k,
    )

    # "text" is required on every hit; "source_file" is optional.
    contexts = []
    for hit in hits:
        contexts.append(hit.payload["text"])

    files = []
    for hit in hits:
        files.append(hit.payload.get("source_file"))

    return contexts, files


### Rank response source importance (Function) ###

In [10]:
from collections import Counter

def file_ratios(files):
    """Return each distinct file's share of ``files`` as a percentage.

    Args:
        files: Sequence of file identifiers, possibly with repeats.

    Returns:
        Dict mapping each distinct entry to ``count * 100 / len(files)``,
        i.e. values are percentages (0-100), not fractions. An empty
        input yields an empty dict.
    """
    total = len(files)
    shares = {}
    for name, occurrences in Counter(files).items():
        shares[name] = occurrences * 100 / total
    return shares


# Example (values are percentages, not fractions):
# file_ratios(["a", "a", "b", "c"])
# {'a': 50.0, 'b': 25.0, 'c': 25.0}

### Markdown Print Function ###

In [11]:
def print_markdown(md_text):
    """Show ``md_text`` as rendered Markdown in the notebook output."""
    rendered = Markdown(md_text)
    display(rendered)

### Generate Response from Query (Function) ###

In [12]:
import numpy as np

def generate_response(collection_name, query, all_query, all_context, all_responses):
    """Answer ``query`` with retrieved context and the prior conversation.

    Retrieves chunks for ``query`` from ``collection_name``, appends the query
    and its joined context to the running history, replays the whole history
    to the chat model, and appends the model's reply to ``all_responses``.

    Args:
        collection_name: Qdrant collection to retrieve context from.
        query: The new user question.
        all_query: Previous user queries, oldest first. ``None`` is treated
            as an empty history.
        all_context: Context text used for each previous query, parallel to
            ``all_query``. ``None`` is treated as an empty history.
        all_responses: Previous model replies, oldest first. ``None`` is
            treated as an empty history.

    Returns:
        ``(all_query, all_context, all_responses)`` with the new turn
        appended. Lists passed in by the caller are extended in place.
    """
    # On the first turn a caller may have no history to send; start fresh
    # lists instead of failing on None.append(...).
    if all_query is None:
        all_query = []
    if all_context is None:
        all_context = []
    if all_responses is None:
        all_responses = []

    print("Generating response for query:", query)
    # generate context for new query (the source files are not used here)
    context, _ = retrieve_relevant_chunks(collection_name, query)

    system_role = "You are a specialized assistant that only provides advice on dog-related veterinary care. If a user asks about any other animal or topic outside of dog health, politely decline to answer and remind them that you only provide information about dogs. You will always start by asking the user their dog's name, age, and breed if they didn't already provide it."
    # Combine retrieved chunks into a single string
    context_text = "\n".join(context)

    # append query and context to the running lists
    all_query.append(query)
    all_context.append(context_text)

    # create the messages object using all the queries and contexts
    messages = [{"role": "system", "content": system_role}]

    for turn, past_query in enumerate(all_query):
        messages.append({"role": "system", "content": "Use this context to answer my following question: " + all_context[turn]})
        messages.append({"role": "user", "content": past_query})
        if turn < len(all_responses):
            # Earlier model replies must be replayed as "assistant" turns;
            # sending them as "system" presents them to the model as
            # instructions rather than as its own previous answers.
            messages.append({"role": "assistant", "content": all_responses[turn]})

    # Generate a response with the chat completion model
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o-mini-2024-07-18",
        messages=messages
    )
    all_responses.append(completion.choices[0].message.content)
    return all_query, all_context, all_responses

### Playground (use this to test queries in the notebook)

In [None]:
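# collection_name = "LLM_V1"
# query = "After we walk, my dog is always itchy"
# queries, contexts, responses = generate_response(collection_name, query, [], [], [])
# print_markdown(responses[-1])


# NOTE: generate_response does not currently return file rankings (its
# file_ratios call is commented out), so `file_rank` has to be computed
# separately before the lines below can be used.
# print("files used: \n")
# for file in file_rank:
#     print(f"{file}, {file_rank[file]} %")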

: 

### Lightweight Flask Server (for Frontend API testing) ###

In [13]:
from flask import Flask, request, jsonify
from flask_cors import CORS

# Flask application; CORS is enabled on every route
app = Flask(__name__)
CORS(app)

# Qdrant collection searched by the /query endpoint
collection_name = "LLM_V1"

@app.route('/query', methods=['POST'])
def query_llm():
    """Handle POST /query: run one conversation turn and return the history.

    Expects a JSON object with:
        new_query: The new user question (non-empty string, required).
        queries, contexts, responses: Conversation history lists from earlier
            turns. They may be empty or omitted on the first turn.

    Returns:
        200 with the updated 'queries', 'contexts' and 'responses' lists;
        400 with an 'error' message when the body or 'new_query' is invalid;
        500 with an 'error' message when generating the response fails.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so malformed requests get a JSON error like every other failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    new_query = data.get('new_query')
    if not isinstance(new_query, str) or not new_query.strip():
        return jsonify({'error': 'new_query must be a non-empty string'}), 400

    # History is legitimately empty (or absent) on the first turn, so only
    # new_query is mandatory; missing lists default to empty ones.
    queries = data.get('queries') or []
    contexts = data.get('contexts') or []
    responses = data.get('responses') or []

    try:
        updated_queries, updated_contexts, updated_responses = generate_response(collection_name, new_query, queries, contexts, responses)
        return jsonify({
            'queries': updated_queries,
            'contexts': updated_contexts,
            'responses': updated_responses
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

# Start the server only when this code runs as the entry point.
# host '0.0.0.0' listens on all network interfaces, not just localhost.
if __name__ == "__main__":
    server_options = {"host": '0.0.0.0', "port": 5000}
    app.run(**server_options)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.17.96.147:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [09/Dec/2024 11:14:09] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: My dog cant sleep


127.0.0.1 - - [09/Dec/2024 11:14:11] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2024 11:14:50] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: Sure! My dog's name is Mox. He is 11 years old and is a wheaten terrier


127.0.0.1 - - [09/Dec/2024 11:15:48] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: My dog is having trouble sleeping



127.0.0.1 - - [09/Dec/2024 11:15:50] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2024 11:16:05] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: My dogs name is Mox he is 11 years old and is a pug


127.0.0.1 - - [09/Dec/2024 11:16:12] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2024 11:16:30] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2024 11:16:56] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: What is an orthopedic bed


127.0.0.1 - - [09/Dec/2024 11:17:00] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [09/Dec/2024 11:17:32] "OPTIONS /query HTTP/1.1" 200 -


Generating response for query: Thanks so much


127.0.0.1 - - [09/Dec/2024 11:17:34] "POST /query HTTP/1.1" 200 -
