In [1]:
from openai import OpenAI
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import os
import requests
import json

# Load env variables
load_dotenv()

True

In [2]:
# variables from .env
# os.getenv returns None for any name that is not set, so a missing entry is
# not reported here; it only surfaces when a request is built from these values.
RUNPOD_TOKEN = os.getenv("RUNPOD_TOKEN")  # sent as the Bearer token in request headers
RUNPOD_CHATBOT_URL = os.getenv("RUNPOD_CHATBOT_URL")  # should be .../runsync
MODEL_NAME = os.getenv("MODEL_NAME")  # passed as the "model" field of chat requests


In [None]:
def get_chatbot_response(model_name, messages, temperature=0, timeout=300):
    """Send a chat request to the RunPod chatbot endpoint and wrap the reply.

    Args:
        model_name: Model identifier placed in the request's "model" field.
        messages: List of {"role": ..., "content": ...} dicts, forwarded as-is.
        temperature: Sampling temperature (defaults to 0).
        timeout: Seconds to wait for the HTTP response. Without a timeout,
            requests waits indefinitely and a stalled endpoint blocks the kernel.

    Returns:
        An OpenAI-style dict of the form
        {"choices": [{"message": {"role": "assistant", "content": <text>}}]}.
        <text> is the first entry of output[0]["choices"][0]["tokens"] in the
        JSON response, or the literal "No output returned" when the response
        does not have that structure (a warning is emitted in that case).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (via raise_for_status).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    sampling_params = {
        "temperature": temperature,
        "top_p": 0.8,
        "max_tokens": 2000,
    }
    payload = {
        "input": {
            "model": model_name,
            "messages": messages,
            # Top-level copies are kept so the request stays a superset of
            # what was sent before.
            **sampling_params,
            # NOTE(review): the recorded outputs in this notebook stop
            # mid-sentence even though max_tokens is 2000, which suggests the
            # top-level options are ignored. RunPod's vLLM worker documents a
            # nested "sampling_params" object - confirm against the deployed
            # worker.
            "sampling_params": dict(sampling_params),
        }
    }

    headers = {
        "Authorization": f"Bearer {RUNPOD_TOKEN}",
        "Content-Type": "application/json"
    }

    response = requests.post(
        RUNPOD_CHATBOT_URL, json=payload, headers=headers, timeout=timeout
    )
    response.raise_for_status()
    result = response.json()

    try:
        # Get the first string inside tokens
        final_text = result["output"][0]["choices"][0]["tokens"][0]
    except (KeyError, IndexError, TypeError):
        # Keep the original fallback text, but make the failure visible instead
        # of silently handing a placeholder to downstream cells.
        status = result.get("status") if isinstance(result, dict) else None
        error = result.get("error") if isinstance(result, dict) else None
        warnings.warn(
            f"Chatbot response had no output tokens (status={status!r}, error={error!r})",
            stacklevel=2,
        )
        final_text = "No output returned"

    # Return in OpenAI-style format
    return {
        "choices": [
            {"message": {"role": "assistant", "content": final_text}}
        ]
    }

# Example
# Inside a notebook kernel __name__ is "__main__", so this guard passes and the
# request is sent whenever the cell runs.
if __name__ == "__main__":
    # The question is sent as a single "system"-role message.
    messages = [{"role": "system", "content": "What is the capital of England"}]
    response = get_chatbot_response(MODEL_NAME, messages)
    # Unwrap the assistant text from the OpenAI-style dict returned above.
    print("Chatbot output:\n", response["choices"][0]["message"]["content"])


Chatbot output:
 The capital of England is London.


In [None]:
""" from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import os
import requests
import json

# Load env variables
load_dotenv()



RUNPOD_TOKEN = os.getenv("RUNPOD_TOKEN")
RUNPOD_CHATBOT_URL = os.getenv("RUNPOD_CHATBOT_URL")  # should be .../runsync
MODEL_NAME = os.getenv("MODEL_NAME")
RUNPOD_EMBEDDING_URL = os.getenv("RUNPOD_EMBEDDING_URL")
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME")

def get_chatbot_response(model_name, messages, temperature=0):
    payload = {
        "input": {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "top_p": 0.8,
            "max_tokens": 2000
        }
    }

    headers = {
        "Authorization": f"Bearer {RUNPOD_TOKEN}",
        "Content-Type": "application/json"
    }

    response = requests.post(RUNPOD_CHATBOT_URL, json=payload, headers=headers)
    response.raise_for_status()

    result = response.json()

    # Parse RunPod's output format
    try:
        output_tokens = result["output"][0]["choices"][0]["tokens"]
        final_text = " ".join(output_tokens)
    except (KeyError, IndexError, TypeError):
        final_text = "No output returned"

    return final_text   """

# Get LLM Response

In [None]:
# Example usage
messages = [{"role": "system", "content": "What is the capital of England"}]
# get_chatbot_response returns an OpenAI-style dict; unwrap the assistant text
# so `response` is the plain string that this cell prints and the next displays.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print("Chatbot output:\n", response)

Chatbot output:
 The capital of England is London.


In [7]:
response

'The capital of England is London.'

# Prompt Engineering

## Structured Output

In [14]:
system_prompt = """
You are a helpful assistant that answer questions about capitals of countries.

Your output should be in a structured json format exactly like the one below. You are not allowed to write anything other than the json object:
[
{
    "country": the country that you will get the capital of
    "capital": the capital of the country stated
}]
"""
messages = [{"role":"system","content":system_prompt}]
messages.append({"role":"user","content":"What is the capital of England"})
# get_chatbot_response returns an OpenAI-style dict; unwrap the assistant text
# so `response` is a string that json.loads can parse in the following cell.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

[
{
    "country": "England",
    "capital": "London"
}]


In [17]:
json_response = json.loads(response)
json_response

[{'country': 'England', 'capital': 'London'}]

In [19]:
type(json_response[0]),json_response[0]['capital']

(dict, 'London')

## Input Structuring

In [21]:
user_input = """
Get me the capitals of the following countries:
```
1. Italy
2. Germany
3. France
```
"""
messages = [{"role":"system","content":system_prompt}]
messages.append({"role":"user","content":user_input})
# get_chatbot_response returns an OpenAI-style dict; unwrap the assistant text
# so `response` is a string that json.loads can parse in the following cell.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)


[
    {
        "country": "Italy",
        "capital": "Rome"
    },
    {
        "country": "Germany",
        "capital": "Berlin"
    },
    {
        "country": "France",
        "capital": "Paris"
    }
]


In [22]:
json_response = json.loads(response)
json_response

[{'country': 'Italy', 'capital': 'Rome'},
 {'country': 'Germany', 'capital': 'Berlin'},
 {'country': 'France', 'capital': 'Paris'}]

## Give the model time to think (Chain of thought)

In [23]:
user_prompt = """
Calculate the result of this equation 1+3

Your ouput should be in a structuredd json format exactly like the one below. You are not allowed to write anything other than json object:
{
    result: The final number resulted from calculating the equation above
}
"""
messages = [{"role":"user","content":user_prompt}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

{
    "result": 4
}


In [24]:
259/2*8654+91072*33-12971

4113098.0

In [25]:
user_prompt = """
Calculate the result of this equation 259/2*8654+91072*33-12971

Your ouput should be in a structuredd json format exactly like the one below. You are not allowed to write anything other than json object:
{
    result: The final number resulted from calculating the equation above
}
"""
messages = [{"role":"user","content":user_prompt}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

Here is the calculation:

1. 259 / 2 = 129.5
2. 129.5 * 8654 = 1121653
3. 1121653 + 91072 = 1027025
4. 1027025 * 33 = 33955725
5. 33955725 - 12971 = 32651454

Here is the result in the required JSON format:
```
{
    "result": 326


In [39]:
user_prompt = """
You are a precise math solver. Solve the following equation step-by-step following BEDMAS rules:

259/2*8654 + 91072*33 - 12971

Your output must be a valid JSON object only, with no extra text before or after.  
The JSON must strictly have the following format and keys:  
{
    "steps": "Explain each step of the calculation in plain text",
    "result": <final numeric result without commas or quotes>
}

Rules:
1. Perform calculations digit-by-digit carefully.
2. Never leave the result field blank.
3. Do not include commas in numbers.
4. The final JSON must be syntactically valid.
"""
messages = [{"role": "user", "content": user_prompt}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)


{
"steps": "Step 1: 259/2 = 129.5, Step 2: 129.5*8654 = 1117877, Step 3: 91072*33 = 3008256, Step 4: 1117877 + 3008256 = 4126133, Step 5: 4126133 - 12971 = 4110362",
"result": 4110362


# RAG - Retrieval-Augmented Generation

In [40]:
user_prompt = """
what's new in iphone 16?
"""
messages = [{"role": "user", "content": user_prompt}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

I think there may be some confusion here!

There is no such thing as an "iPhone 16" yet. The latest iPhone model released by Apple is iPhone 13 series, which includes:

1. iPhone 13
2. iPhone 13 Mini
3. iPhone 13 Pro
4. iPhone 13 Pro Max

Apple typically releases new iPhone models in the fall of each year, so we can expect to see iPhone 14 series in the future. However, there is no official


In [2]:
iphone_16 = """
The iPhone 16 introduces several exciting updates, making it one of Apple's most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.

Powered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.

Apple also introduced advanced audio features like "Audio Mix," which uses machine learning to separate background sounds from speech, allowing for more refined audio capture during video recording. Battery life has been extended, especially in the iPhone 16 Pro Max, which is claimed to have the longest-lasting battery of any iPhone 
9TO5MAC

APPLEMAGAZINE
.

Additionally, Apple has switched to USB-C for faster charging and data transfer, and the Pro models now support up to 2x faster video encoding. The starting prices remain consistent with previous generations, with the iPhone 16 starting at $799, while the Pro models start at $999
"""

In [43]:
user_prompt=f"""
{iphone_16}
what's new in iphone 16
"""
messages = [{"role": "user", "content": user_prompt}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

According to the text, the new features and updates in the iPhone 16 include:

1. Larger displays:
	* 6.1-inch display for the base model
	* 6.7-inch screen for the iPhone 16 Plus
	* 6.3-inch display for the iPhone 16 Pro
	* 6.9-inch display for the iPhone 16 Pro Max
2. Thinner bezels
3. More durable Ceramic Shield
4. New A18


### Automatically Extract context data from database

In [3]:
samsung_s23 = """
The Samsung Galaxy S23 brings some incremental but notable upgrades to its predecessor, the Galaxy S22. It features the Snapdragon 8 Gen 2 processor, a powerful chip optimized for the S23 series, delivering enhanced performance, especially for gaming and multitasking. This chip ensures top-tier speed and efficiency across all models, from the base S23 to the larger S23+ and S23 Ultra​
STUFF

TECHRADAR
.

In terms of design, the S23's camera module has been streamlined by removing the raised metal contour around the cameras, creating a cleaner, sleeker look. It also sports the same 6.1-inch 120Hz AMOLED display, protected by tougher Gorilla Glass Victus 2, making it more resistant to scratches and drops​
TECHRADAR
.

The S23 Ultra stands out with its 200MP main camera, offering impressive photo clarity, especially in low-light conditions. The selfie camera across the series has been updated to a 12MP sensor, resulting in sharper selfies. The Ultra model also includes productivity tools such as the S-Pen, which remains an essential feature for note-taking and creative tasks​
STUFF

TECHRADAR
.

Battery life is solid, with the S23 Ultra featuring a 5000mAh battery that lasts comfortably through a day of heavy use. However, charging speeds still lag behind some competitors, with 45W wired charging, which is slower than other brands offering up to 125W charging​
STUFF
.

Overall, the Galaxy S23 series enhances performance, durability, and camera quality, making it a strong contender for users seeking a high-performance flagship.
"""

In [4]:
data = [iphone_16, samsung_s23]

In [4]:
user_prompt = """What's new in iphone 16?"""

In [5]:
import os
import requests
import json
# os.getenv returns None for any name that is not set, so a missing entry is
# not reported here; it only surfaces when an embedding request is built.
RUNPOD_TOKEN = os.getenv("RUNPOD_TOKEN")  # sent as the Bearer token in request headers
RUNPOD_EMBEDDING_URL = os.getenv("RUNPOD_EMBEDDING_URL")  # your /runsync URL
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME")  # passed as the "model" field of embedding requests


In [6]:

def get_embedding(model_name, text_input, timeout=300):
    """Request an embedding for ``text_input`` from the RunPod embedding endpoint.

    Args:
        model_name: Embedding model identifier sent in the "model" field.
        text_input: Value sent in the request's "input" field.
        timeout: Seconds to wait for the HTTP response. Without a timeout,
            requests waits indefinitely and a stalled endpoint blocks the kernel.

    Returns:
        The value found at output["data"][0]["embedding"] in the JSON response,
        or an empty list when the response does not have that structure (a
        warning is emitted in that case).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (via raise_for_status).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    payload = {
        "input": {
            "model": model_name,
            "input": text_input
        }
    }

    headers = {
        "Authorization": f"Bearer {RUNPOD_TOKEN}",
        "Content-Type": "application/json"
    }

    response = requests.post(
        RUNPOD_EMBEDDING_URL, json=payload, headers=headers, timeout=timeout
    )
    response.raise_for_status()

    result = response.json()

    # Parse RunPod embedding output
    try:
        embeddings = result["output"]["data"][0]["embedding"]
    except (KeyError, IndexError, TypeError):
        # Still return [] for compatibility, but report the problem: an empty
        # vector otherwise passes unnoticed until a later similarity step.
        status = result.get("status") if isinstance(result, dict) else None
        error = result.get("error") if isinstance(result, dict) else None
        warnings.warn(
            f"Embedding response had no embedding data (status={status!r}, error={error!r})",
            stacklevel=2,
        )
        embeddings = []

    return embeddings




In [7]:
# Get embedding for user prompt
# get_embedding returns an empty list when the endpoint's response cannot be
# parsed, so an empty result here means the request did not yield a vector.
user_prompt_embeddings = get_embedding(EMBEDDING_MODEL_NAME, user_prompt)


In [22]:
# Get embedding for user prompt
user_prompt_embeddings

[-0.008058824576437473,
 -0.08608289808034897,
 -0.01636185497045517,
 0.0004063753585796803,
 0.0786345899105072,
 -0.03827941417694092,
 0.04233935475349426,
 0.07252942025661469,
 0.02448173053562641,
 0.030907422304153442,
 -0.00038371948176063597,
 -0.035623665899038315,
 -0.021581776440143585,
 -0.016789216548204422,
 0.047070860862731934,
 -0.028633246198296547,
 0.0070171295665204525,
 -0.11862345039844513,
 -0.0715525895357132,
 0.031594254076480865,
 0.06819474697113037,
 -0.11270143836736679,
 -0.06868316233158112,
 -0.07283467799425125,
 -0.0018382285488769412,
 0.019948642700910568,
 -0.015293451026082039,
 -0.01706394925713539,
 -0.004617034923285246,
 -0.08962389081716537,
 0.015598708763718605,
 -0.009745377115905285,
 0.027137480676174164,
 -0.04105726629495621,
 -0.04166778549551964,
 -0.0032071219757199287,
 0.007444491609930992,
 0.0008971737697720528,
 0.08376292884349823,
 0.03837099298834801,
 0.004475852474570274,
 0.024527519941329956,
 -0.05238235741853714,
 -

In [11]:
len(user_prompt_embeddings)

0

In [12]:
# Embed every document in `data`, one request per document. The result keeps
# the order of `data`, so a position in this list is also an index into `data`.
data_embeddings = [get_embedding(EMBEDDING_MODEL_NAME, text) for text in data]

In [14]:
data_embeddings

[[], []]

In [13]:
len(data_embeddings)

2

In [98]:
# Compare the prompt vector (wrapped in a list to form a single-row matrix)
# against every document vector; the result has one row and one column per
# document.
data_similarity = cosine_similarity([user_prompt_embeddings], data_embeddings)

In [99]:
data_similarity

array([[0.73779758, 0.52622817]])

In [100]:
# argmax() without an axis returns a position in the flattened array; because
# the similarity matrix has a single row, that position is the index of the
# best-matching document in `data`.
closest_entry_index = data_similarity.argmax()
closest_entry_index

0

In [101]:
data[closest_entry_index]

'\nThe iPhone 16 introduces several exciting updates, making it one of Apple\'s most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.\n\nPowered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.\n\nApple also introduced advanced audio features like "Audio Mix," which uses machine l

In [103]:
# Build the augmented prompt: the best-matching document first, then a blank
# line, then the user's original question.
user_prompt_with_data = f"""
{data[closest_entry_index]}

{user_prompt}
"""

In [104]:
user_prompt_with_data

'\n\nThe iPhone 16 introduces several exciting updates, making it one of Apple\'s most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.\n\nPowered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.\n\nApple also introduced advanced audio features like "Audio Mix," which uses machine

In [105]:
messages = [{"role": "user", "content": user_prompt_with_data}]
# get_chatbot_response returns an OpenAI-style dict; print only the assistant
# text rather than the wrapper dict.
response = get_chatbot_response(MODEL_NAME, messages)["choices"][0]["message"]["content"]
print(response)

According to the article, the new features and updates in the iPhone 16 include:

1. Larger displays:
	* Base model: 6.1-inch
	* iPhone 16 Plus: 6.7-inch
	* iPhone 16 Pro: 6.3-inch
	* iPhone 16 Pro Max: 6.9-inch
2. Thinner bezels
3. Durable Ceramic Shield
4. New A18 chip (A18 Pro for the Pro
