In [73]:
# import libraries
import os
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import json

# load the .env file
_ = load_dotenv(find_dotenv())

In [66]:
# Build an OpenAI-format client aimed at the RUNPOD endpoint that serves the llama-3.2 model.
# The token and the endpoint URL are both read from environment variables.
client = OpenAI(
    base_url=os.getenv('RUNPOD_CHATBOT_URL'),
    api_key=os.getenv('RUNPOD_TOKEN'),
)

# Model identifier sent along with every chat request
model_name = os.getenv('MODEL_NAME')

In [4]:
# Ask the llama-3.2 endpoint a single question, speaking as the user (role)
response = client.chat.completions.create(
    messages=[{'role': 'user', 'content': 'What is the capital of Germany?'}],
    model=model_name,
    max_tokens=2000,
    temperature=0.0,
    top_p=0.8,
)

In [5]:
# check response
response.choices[0].message.content

'The capital of Germany is Berlin.'

In [5]:
# combining all into a single function
def get_chatbot_response(client, model_name, messages, temperature=0, top_p=0.8, max_tokens=2000):
    """Send a chat conversation to the model and return the reply text.

    Args:
        client: OpenAI-format client exposing ``chat.completions.create``.
        model_name: Identifier of the model to query.
        messages: Iterable of mappings, each with ``'role'`` and ``'content'``
            keys. Any other keys are dropped before the request is sent.
        temperature: Sampling temperature forwarded to the API (default 0).
        top_p: ``top_p`` value forwarded to the API (default 0.8).
        max_tokens: ``max_tokens`` value forwarded to the API (default 2000).

    Returns:
        The ``content`` of the first choice's message in the API response.

    Raises:
        KeyError: If a dict message lacks ``'role'`` or ``'content'``.
    """
    # Copy only the two fields the API call needs, leaving the caller's list untouched
    input_messages = [
        {'role': message['role'], 'content': message['content']}
        for message in messages
    ]

    response = client.chat.completions.create(
        model=model_name,
        messages=input_messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content

#### Testing the function above to get an LLM response

In [8]:
# Single-turn conversation: one user question
messages = [{'role': 'user', 'content': 'What is the capital of Nigeria?'}]

# Query the chatbot; the bare name on the last line displays the reply in the notebook
response = get_chatbot_response(client, model_name, messages)
response

'The capital of Nigeria is Abuja.'

#### Prompt Engineering Techniques

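* Structured Output: ask the model to return its answer in a machine-readable format (CSV, JSON, etc.) so that downstream agents can parse it. Here, we use JSON.
* Define a system prompt: an instruction given to the LLM (chatbot) ahead of the user message that sets its role and the required output format.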

In [None]:
# System prompt: sets the assistant's role and the JSON structure it must answer with
system_prompt = """
You are a helpful assistant that answers questions about capitals of countries. \
Your output should be in a structured json like the one below. You must answer only the json object and nothing else: \
[
{
    'country': the country you will get the capital of
    'capital': the capital of the country stated
}
]
"""

# Conversation = system instruction followed by the user question; then call the chatbot
messages = [
    {'role':'system', 'content':system_prompt},
    {'role':'user', 'content':'What is the capital of USA?'},
]
response = get_chatbot_response(client, model_name, messages)
print(response)

{
    "country": "USA",
    "capital": "Washington D.C."
}


In [None]:
# check type of above response
type(response)

str

In [None]:
# Parse the JSON text returned by the model into a Python object (a dict for this reply).
# json.loads raises json.JSONDecodeError when the reply is not valid JSON.
json_response = json.loads(response)
print(json_response)
type(json_response)

{'country': 'USA', 'capital': 'Washington D.C.'}


dict

### Input Structure - using back ticks (```)

In [27]:
# The list of countries is wrapped in triple backticks to separate the data from the instruction
user_input = """
Get me the capitals of the following countries:
```
1. Italy
2. France
3. England
```
"""

# Same system prompt as before, followed by the delimited user input
messages = [
    {"role":"system", "content":system_prompt},
    {"role":"user", "content":user_input},
]
response = get_chatbot_response(client, model_name, messages)
print(response)

```
[
    {
        "country": "Italy",
        "capital": "Rome"
    },
    {
        "country": "France",
        "capital": "Paris"
    },
    {
        "country": "England",
        "capital": "London"
    }
]
```


In [28]:
type(response)

str

In [None]:
# Remove a surrounding Markdown code fence before parsing. str.strip("```") only strips
# backtick characters, so a fence carrying a language tag (```json) would leave "json" in
# front of the payload and make json.loads fail; that case is handled explicitly here.
cleaned_response = response.strip()
if cleaned_response.startswith("```"):
    cleaned_response = cleaned_response[3:]
    # Drop an optional language tag (e.g. "json") that shares the opening fence line
    fence_tag, newline, remainder = cleaned_response.partition("\n")
    if newline and fence_tag.strip().isalpha():
        cleaned_response = remainder
if cleaned_response.rstrip().endswith("```"):
    cleaned_response = cleaned_response.rstrip()[:-3]
cleaned_response = cleaned_response.strip()

# Parse the unfenced text; raises json.JSONDecodeError if it is still not valid JSON
json_response = json.loads(cleaned_response)
print(json_response)
type(json_response)

[{'country': 'Italy', 'capital': 'Rome'}, {'country': 'France', 'capital': 'Paris'}, {'country': 'England', 'capital': 'London'}]


list

### Chain of Thought - give the LLM time to think

In [34]:
# Ask for the result of a sum and require the reply to be JSON with a single `result` field.
# No system prompt is used here: the format instruction is part of the user message itself.
user_prompt = """
Calculate the result of this equation: 1+3 \
Your output should be in a json format specified below. You are not allowed to do anythin else. \
{
    result: The final number resulted from calculating the equation above.
}
"""

messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name=model_name, messages=messages)
print(response)

{
  "result": 4
}


In [None]:
# COT reasoning prompt from my llama prompt engr lecture with Deeplearning.AI
# NOTE: the prompt asks for the yes/no answer *before* the step-by-step explanation, so the
# model commits to an answer before reasoning. In the recorded run it answered "Yes" even
# though its own working totals 14 seats for 15 people.
prompt = """
15 of us want to go to a restaurant.
Two of them have cars
Each car can seat 5 people.
Two of us have motorcycles.
Each motorcycle can fit 2 people.

Can we all get to the restaurant by car or motorcycle?
Think step by step.
Provide the answer as a single yes/no answer first.
Then explain each intermediate step.
"""

messages = [{"role":"user", "content":prompt}]
response = get_chatbot_response(client, model_name=model_name, messages=messages)
print(response)

Yes, we can all get to the restaurant by car or motorcycle.

Here's the step-by-step analysis:

1. We have 2 people with cars, and each car can seat 5 people. So, we have a total of 2 * 5 = 10 seats available in the cars.

2. We have 2 people with motorcycles, and each motorcycle can fit 2 people. So, we have a total of 2 * 2 = 4 seats available in the motorcycles.

3. We have 15 people in total, and we have 10 seats available in the cars and 4 seats available in the motorcycles. To find out if we can all get to the restaurant, we need to subtract the total number of seats available from the total number of people.

   Total seats available: 10 (cars) + 4 (motorcycles) = 14 seats
   Total people: 15

4. Since we have more seats available than people, we can accommodate everyone by using the cars. We can fit all 15 people in the cars, and we'll have 5 people left over (15 - 10 = 5).

5. Therefore, we can all get to the restaurant by car.


### RAG - Retrieval Augmented Generation
* Used when the LLM needs information that is not in its training data: the relevant text is retrieved and supplied to the model inside the prompt.

In [38]:
# Ask about the iPhone 16 without supplying any reference text in the prompt
user_prompt = " What's new in Iphone 16?"
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name=model_name, messages=messages)
print(response)

I'm happy to provide you with information about the iPhone 16. However, I need to clarify that I'm a large language model, I don't have real-time access to the latest information about upcoming iPhone models. But I can give you an idea of what's rumored and what's expected to be in the iPhone 16.

**Rumored Features:**

1. **Improved Cameras:** The iPhone 16 is expected to have a new camera system with a wider-angle lens, a telephoto lens, and a new ultra-wide-angle lens.
2. **5G Connectivity:** The iPhone 16 is rumored to support 5G connectivity, which will provide faster data speeds and lower latency.
3. **Long-Lasting Battery:** The iPhone 16 is expected to have a longer-lasting battery, possibly with a larger capacity.
4. **New Design:** There are rumors of a new design for the iPhone 16, possibly with a smaller notch or a more rounded edge.
5. **Improved Performance:** The iPhone 16 is expected to have improved performance, possibly with a faster A17 Bionic chip.

**Other Rumors:*

* Using the RAG method with an f-string to answer who won the latest US presidential election

In [None]:
# Note: without extra context the LLM is expected to give an erroneous answer here,
# because the election result is not something it was trained on.
prompt = "Tell me who won the latest presidential election in USA?"

messages = [{"role":"user", "content":prompt}]
response = get_chatbot_response(client, model_name=model_name, messages=messages)
print(response)

In the 2024 United States presidential election, the winner is Joe Biden, the incumbent President of the United States. He defeated Republican challenger, Donald Trump, in the general election held on November 5, 2024.


In [41]:
# Fixing the above response by supplying the latest information from Wikipedia in the prompt
election_news = """
The 2024 United States elections were held on Tuesday, November 5, 2024. \
In the presidential election, former Republican President Donald Trump, seeking a non-consecutive second term, \
defeated the incumbent Democratic Vice President Kamala Harris. \
Republicans also gained control of the Senate and held narrow control of the House of Representatives, \
winning a government trifecta for the first time since 2016.
"""

# Interpolate the reference text into the user prompt so the model can answer from it
user_prompt = f"""
Tell me who won the latest november election in the United States. \
news: {election_news}
"""

messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name=model_name, messages=messages)
print(response)

According to the information provided, the 2024 United States elections were held on Tuesday, November 5, 2024. The results of the presidential election are as follows:

- Donald Trump, the former Republican President, defeated Kamala Harris, the incumbent Democratic Vice President.
- Trump won the presidential election.

As for the other elections, the information provided does not specify the winners. However, it is mentioned that Republicans gained control of the Senate and held narrow control of the House of Representatives, winning a government trifecta for the first time since 2016.


### Embeddings - converting tokens/words into numerical representations (vectors)

In [43]:
# Set up a second OpenAI-format client, this one aimed at the RUNPOD embedding endpoint

embedding_client = OpenAI(
    base_url=os.getenv('RUNPOD_EMBEDDING_URL'),
    api_key=os.getenv('RUNPOD_TOKEN'),
)

In [44]:
# testing the embedding client
user_prompt = 'What is new in iphone 16?'

# Request an embedding for the prompt from the 'bge-small-en-v1.5' model
output = embedding_client.embeddings.create(input=user_prompt, model='bge-small-en-v1.5')     # embedding model from RUNPOD

# Displays the whole CreateEmbeddingResponse; the vector itself is at output.data[0].embedding
output

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.053108274936676025, -0.030245177447795868, 0.059697236865758896, -0.0034374718088656664, -0.03260927274823189, -0.035019125789403915, 0.012209412641823292, 0.02626434527337551, -0.011606949381530285, 0.03871016576886177, 0.06106993928551674, 0.011667957529425621, -0.07601712644100189, -0.0016844184137880802, 0.0912693589925766, 0.02321389876306057, 0.1165880635380745, -0.14434713125228882, -0.08181297779083252, 0.016533419489860535, -0.012613596394658089, -0.013482973910868168, -0.036422330886125565, -0.029787609353661537, 0.03413449600338936, 0.0630832314491272, 0.00046590802958235145, -0.01740279793739319, -0.009517393074929714, -0.14520125091075897, -0.006600403692573309, -0.0044231475330889225, 0.010714692994952202, 0.027072712779045105, 0.0064821988344192505, -0.0383136086165905, -0.005132376216351986, 0.03996085003018379, -0.012239916250109673, 0.06546258181333542, 0.025089923292398453, 0.09157440811395645, -0.0972482338547706

In [None]:
output.data[0].embedding     # full outcome of the output

[-0.053108274936676025,
 -0.030245177447795868,
 0.059697236865758896,
 -0.0034374718088656664,
 -0.03260927274823189,
 -0.035019125789403915,
 0.012209412641823292,
 0.02626434527337551,
 -0.011606949381530285,
 0.03871016576886177,
 0.06106993928551674,
 0.011667957529425621,
 -0.07601712644100189,
 -0.0016844184137880802,
 0.0912693589925766,
 0.02321389876306057,
 0.1165880635380745,
 -0.14434713125228882,
 -0.08181297779083252,
 0.016533419489860535,
 -0.012613596394658089,
 -0.013482973910868168,
 -0.036422330886125565,
 -0.029787609353661537,
 0.03413449600338936,
 0.0630832314491272,
 0.00046590802958235145,
 -0.01740279793739319,
 -0.009517393074929714,
 -0.14520125091075897,
 -0.006600403692573309,
 -0.0044231475330889225,
 0.010714692994952202,
 0.027072712779045105,
 0.0064821988344192505,
 -0.0383136086165905,
 -0.005132376216351986,
 0.03996085003018379,
 -0.012239916250109673,
 0.06546258181333542,
 0.025089923292398453,
 0.09157440811395645,
 -0.09724823385477066,
 -0.0

In [None]:
# Putting the embedding output in a function

def get_embedding(embedding_client, model_name, text_input):
    """Return the embedding vectors produced for ``text_input``.

    Calls ``embedding_client.embeddings.create`` with the given model and
    input, then collects the ``embedding`` attribute of every item in the
    response's ``data``, in order, into a list.
    """
    result = embedding_client.embeddings.create(input=text_input, model=model_name)
    return [item.embedding for item in result.data]

In [68]:
# testing the function
# NOTE: `model_name` holds the chat model id, so the embedding model is named explicitly
# here (the same 'bge-small-en-v1.5' model used when the embedding client was first tested).

output = get_embedding(embedding_client, model_name='bge-small-en-v1.5', text_input=user_prompt)
output

[[-0.053108274936676025,
  -0.030245177447795868,
  0.059697236865758896,
  -0.0034374718088656664,
  -0.03260927274823189,
  -0.035019125789403915,
  0.012209412641823292,
  0.02626434527337551,
  -0.011606949381530285,
  0.03871016576886177,
  0.06106993928551674,
  0.011667957529425621,
  -0.07601712644100189,
  -0.0016844184137880802,
  0.0912693589925766,
  0.02321389876306057,
  0.1165880635380745,
  -0.14434713125228882,
  -0.08181297779083252,
  0.016533419489860535,
  -0.012613596394658089,
  -0.013482973910868168,
  -0.036422330886125565,
  -0.029787609353661537,
  0.03413449600338936,
  0.0630832314491272,
  0.00046590802958235145,
  -0.01740279793739319,
  -0.009517393074929714,
  -0.14520125091075897,
  -0.006600403692573309,
  -0.0044231475330889225,
  0.010714692994952202,
  0.027072712779045105,
  0.0064821988344192505,
  -0.0383136086165905,
  -0.005132376216351986,
  0.03996085003018379,
  -0.012239916250109673,
  0.06546258181333542,
  0.025089923292398453,
  0.09157

In [69]:
model_name

'meta-llama/Llama-3.2-1B-Instruct'

In [None]:
# length of first index of the output.
len(output[0])

384

### Calculating Cosine Similarity

In [75]:
data = ["iphone_16", "samsung_s23"]
user_prompt = "What is new in iphone_16?"

In [None]:
# get embeddings of both data and user_prompt
# NOTE: `model_name` holds the chat model id, so the embedding model is named explicitly.
embedding_model_name = 'bge-small-en-v1.5'

# get_embedding returns a list of vectors; [0] takes the single vector for a single input text
user_prompt_embedding = get_embedding(embedding_client, model_name=embedding_model_name, text_input=user_prompt)[0]
data_embeddings = [get_embedding(embedding_client, model_name=embedding_model_name, text_input=text)[0] for text in data]

In [80]:
# Cosine similarity between the user-prompt embedding and each data embedding.
# The prompt embedding is wrapped in a list so it is passed as a single-row 2-D input;
# the result has one row holding one score per entry of `data_embeddings`.
similarity_score = cosine_similarity([user_prompt_embedding], data_embeddings)
similarity_score

array([[0.86127169, 0.5575734 ]])

In [None]:
# Get the index of the entry with the greatest similarity score.
# NOTE: despite its name, `sim_score` holds that index (the position in `data`), not the score.
sim_score = similarity_score.argmax()
sim_score

np.int64(0)