# Part 1 - Get used to the OpenAI API
 

## Import

Import necessary libraries.

In [None]:
!pip install openai tiktoken sentence_transformers pandas numpy python-dotenv

In [None]:
import openai
from openai import AzureOpenAI
import os
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np
import tiktoken
from typing import List, Dict
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv  
load_dotenv();

## Constants

Let's set up some constants. These are:

- The model we will be using (defined in Azure OpenAI).
- Cost for token completion and prompt (we take this from Azure OpenAI Studio for the respective model).
- OpenAI API client setup.

In [None]:
# Name of the Azure OpenAI deployment that all requests in this notebook are sent to.
# Alternative: read the deployment name from the environment (falls back to "gpt-4"):
# MODEL = os.environ.get("OPENAI_API_DEPLOYMENT", "gpt-4")
MODEL = "gpt-35-turbo"  # Fixed deployment name; swap with the line above to read it from the environment instead

# IPython magic: executes model_cost.py inside this notebook's namespace.
# NOTE(review): get_model_instance is presumably defined there - confirm against model_cost.py.
%run model_cost.py
# Pricing information for MODEL; later cells read token_cost_per_completion,
# token_cost_per_prompt, currency and name from this object.
model_cost = get_model_instance(model_name=MODEL)

# Azure OpenAI client. Endpoint and key come from environment variables;
# the API version falls back to "2023-07-01-preview" when AZURE_OPENAI_VERSION is unset.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=os.environ.get("AZURE_OPENAI_VERSION", "2023-07-01-preview"),
)

In [None]:
# Never print the secret itself: cell outputs are stored inside the notebook file
# and are easily shared or committed. Only report whether the key is configured.
print("AZURE_OPENAI_API_KEY is set:", bool(os.environ.get("AZURE_OPENAI_API_KEY")))

## Construct a prompt and receive response from the API

We will send a prompt to the API and get a response back. The prompt is a string that is used to "seed" the model. The model will then generate a completion based on the prompt. The completion is a string of text that is generated by the model.

In [None]:
def ask_question(
        prompt: List[Dict[str, str]],
        model: str = MODEL
    ) -> openai.types.chat.chat_completion.ChatCompletion:
    """Send a chat prompt to the GPT model through the Azure OpenAI client.

    Args:
        prompt: The chat messages to send (dicts with "role" and "content")
        model: The model to use

    Returns:
        The chat completion object returned by the API
    """
    # Fixed generation settings applied to every request made by this notebook
    generation_settings = {
        "temperature": 0.7,
        "max_tokens": 1500,
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stop": None,
    }

    return client.chat.completions.create(
        model=model,
        messages=prompt,
        **generation_settings,
    )


It is always a good idea to monitor costs. We will do this with the function `get_cost()`. This function will return the cost of the API call.

In [None]:
def get_cost(
        response: openai.types.chat.chat_completion.ChatCompletion,
        token_cost_per_completion: float,
        token_cost_per_prompt: float,
    ) -> float:
    """Compute what a single prompt + completion exchange cost.

    Args:
        response: The response from the GPT model; its `usage` token counts are read
        token_cost_per_completion: The cost per completion token
        token_cost_per_prompt: The cost per prompt token

    Returns:
        The completion cost plus the prompt cost
    """
    usage = response.usage

    # Each token count is priced separately, then the two parts are summed
    completion_cost = usage.completion_tokens * token_cost_per_completion
    prompt_cost = usage.prompt_tokens * token_cost_per_prompt

    return completion_cost + prompt_cost


In [None]:
# Define a question to ask to the model
question = "Which one is the best Greek Island for a vacation?"

# Set the character; it is sent as the "system" message of the prompt below
character = "You answer question about the Greek Islands. Include a joke about Greece in every response."

# Build the prompt: a list of chat messages, system message first, then the user question
prompt = [
    {
        "role": "system",
        "content": character
    },
    {
        "role": "user",
        "content": question
    },
]

# Ask the question to the model (no model argument, so the default MODEL is used)
response = ask_question(prompt=prompt)

# Display the answer: the text of the first returned choice, rendered as Markdown
answer = response.choices[0].message.content
display(Markdown(answer))

# Compute and print the cost from the token usage reported in the response
cost = get_cost(
    response=response,
    token_cost_per_completion=model_cost.token_cost_per_completion,
    token_cost_per_prompt=model_cost.token_cost_per_prompt
)
print(f"Total cost for this response: {cost:.5f} {model_cost.currency} (model was {model_cost.name}).")

In [None]:
response

# Part 2 - Integrating Private Domain Knowledge

Note that you need to have executed all cells in Part 1 for Part 2 to function.

## High level workflow description

<img src="images/0_overview.png" alt="Image description" width="1000">  


## Load the Travel Articles and Calculate the Embeddings

We have prepared the travel articles for you. These articles constitute private domain knowledge that the model has no access to. We will load them into memory and calculate the embeddings for each article. We will use these embeddings to retrieve the most relevant articles for a given question.

<img src="images/1_calculate_domain_knowledge.png" alt="Image description" width="200">  


Make sure the travel articles are available to the notebook's local instance, and set the variable `path_travel_articles` to the directory that contains them.

In [None]:
# Provide the path to the directory containing the travel articles (.txt files) as a string, e.g., path_travel_articles = "./data/travel_articles/"
path_travel_articles = "./data/travel_articles/"

Each article is in a separate text file. The code snippet below will discover all the text files in the directory and load them into memory.

In [None]:
# Load every .txt article from the directory into memory.
# os.listdir() returns entries in arbitrary order, so the file names are sorted
# to keep article indices (e.g. travel_articles[17]) reproducible across runs
# and machines.
travel_articles = []
for file in sorted(os.listdir(path_travel_articles)):
    if file.endswith(".txt"):
        # Decode explicitly as UTF-8 instead of relying on the platform default encoding
        with open(os.path.join(path_travel_articles, file), "r", encoding="utf-8") as f:
            travel_articles.append(f.read())

# Show one article as a sanity check
display(Markdown(travel_articles[17]))

Next, we calculate the embeddings using the sentence transformer model `all-mpnet-base-v2` (https://huggingface.co/sentence-transformers/all-mpnet-base-v2). Observe that all the vectors are of the same size, `768`. This is because the transformer model outputs a fixed-size vector for each input string.

In [None]:
# Load the sentence-transformer model that is used for every embedding in this notebook
embedding_model = SentenceTransformer("all-mpnet-base-v2")

# Encode each article into one embedding vector; embeddings[i] belongs to travel_articles[i]
embeddings = []
for article in travel_articles:
    embedding = embedding_model.encode(article, show_progress_bar=True)
    embeddings.append(embedding)
    print(f"Shape of the vector: {embedding.shape}")

# Show one embedding as an example (rendered as the cell output)
embeddings[17]

## Define User Request

We will define a user request. This is a question that we would like to ask the model. We will calculate the embedding for this question and use it to retrieve the most relevant articles from the travel articles by comparing the vector of the User Request to the other vectors we just calculated above.

<img src="images/2_define_UR.png" alt="Image description" width="200">  


In [None]:
# Provide the User Request as a string, e.g., user_request = "I want to travel to a Greek Island that is famous this season for snorkeling."
user_request = "I want to travel to a Greek Island that is famous this season for snorkeling. Which one would you recommend to me?"

## Calculate the embeddings for the User Request

In the same fashion as before, we calculate the embeddings for the User Request using the same sentence transformer model that we used before - `all-mpnet-base-v2`.

The vector will have the same shape as the vectors we calculated for the travel articles. Had we used a different model, the vectors would likely have been of a different size, and even if the sizes happened to match, the vectors would live in different embedding spaces. Either way, comparing them would be meaningless - we would be comparing apples to oranges.

<img src="images/3_calculate_embeddings.png" alt="Image description" width="200">  


In [None]:
user_request_embedding = embedding_model.encode(user_request)

print(f"Shape of the user request vector: {user_request_embedding.shape}")

## Select the N most similar articles

We will compare the User Request vector to the vectors of the travel articles. There are different ways to compare vectors. For simplicity, we will use the cosine similarity.

In principle, we could select any number N of articles. To honor token limits (globally for the endpoint we are using and locally in each request), we will select the top 3 articles.

<img src="images/4_select_n.png" alt="Image description" width="200">  


In [None]:
def distance_between_vector_and_vectors(
    vector: np.ndarray, vectors_array: np.ndarray
    ) -> np.ndarray:
    """
    Calculate the cosine similarities between a single vector and an array of vectors.

    Despite the function name, the returned values are similarities, not
    distances: higher means more similar.

    Args:
        vector: A single vector of shape (d,); any array-like is accepted
        vectors_array: An array of vectors of shape (n, d); any array-like
            (e.g. a list of vectors) is accepted

    Returns:
        An array of n cosine similarities. Where the cosine similarity is
        undefined because one of the two vectors has zero length, 0.0 is
        returned instead of NaN/inf.
    """
    # Callers pass plain lists of vectors, so normalise both inputs to float arrays
    vector = np.asarray(vector, dtype=float)
    vectors_array = np.asarray(vectors_array, dtype=float)

    dot_products = np.dot(vectors_array, vector)

    # Product of the two magnitudes for every row of vectors_array
    norm_products = np.linalg.norm(vector) * np.linalg.norm(vectors_array, axis=1)

    # Divide only where the denominator is positive; the other entries keep
    # their initial 0.0, which avoids NaN/inf and the RuntimeWarning
    similarities = np.zeros_like(dot_products)
    np.divide(dot_products, norm_products, out=similarities, where=norm_products > 0)

    return similarities


In [None]:
k = 3 # You can increase this number, but you might run into a token limit

# NOTE: despite the variable name, these are cosine similarities (higher = more similar)
distances = distance_between_vector_and_vectors(
    vector=user_request_embedding,
    vectors_array=embeddings
)

# Negate before argsort so the indices come out in descending order of similarity; keep the first k
idx_of_most_similar = list((-distances).argsort()[:k])

print(f"Most similar articles are articles numbers: {idx_of_most_similar} with similarities: {distances[idx_of_most_similar]}")

## Retrieve the text corresponding to the most similar articles embeddings

We will retrieve the text of the most similar articles. We do this by looking up the index of the most similar articles in the list of travel articles.

Check if the articles make sense and are relevant to the user request that you defined.

<img src="images/5_retrieve_text.png" alt="Image description" width="200">  


In [None]:
most_similar_articles = [travel_articles[i] for i in idx_of_most_similar]

for i, article in enumerate(most_similar_articles):
    print(f"Article {i+1}")
    display(Markdown(article[:200]))

## Augment the Prompt with the retrieved text

We will augment the prompt with the retrieved text. This will provide the model with the private domain knowledge it would otherwise not know about and that it can use to generate a more accurate response.

<img src="images/6_augment_prompt.png" alt="Image description" width="200">  


### Set the character (system message)

Set the character (system message) below as a string. 

When using the OpenAI API, both setting a system message and including the character in the prompt will serve to guide the model's behavior.
A system message is often used with OpenAI's Chat API, where the conversation occurs in a more interactive and dynamic manner. By using a system message, you can provide instructions or context to the AI model. Note that the system message does count towards the token limit, just like every other message in the prompt. Depending on the model, it may also have less influence on the response than instructions included directly in the user message.

Some example system messages are:
- You are a marketing writing assistant. You help come up with creative content ideas and content like marketing emails, blog posts, tweets, ad copy and product descriptions. You write in a friendly yet professional tone but can tailor your writing style that best works for a user-specified audience. If you do not know the answer to a question, respond by saying "I do not know the answer to your question."
- Assistant is an AI chatbot that helps users turn a natural language list into JSON format. After users input a list they want in JSON format, it will provide suggested list of attribute labels if the user has not provided any, then ask the user to confirm them before creating the list.
- You are an Xbox customer support agent whose primary goal is to help users with issues they are experiencing with their Xbox devices. You are friendly and concise. You only provide factual answers to queries, and do not provide answers that are not related to Xbox.

In [None]:
# Set the character
character = "You are a seasoned travel agent with the primary goal to help users looking to plan a vacation in Greece. You write in a friendly yet professional tone."

Next, we have to build the question to ask the model enriched with the information from the embeddings.

Below you find an example of what the augmented prompt could look like (using the `user_request` that you defined). You can use this as a template to build your own question if you want.

In [None]:
# Instruction telling the model to answer from the supplied articles only
context_instruction = 'Use the below travel articles about Greek Islands from the current travel season to answer \
the subsequent question. If the answer cannot be found in the articles, write "I could \
not find an answer."'
# The prompt starts with the instruction and the question; the articles are appended below
augmented_prompt = f"{context_instruction} \n\n Question: {user_request} \n\n The travel articles follow below: \n\n"

# Append the full text of each retrieved article (no separator is inserted between articles)
for piece in most_similar_articles:
    augmented_prompt += piece

# Display the resulting augmented prompt (comment out the line below to hide it)
display(Markdown(augmented_prompt))

## Send the augmented prompt to the LLM

We will send the augmented prompt to the LLM and get a response back. The response will be the completion generated by the model.

<img src="images/7_to_llm.png" alt="Image description" width="200">  


### Build the prompt 

The OpenAI API expects the "prompt" to be in a specific form:
> prompt = [ \
>    {"role": "system", "content": character goes here}, \
>    {"role": "user", "content": the question you wish to ask goes here}, \
>] 

We build the prompt below referring to the system message and augmented prompt that you defined.

In [None]:
prompt = [
    {
        "role": "system",
        "content": character
    },
    {
        "role": "user",
        "content": augmented_prompt
    },
]

In [None]:
def ask_question(
        prompt: List[Dict[str, str]],
        model: str = MODEL
    ) -> openai.types.chat.chat_completion.ChatCompletion:
    """Send a chat prompt to the GPT model through the Azure OpenAI client.

    Args:
        prompt: The chat messages to send (dicts with "role" and "content")
        model: The model to use

    Returns:
        The chat completion object returned by the API
    """
    # Fixed generation settings applied to every request made by this notebook
    generation_settings = {
        "temperature": 0.7,
        "max_tokens": 1500,
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "stop": None,
    }

    return client.chat.completions.create(
        model=model,
        messages=prompt,
        **generation_settings,
    )

def get_cost(
        response: openai.types.chat.chat_completion.ChatCompletion,
        token_cost_per_completion: float,
        token_cost_per_prompt: float,
    ) -> float:
    """Compute what a single prompt + completion exchange cost.

    Args:
        response: The response from the GPT model; its `usage` token counts are read
        token_cost_per_completion: The cost per completion token
        token_cost_per_prompt: The cost per prompt token

    Returns:
        The completion cost plus the prompt cost
    """
    usage = response.usage

    # Each token count is priced separately, then the two parts are summed
    completion_cost = usage.completion_tokens * token_cost_per_completion
    prompt_cost = usage.prompt_tokens * token_cost_per_prompt

    return completion_cost + prompt_cost


In [None]:
# Send the prompt built above (system message + augmented user message) to the
# model through the Azure OpenAI API using `ask_question`

response = ask_question(
    prompt=prompt,
    model=MODEL
)

# Consider also the cost, computed from the token usage reported in the response
cost = get_cost(
    response=response,
    token_cost_per_completion=model_cost.token_cost_per_completion,
    token_cost_per_prompt=model_cost.token_cost_per_prompt
)

print(f"Total cost for this response: {cost:.5f} {model_cost.currency} (model was {model_cost.name}).")

In [None]:
# Display the answer from the model nicely formatted
answer = response.choices[0].message.content

display(Markdown(answer))

# Congratulations!

You augmented the prompt with the retrieved text from the travel articles. You can use this approach to retrieve information from any text collection that you have. You can also use this approach to retrieve information from a collection of documents that you have in your company. For example, you could use this approach to retrieve information from your company's internal wiki or your own E-Mail inbox (if you're aware that you will be sending all the data to the model).

However, as usual, be aware of the privacy and security implications of sending data to the model - Think before you hit send.

If you want to further explore and try out different questions to ask, you can use the methods that we provided for you below in the "Talk to GPT" section.

# Talk to GPT

Using the steps above, you can use the cell below to play around with the OpenAI API and the travel articles we provided. You can ask the model any question you like.

In the cell directly below, we define a few helper functions to make your life easier. You can use them to send a prompt to GPT and display the result. The methods are the same as above; you only have to copy your work down, for example the system message and the augmented prompt.

You will need to have loaded the travel articles and calculated the embeddings for them in order to use the methods below.

Try to test the limits, e.g., asking about an activity like "walking on the moon", or asking about a location that is not in the travel articles.

In [None]:
def distance_between_vector_and_vectors(
    vector: np.ndarray, vectors_array: np.ndarray
    ) -> np.ndarray:
    """
    Calculate the cosine similarities between a single vector and an array of vectors.

    Despite the function name, the returned values are similarities, not
    distances: higher means more similar.

    Args:
        vector: A single vector of shape (d,); any array-like is accepted
        vectors_array: An array of vectors of shape (n, d); any array-like
            (e.g. a list of vectors) is accepted

    Returns:
        An array of n cosine similarities. Where the cosine similarity is
        undefined because one of the two vectors has zero length, 0.0 is
        returned instead of NaN/inf.
    """
    # Callers pass plain lists of vectors, so normalise both inputs to float arrays
    vector = np.asarray(vector, dtype=float)
    vectors_array = np.asarray(vectors_array, dtype=float)

    dot_products = np.dot(vectors_array, vector)

    # Product of the two magnitudes for every row of vectors_array
    norm_products = np.linalg.norm(vector) * np.linalg.norm(vectors_array, axis=1)

    # Divide only where the denominator is positive; the other entries keep
    # their initial 0.0, which avoids NaN/inf and the RuntimeWarning
    similarities = np.zeros_like(dot_products)
    np.divide(dot_products, norm_products, out=similarities, where=norm_products > 0)

    return similarities


def get_pieces_of_interest(question: str, k: int = 3) -> List[str]:
    """Return the k travel articles most similar to a question.

    Relies on the notebook-level `embedding_model`, `embeddings` and
    `travel_articles` having been created beforehand.

    Args:
        question: The question to ask the model
        k: The number of pieces of interest to find

    Returns:
        The pieces of interest, most similar first
    """
    query_vector = embedding_model.encode(question, show_progress_bar=True)

    scores = distance_between_vector_and_vectors(
        vector=query_vector,
        vectors_array=embeddings
    )

    # Sorting the negated scores ranks the articles from most to least similar
    ranked_indices = np.argsort(-scores)

    return [travel_articles[idx] for idx in ranked_indices[:k]]


def get_augmented_prompt(
        question: str,
        pieces_of_interest: List[str],
        context_instruction: str,
    ) -> str:
    """Assemble the augmented prompt from instruction, question and articles.

    Args:
        question: The question to ask the model
        pieces_of_interest: The article texts to append, in order
        context_instruction: The instruction placed at the start of the prompt

    Returns:
        The instruction and question followed by all articles, which are
        concatenated without any separator between them
    """
    header = f"{context_instruction} \n\n Question: {question} \n\n The travel articles follow below: \n\n"

    return header + "".join(pieces_of_interest)

# Define a user request
user_request = "I want to travel to a Greek Island that is famous this season for eating good food. Which one would you recommend to me?"

# Select k, the number of pieces of interest
k = 3

# Retrieve the k articles whose embeddings are most similar to the user request
pieces_of_interest = get_pieces_of_interest(question=user_request, k=k)

# Set the character; it is sent as the "system" message of the prompt below
character = "You are a seasoned travel agent with the primary goal to help users looking to plan a vacation in Greece. You write in a friendly yet professional tone."

# Provide your augmented prompt: instruction + question + retrieved articles
context_instruction = 'Use the below travel articles about Greek Islands from the current travel season to answer \
the subsequent question. If the answer cannot be found in the articles, write "I could \
not find an answer."'
augmented_prompt = get_augmented_prompt(
        question=user_request,
        pieces_of_interest=pieces_of_interest,
        context_instruction=context_instruction,
)

# Build the prompt: a list of chat messages, system message first, then the augmented user message
prompt = [
    {
        "role": "system",
        "content": character
    },
    {
        "role": "user",
        "content": augmented_prompt
    },
]

# Send the prompt to the model
response = ask_question(
    prompt=prompt,
    model=MODEL
)

# Consider the cost, computed from the token usage reported in the response
cost = get_cost(
    response=response,
    token_cost_per_completion=model_cost.token_cost_per_completion,
    token_cost_per_prompt=model_cost.token_cost_per_prompt
)
print(f"Total cost for this response: {cost:.5f} {model_cost.currency} (model was {model_cost.name}).")


# Display the answer from the model nicely formatted (first returned choice, rendered as Markdown)
answer = response.choices[0].message.content

display(Markdown(answer))