# GPT-4-Turbo

In [None]:
# Credentials are read from the environment so they never have to be saved in
# the notebook itself. The "" fallbacks keep the original behavior when the
# variables are not set; replace them only for quick local experiments.
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
EXCHANGE_API = os.environ.get("EXCHANGE_API", "")

In [None]:
!pip install openai==1.6.1

In [None]:
import openai
from openai import OpenAI
import json

In [None]:
# Create the OpenAI client that the following cells use for every API call.
client = OpenAI(api_key=OPENAI_API_KEY)
# Bare expression: the notebook shows the client object as the cell output.
client

## Retrieval

In [None]:
!wget https://jonfernandes.github.io/files/digital-piano.pdf

In [None]:
# Upload the PDF for use by an assistant. The context manager closes the local
# file handle once the upload call has returned.
with open("digital-piano.pdf", "rb") as pdf_file:
    file = client.files.create(
        file=pdf_file,
        purpose='assistants'
    )

# Create an assistant with the retrieval tool and attach the uploaded file.
assistant = client.beta.assistants.create(
    instructions="You are a helpful assistant. Use only the uploaded document to answer questions.",
    model="gpt-4-turbo",
    tools=[{"type": "retrieval"}],
    file_ids=[file.id]
)

# Bare expression: shown as the cell output.
assistant.id

In [None]:
# Start a new conversation thread and add the user's question to it.
thread = client.beta.threads.create()

client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="how do i get the digital piano to play a demo piece?"
)

# Bare expression: shown as the cell output.
thread.id

In [None]:
# Create a run that pairs the thread created above with the assistant.
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Bare expression: shown as the cell output.
run.id

In [None]:
import time

def complete_run(thread_id, run_id, poll_interval=1):
    """Poll a run until it is no longer being processed, then return it.

    Args:
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to poll.
        poll_interval: Seconds to sleep before each status check.

    Returns:
        The most recently retrieved run object. Callers should inspect
        ``run.status``: the function returns for any status other than the
        in-flight ones, not only for ``completed``.
    """
    # Statuses that mean the run is still being worked on. Every other status
    # (completed, failed, requires_action, cancelled, expired, ...) ends the
    # polling, so a run that is cancelled or expires cannot loop forever.
    in_flight = ("queued", "in_progress", "cancelling")
    while True:
        time.sleep(poll_interval)
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        print(f"Current run status: {run.status}")
        if run.status not in in_flight:
            return run

complete_run(thread.id, run.id)

In [None]:
def print_messages_from_thread(thread_id):
    """Print every message of a thread as ``role: text``.

    Messages are printed in the order the API returns them. All text parts of
    a message are printed (joined by newlines); a message with no text part is
    printed with a placeholder instead of raising.

    Args:
        thread_id: ID of the thread whose messages are listed.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages:
        # A message's content is a list of parts; only parts of type "text"
        # carry a .text.value, so other parts (or an empty list) are skipped.
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        body = "\n".join(text_parts) if text_parts else "<no text content>"
        print(f"{message.role}: {body}")

print_messages_from_thread(thread.id)

## Code interpreter

### Challenge: Eric Liddell question

In [None]:
from pathlib import Path

if not Path("paris_olympics_1924.csv").exists():
  !wget https://jonfernandes.github.io/files/paris_olympics_1924.csv

### Eric Liddell question using the API

In [None]:
from pathlib import Path

if not Path("paris_olympics_1924.csv").exists():
  !wget https://jonfernandes.github.io/files/paris_olympics_1924.csv

In [None]:
# Upload the CSV for use by an assistant. The context manager closes the local
# file handle once the upload call has returned.
with open("paris_olympics_1924.csv", "rb") as csv_file:
    file = client.files.create(
        file=csv_file,
        purpose='assistants'
    )

# Create an assistant with the code interpreter tool and attach the uploaded file.
assistant = client.beta.assistants.create(
    instructions="You are a helpful assistant. Use only the file that has been provided.",
    model="gpt-4-turbo",
    tools=[{"type": "code_interpreter"}],
    file_ids=[file.id]
)

print(f"Assistant ID: {assistant.id}")

In [None]:
# Start a new conversation thread and add the user's question to it.
thread = client.beta.threads.create()

client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What events did eric liddell win a medal in?"
)

# Bare expression: shown as the cell output.
thread.id

In [None]:
# Create a run that pairs the thread created above with the assistant.
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Bare expression: shown as the cell output.
run.id

In [None]:
import time

def complete_run(thread_id, run_id, poll_interval=1):
    """Poll a run until it is no longer being processed, then return it.

    Args:
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to poll.
        poll_interval: Seconds to sleep before each status check.

    Returns:
        The most recently retrieved run object. Callers should inspect
        ``run.status``: the function returns for any status other than the
        in-flight ones, not only for ``completed``.
    """
    # Statuses that mean the run is still being worked on. Every other status
    # (completed, failed, requires_action, cancelled, expired, ...) ends the
    # polling, so a run that is cancelled or expires cannot loop forever.
    in_flight = ("queued", "in_progress", "cancelling")
    while True:
        time.sleep(poll_interval)
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        print(f"Current run status: {run.status}")
        if run.status not in in_flight:
            return run

complete_run(thread.id, run.id)

In [None]:
def print_messages_from_thread(thread_id):
    """Print every message of a thread as ``role: text``.

    Messages are printed in the order the API returns them. All text parts of
    a message are printed (joined by newlines); a message with no text part is
    printed with a placeholder instead of raising.

    Args:
        thread_id: ID of the thread whose messages are listed.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages:
        # A message's content is a list of parts; only parts of type "text"
        # carry a .text.value, so other parts (or an empty list) are skipped.
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        body = "\n".join(text_parts) if text_parts else "<no text content>"
        print(f"{message.role}: {body}")

print_messages_from_thread(thread.id)

### Challenge - Questions on the Olympics

Answer the following questions. Indicate the Pandas/Python/other command where relevant.

1.   What is the time range covered in this dataset?
2.   The Olympics take place every 4 years. Why are there missing years?
3.   What are the types of medals awarded?
4.   Across all of the Olympic Games, how many Gold, Silver and Bronze medals have there been?
5.   Why are there not an equal number of Gold, Silver and Bronze medals?
6.   There are more Gold medals than Silver, and more Silver than Bronze. Why might that be?




### Challenge

**Using a line graph, plot the number of gold medals won by the USA male and female Olympians throughout the history of the Olympics.
Distinguish between the male and female Olympians in the line graph using blue and pink.**

**Using a bar chart, plot the 5 Olympians who have won the most gold medals from the dataset (1896 to 2008). When there is a tie, consider the number of silver medals, then bronze medals.**

### Change the colors to Gold, Silver and Bronze

### Final Challenge

For each Olympic year present in the dataset, show the US Olympian (and their sport) who has won the highest number of medals in that particular year.

- In the case of a tie, Gold > Silver > Bronze
- Include only one Olympian for each Olympic year, i.e. if there are 2 Olympians in one year who have won exactly the same number and type of medals, then show only the first one based on sorting the names in reverse alphabetical order by surname.

- You should show the following columns for each Olympic year:
  - Athlete
  - Sport
  - Total


## Function calling

In [None]:
# Send the conversion question as a plain chat completion, with no tools
# attached to the request.
prompt = "Convert 50 USD to british pounds"

client.chat.completions.create(
    model="gpt-4-turbo",
    temperature=0.0,
    messages=[{"role": "user", "content": prompt}]
)

In [None]:
def get_details():
    """Placeholder with no behavior; it shares its name with the function declared in the tool schema."""
    return None

# Tool schema sent with chat completion requests. It declares one function,
# "get_details", whose arguments describe a currency conversion.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_details",
            "description": "Convert a given amount of money from one currency to another. Each currency will have a three letter code",
            "parameters": {
                "type": "object",
                "properties": {
                    "base": {
                        "type": "string",
                        "description": "The base or original currency"
                    },
                    "target": {
                        "type": "string",
                        "description": "The target or converted currency"
                    },
                    "amount": {
                        "type": "string",
                        "description": "The amount of money when converting from one currency to another"
                    }
                },
                # "required" is a JSON Schema keyword, so it belongs inside
                # the "parameters" object next to "properties".
                "required": ["base", "target", "amount"]
            }
        }
    }
]

# Same question as before, this time with the tool schema attached;
# tool_choice="auto" is passed together with the tools list.
client.chat.completions.create(
      model="gpt-4-turbo",
      temperature=0.0,
      messages=[{"role": "user", "content": "Convert 50 USD to british pounds"}],
      tools=tools,
      tool_choice="auto"
)

In [None]:
def get_chat_completion(prompt, model="gpt-4-turbo", tools=None, tool_choice="auto"):
    """Send a single-turn prompt to the chat completions API.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the chat model to use.
        tools: Optional list of tool schemas to offer to the model.
        tool_choice: Tool selection mode; only sent when ``tools`` is given.

    Returns:
        The completion object, or ``None`` if the request raised. The error is
        printed together with the model and prompt; it is not re-raised.
    """
    request = {
        "model": model,
        "temperature": 0.0,
        "messages": [{"role": "user", "content": prompt}],
    }
    # tool_choice is only meaningful alongside tools, so both are left out of
    # the request entirely when the caller supplies no tools.
    if tools:
        request["tools"] = tools
        request["tool_choice"] = tool_choice

    try:
        completion = client.chat.completions.create(**request)
    except Exception as e:
        # Best-effort helper for interactive use: report and signal failure
        # with None rather than interrupting the notebook.
        print(e, model, prompt)
        return None
    return completion

response = get_chat_completion("Convert 50 USD to british pounds", tools=tools)
response

In [None]:
response.choices

In [None]:
response_arguments = json.loads(response.choices[0].message.tool_calls[0].function.arguments)
response_arguments

In [None]:
response = get_chat_completion("What's 500 british pounds in Indian currency", tools=tools)

response_arguments = json.loads(response.choices[0].message.tool_calls[0].function.arguments)
response_arguments

In [None]:
import requests

# Pull the three conversion arguments out of the parsed tool-call arguments.
BASE, TARGET, AMOUNT = response_arguments["base"], response_arguments["target"], response_arguments["amount"]

url = f"https://v6.exchangerate-api.com/v6/{EXCHANGE_API}/pair/{BASE}/{TARGET}/{AMOUNT}"
# A timeout keeps the cell from hanging indefinitely if the service does not
# answer; .json() parses the response body as JSON.
exchange_response = requests.get(url, timeout=10).json()
exchange_response

In [None]:
print(f"{BASE} {AMOUNT} is {TARGET} {exchange_response['conversion_result']}")

In [None]:
response = get_chat_completion("I woke up a little earlier today", tools=tools)
response

In [None]:
response.choices[0].message.content

In [None]:
import requests
import json

def get_chat_completion(prompt, model="gpt-4-turbo", tools=None, tool_choice="auto"):
    """Send a single-turn prompt to the chat completions API.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the chat model to use.
        tools: Optional list of tool schemas to offer to the model.
        tool_choice: Tool selection mode; only sent when ``tools`` is given.

    Returns:
        The completion object, or ``None`` if the request raised. The error is
        printed together with the model and prompt; it is not re-raised.
    """
    request = {
        "model": model,
        "temperature": 0.0,
        "messages": [{"role": "user", "content": prompt}],
    }
    # tool_choice is only meaningful alongside tools, so both are left out of
    # the request entirely when the caller supplies no tools.
    if tools:
        request["tools"] = tools
        request["tool_choice"] = tool_choice

    try:
        completion = client.chat.completions.create(**request)
    except Exception as e:
        # Best-effort helper for interactive use: report and signal failure
        # with None rather than interrupting the notebook.
        print(e, model, prompt)
        return None
    return completion

def get_details():
    """Placeholder with no behavior; it shares its name with the function declared in the tool schema."""
    return None

# Tool schema sent with chat completion requests. It declares one function,
# "get_details", whose arguments describe a currency conversion.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_details",
            "description": "Convert a given amount of money from one currency to another. Each currency will have a three letter code",
            "parameters": {
                "type": "object",
                "properties": {
                    "base": {
                        "type": "string",
                        "description": "The base or original currency"
                    },
                    "target": {
                        "type": "string",
                        "description": "The target or converted currency"
                    },
                    "amount": {
                        "type": "string",
                        "description": "The amount of money when converting from one currency to another"
                    }
                },
                # "required" is a JSON Schema keyword, so it belongs inside
                # the "parameters" object next to "properties".
                "required": ["base", "target", "amount"],
            },
        }
    }
]


def next_steps(prompt):
    """Answer a prompt, performing a currency conversion if the model asks for one.

    The prompt is sent to the model together with the tool schema. If the
    model finishes normally, its text is returned with a note that no tool was
    used. If it finishes with a tool call, the call's arguments are used to
    query the exchange-rate API and the converted amount is returned.

    Args:
        prompt: The user's question.

    Returns:
        A human-readable string for the handled cases (including failures),
        or ``NotImplemented`` for any other finish reason.
    """
    response = get_chat_completion(prompt, tools=tools)
    if response is None:
        # get_chat_completion prints the error and returns None on failure.
        return "No response from the model (the chat completion request failed)"

    choice = response.choices[0]
    if choice.finish_reason == "stop":
        return f"""Not a currency related question (Did not use tools): {choice.message.content}"""

    if choice.finish_reason == "tool_calls":
        response_arguments = json.loads(choice.message.tool_calls[0].function.arguments)
        # The arguments are model output, so check for the expected keys
        # instead of assuming they are all present.
        missing = [key for key in ("base", "target", "amount") if key not in response_arguments]
        if missing:
            return f"Model did not supply the conversion arguments: {', '.join(missing)}"
        BASE, TARGET, AMOUNT = response_arguments["base"], response_arguments["target"], response_arguments["amount"]
        url = f"https://v6.exchangerate-api.com/v6/{EXCHANGE_API}/pair/{BASE}/{TARGET}/{AMOUNT}"
        # The timeout stops a stalled request from blocking forever.
        exchange_response = requests.get(url, timeout=10).json()
        if "conversion_result" not in exchange_response:
            # Surface whatever the service sent back instead of raising KeyError.
            return f"Currency conversion failed: {exchange_response}"
        return f"{BASE} {AMOUNT} is {TARGET} {exchange_response['conversion_result']}"

    # Any other finish reason is not handled; the sentinel is kept so that
    # existing callers see the same value as before.
    return NotImplemented

print(next_steps("What's 500 british pounds in whatever they use in Tokyo"))

In [None]:
print(next_steps("It's sunny today"))

## GPT-4-turbo Vision

In [None]:
!wget https://github.com/jonfernandes/images/blob/main/JF-bw.png

In [None]:
!ls -la

In [None]:
# Ask the vision model about one image. The user message's content is a list
# holding a text part and an image part that references the image by URL.
response = client.chat.completions.create(
  model="gpt-4-vision-preview",
  messages=[
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What’s in this image?"},
        {
          "type": "image_url",
          "image_url": {
            "url": "https://jonfernandes.github.io/images/JF-bw.png",
          },
        },
      ],
    }
  ],
  max_tokens=300,
)

# Bare expression: shown as the cell output.
response

In [None]:
response.choices[0].message.content

**Multiple images**

In [None]:
# Same request shape as the single-image call, with two image parts in one
# user message so the model can compare them.
response = client.chat.completions.create(
  model="gpt-4-vision-preview",
  messages=[
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "What are in these images? Is there any difference between them?",
        },
        {
          "type": "image_url",
          "image_url": {
            "url": "https://jonfernandes.github.io/images/JF-bw.png",
          },
        },
        {
          "type": "image_url",
          "image_url": {
            "url": "https://jonfernandes.github.io/images/JF-color.jpg",
          },
        },
      ],
    }
  ],
  max_tokens=300,
)

print(response.choices[0].message.content)

# Using GPT-4V on the Amazon furniture dataset

Use GPT-4V to tag & caption images.

Provide input images along with additional context on what they represent, and prompt the model to output tags or image descriptions. The image descriptions can then be further refined with a language model to generate captions.

Using Amazon furniture items, tag them with relevant keywords and generate short, descriptive captions.

## Setup

In [None]:
!pip install openai==1.23.6
!pip install scikit-learn

In [None]:
from IPython.display import Image, display
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from openai import OpenAI

client = OpenAI(api_key="")

In [None]:
!wget https://github.com/jonfernandes/GPT-4-Turbo/raw/main/amazon_furniture_dataset.csv

In [None]:
# Load the furniture dataset CSV from the working directory into a DataFrame
# and preview its first rows.
dataset_path =  "amazon_furniture_dataset.csv"
df = pd.read_csv(dataset_path)
df.head()

## Tag images

- GPT-4V to generate relevant tags for products.
- Use embeddings to avoid having multiple keywords that are too similar.
- Use a combination of an image and the product title to avoid extracting keywords for other items

### Extract keywords

In [None]:
system_prompt = '''
    You are an agent specialized in tagging images of furniture items, decorative items, or furnishings with relevant keywords that could be used to search for these items on a marketplace.

    You will be provided with an image and the title of the item that is depicted in the image, and your goal is to extract keywords for only the item specified.

    Keywords should be concise and in lower case.

    Keywords can describe things like:
    - Item type e.g. 'sofa bed', 'chair', 'desk', 'plant'
    - Item material e.g. 'wood', 'metal', 'fabric'
    - Item style e.g. 'scandinavian', 'vintage', 'industrial'
    - Item color e.g. 'red', 'blue', 'white'

    Only deduce material, style or color keywords when it is obvious that they make the item depicted in the image stand out.

    Return keywords in the format of an array of strings, like this:
    ['desk', 'industrial', 'metal']

'''

def analyze_image(img_url, title):
    """Ask the vision model for search keywords describing one product image.

    Args:
        img_url: URL of the product image to send to the model.
        title: Title of the item, sent as a separate user message after the
            image.

    Returns:
        The text content of the model's first choice, returned as-is (the
        reply is not parsed into a list).
    """
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        # The image part takes an object with a "url" key, the
                        # same shape used by the other vision requests in
                        # this notebook.
                        "image_url": {"url": img_url},
                    },
                ],
            },
            {
                "role": "user",
                "content": title
            }
        ],
        max_tokens=300,
        top_p=0.1
    )

    return response.choices[0].message.content

#### Testing with a few examples

In [None]:
examples = df.iloc[:5]
examples

In [None]:
# For each example row: display the product image, then print the keywords
# returned for that image and title.
for index, ex in examples.iterrows():
    url = ex['primary_image']
    img = Image(url=url)
    display(img)
    result = analyze_image(url, ex['title'])
    print(result)
    print("\n\n")

### Looking up existing keywords

Using embeddings to avoid duplicates (synonyms) and/or match pre-defined keywords

In [None]:
def get_embedding(value, model="text-embedding-3-large"):
    """Return the embedding of *value* as produced by the given embedding model.

    Only the first item of the API response's data is used.
    """
    response = client.embeddings.create(
        input=value,
        model=model,
        encoding_format="float",
    )
    first_item = response.data[0]
    return first_item.embedding

#### Testing with example keywords

In [None]:
# Existing keywords
keywords_list = ['industrial', 'metal', 'wood', 'vintage', 'bed']

In [None]:
# One row per existing keyword, plus a column holding that keyword's embedding.
df_keywords = pd.DataFrame(keywords_list, columns=['keyword'])
df_keywords['embedding'] = df_keywords['keyword'].apply(get_embedding)
df_keywords

In [None]:
def compare_keyword(keyword):
    """Find the existing keyword whose embedding is closest to *keyword*.

    Side effect: overwrites the 'similarity' column of the module-level
    ``df_keywords`` with each row's cosine similarity to *keyword*.

    Args:
        keyword: The candidate keyword to embed and compare.

    Returns:
        The ``df_keywords`` row with the highest similarity. Its 'similarity'
        entry is a scalar, so it can be compared directly to a threshold.
    """
    query = np.asarray(get_embedding(keyword)).reshape(1, -1)
    # Stack the stored embeddings into one matrix (one row per keyword) so
    # all similarities come from a single cosine_similarity call.
    existing = np.asarray(df_keywords['embedding'].tolist())
    # The result has one row per stored keyword and a single column; ravel()
    # stores plain scalars in the column instead of 1x1 arrays.
    df_keywords['similarity'] = cosine_similarity(existing, query).ravel()
    return df_keywords.sort_values('similarity', ascending=False).iloc[0]

def replace_keyword(keyword, threshold = 0.6):
    """Return the closest existing keyword if it is similar enough, else *keyword*.

    The closest match is used only when its similarity is strictly greater
    than *threshold*; in that case the replacement is also printed.
    """
    best_match = compare_keyword(keyword)
    if not best_match['similarity'] > threshold:
        return keyword
    existing = best_match['keyword']
    print(f"Replacing '{keyword}' with existing keyword: '{existing}'")
    return existing

In [None]:
# Candidate keywords to normalize against the existing keyword list.
example_keywords = [
    'bed frame', 'wooden', 'vintage', 'old school', 'desk',
    'table', 'old', 'metal', 'metallic', 'woody',
]

# Map each candidate through replace_keyword and keep the distinct results.
final_keywords = {replace_keyword(candidate) for candidate in example_keywords}
print(f"Final keywords: {final_keywords}")

## Generate captions

Use GPT-4V to generate an image description and then use a few-shot examples approach with GPT-4-turbo to generate captions from the images.

In [None]:
# Keep only the listed columns; .copy() gives df its own data rather than a
# view of the original frame.
selected_columns = ['title', 'primary_image', 'style', 'material', 'color', 'url']
df = df[selected_columns].copy()
df.head()

### Describing images with GPT-4V

In [None]:
describe_system_prompt = '''
    You are a system generating descriptions for furniture items, decorative items, or furnishings on an e-commerce website.
    Provided with an image and a title, you will describe the main item that you see in the image, giving details but staying concise.
    You can describe unambiguously what the item is and its material, color, and style if clearly identifiable.
    If there are multiple items depicted, refer to the title to understand which item you should describe.
    '''

def describe_image(img_url, title):
    """Ask the vision model for a description of the item in an image.

    Args:
        img_url: URL of the product image to send to the model.
        title: Title of the item, sent as a separate user message after the
            image.

    Returns:
        The text content of the model's first choice.
    """
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        temperature=0.2,
        messages=[
            {
                "role": "system",
                "content": describe_system_prompt
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        # The image part takes an object with a "url" key, the
                        # same shape used by the other vision requests in
                        # this notebook.
                        "image_url": {"url": img_url},
                    },
                ],
            },
            {
                "role": "user",
                "content": title
            }
        ],
        max_tokens=300,
    )

    return response.choices[0].message.content

#### Testing on a few examples

In [None]:
# For each example row: print a header (title truncated to 50 characters,
# plus the row's url), then the description generated for its image.
for index, row in examples.iterrows():
    print(f"{row['title'][:50]}{'...' if len(row['title']) > 50 else ''} - {row['url']} :\n")
    img_description = describe_image(row['primary_image'], row['title'])
    print(f"{img_description}\n--------------------------\n")

### Turning descriptions into captions
Using a few-shot examples approach to turn a long description into a short image caption

In [None]:
caption_system_prompt = '''
Your goal is to generate short, descriptive captions for images of furniture items, decorative items, or furnishings based on an image description.
You will be provided with a description of an item image and you will output a caption that captures the most important information about the item.
Your generated caption should be short (1 sentence), and include the most relevant information about the item.
The most important information could be: the type of the item, the style (if mentioned), the material if especially relevant and any distinctive features.
'''

few_shot_examples = [
    {
        "description": "This is a multi-layer metal shoe rack featuring a free-standing design. It has a clean, white finish that gives it a modern and versatile look, suitable for various home decors. The rack includes several horizontal shelves dedicated to organizing shoes, providing ample space for multiple pairs. Above the shoe storage area, there are 8 double hooks arranged in two rows, offering additional functionality for hanging items such as hats, scarves, or bags. The overall structure is sleek and space-saving, making it an ideal choice for placement in living rooms, bathrooms, hallways, or entryways where efficient use of space is essential.",
        "caption": "White metal free-standing shoe rack"
    },
    {
        "description": "The image shows a set of two dining chairs in black. These chairs are upholstered in a leather-like material, giving them a sleek and sophisticated appearance. The design features straight lines with a slight curve at the top of the high backrest, which adds a touch of elegance. The chairs have a simple, vertical stitching detail on the backrest, providing a subtle decorative element. The legs are also black, creating a uniform look that would complement a contemporary dining room setting. The chairs appear to be designed for comfort and style, suitable for both casual and formal dining environments.",
        "caption": "Set of 2 modern black leather dining chairs"
    },
    {
        "description": "This is a square plant repotting mat designed for indoor gardening tasks such as transplanting and changing soil for plants. It measures 26.8 inches by 26.8 inches and is made from a waterproof material, which appears to be a durable, easy-to-clean fabric in a vibrant green color. The edges of the mat are raised with integrated corner loops, likely to keep soil and water contained during gardening activities. The mat is foldable, enhancing its portability, and can be used as a protective surface for various gardening projects, including working with succulents. It's a practical accessory for garden enthusiasts and makes for a thoughtful gift for those who enjoy indoor plant care.",
        "caption": "Waterproof square plant repotting mat"
    }
]

# Flatten the few-shot examples into alternating chat messages: each example
# contributes a user message (its description) followed by an assistant
# message (its caption).
formatted_examples = [
    {"role": role, "content": shot[field]}
    for shot in few_shot_examples
    for role, field in (("user", "description"), ("assistant", "caption"))
]

In [None]:
def caption_image(description, model="gpt-4-turbo-preview"):
    """Turn an image description into a short caption using few-shot examples.

    Args:
        description: The image description to caption.
        model: Name of the chat model to use.

    Returns:
        The text content of the model's first choice.
    """
    # Build a fresh message list on every call. Inserting into and appending
    # to the shared formatted_examples list would add another system prompt
    # and leave the previous description behind on each call.
    messages = [
        {"role": "system", "content": caption_system_prompt},
        *formatted_examples,
        {"role": "user", "content": description},
    ]
    response = client.chat.completions.create(
        model=model,
        temperature=0.2,
        messages=messages
    )

    return response.choices[0].message.content

#### Testing on a few examples

In [None]:
examples = df.iloc[5:8]

In [None]:
# For each example row: print a header (title truncated to 50 characters,
# plus the row's url), describe the image, then caption that description.
for index, row in examples.iterrows():
    print(f"{row['title'][:50]}{'...' if len(row['title']) > 50 else ''} - {row['url']} :\n")
    img_description = describe_image(row['primary_image'], row['title'])
    print(f"Image description: {img_description}\n--------------------------\n")
    img_caption = caption_image(img_description)
    print(f"Image caption: {img_caption}\n--------------------------\n")