We will start with an example to categorize movies using gpt-3.5-turbo, and then cover how we can use the vision capabilities of gpt-4-turbo to caption images.

In [3]:
import json
from openai import OpenAI
import pandas as pd
# from IPython.display import Image, display

In [5]:
# API client used by the calls in the following cells; no credentials are passed explicitly here
client = OpenAI()

In [9]:
# Smoke test: send one small chat request to confirm the API is reachable

completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        dict(
            role="system",
            content="You are a poetic data scientist, skilled in explaining complex programming concepts with Pythonic flair.",
        ),
        dict(
            role="user",
            content="Compose a poem only two paragraphs that explains the concept of recursion in programming. Keep it simple and elegant.",
        ),
    ],
)

# The generated text is carried on the first choice's message
print(completion.choices[0].message.content)

In the realm of code, a dance unseen,
Recursion whispers like a dream. 

A function calls itself, lost in time,
A loop within, a mountain to climb. 

With each step taken, old paths rewind,
Infinite patterns within the mind.


Very nice to see the new models from OpenAI, and I hope you enjoy the examples!

In [10]:
# CSV path is relative to the notebook's working directory
dataset_path = "data/imdb_top_1000.csv"
data = pd.read_csv(filepath_or_buffer=dataset_path)

In [11]:
# Preview the first five rows
data.head(n=5)

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


In [12]:
# System prompt for the movie-categorization requests: asks the model for a json object
# with `categories` (array of strings) and `summary` (a 1-sentence summary).
categorize_system_prompt = '''
Your goal is to extract movie categories from movie descriptions, as well as a 1-sentence summary for these movies.
You will be provided with a movie description, and you will output a json object containing the following information:

{
    categories: string[] // Array of categories based on the movie description,
    summary: string // 1-sentence summary of the movie based on the movie description
}

Categories refer to the genre or type of the movie, like "action", "romance", "comedy", etc. Keep category names simple and use only lower case letters.
Movies can have several categories, but try to keep it under 3-4. Only mention the categories that are the most obvious based on the description.
'''

def get_categories(description):
    """Ask the chat model to categorize and summarize one movie description.

    Args:
        description: Movie description text, sent as the user message.

    Returns:
        The message content of the first choice in the API response.
        JSON mode is requested, so this is expected to be a JSON string
        (it is returned as-is, not parsed).
    """
    # System prompt first, then the description as the user turn
    conversation = [
        {"role": "system", "content": categorize_system_prompt},
        {"role": "user", "content": description},
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0.1,
        # JSON mode: ask the API to return a valid json object
        response_format={"type": "json_object"},
        messages=conversation,
    )

    first_choice = response.choices[0]
    return first_choice.message.content

In [21]:
# Try the categorizer on the first three movies before building the batch
for _, movie in data.iloc[:3].iterrows():
    title, description = movie['Series_Title'], movie['Overview']
    result = get_categories(description)
    print(f"TITLE: {title}\nOVERVIEW: {description}\n\nRESULT: {result}")
    print("\n\n----------------------------\n\n")

TITLE: The Shawshank Redemption
OVERVIEW: Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.

RESULT: {
    "categories": ["drama"],
    "summary": "Two imprisoned men bond over the years and find redemption through acts of common decency."
}


----------------------------


TITLE: The Godfather
OVERVIEW: An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.

RESULT: {
    "categories": ["crime", "drama"],
    "summary": "A crime drama about an aging patriarch passing on his empire to his son."
}


----------------------------


TITLE: The Dark Knight
OVERVIEW: When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.

RESULT: {
    "categories": ["action", "crime", "drama"],
    "summary": "A dark and thrilling action-packed movie w

In [47]:
# Build one batch request per movie; custom_id carries the row index so that
# results can be matched back to the DataFrame later.

tasks = [
    {
        "custom_id": f"task-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        # Same payload a direct Chat Completions call would take
        "body": {
            "model": "gpt-3.5-turbo",
            "temperature": 0.1,
            "response_format": {"type": "json_object"},
            "messages": [
                {"role": "system", "content": categorize_system_prompt},
                {"role": "user", "content": description},
            ],
        },
    }
    for index, description in data['Overview'].items()
]

In [48]:
# Serialize the tasks as JSON Lines: one request object per line

file_name = "data/batch_tasks_movies.jsonl"

with open(file_name, 'w') as jsonl_out:
    jsonl_out.writelines(json.dumps(obj) + '\n' for obj in tasks)

In [50]:
# Upload the JSONL task file so that a batch job can reference it by id.
# The file is opened in a `with` block so the handle is closed once the upload
# call returns (a bare open(...) passed inline is never closed explicitly).
with open(file_name, "rb") as task_file:
    batch_file = client.files.create(
        file=task_file,
        purpose="batch"
    )

batch_file

FileObject(id='file-nlrXrFuvqDZ0ejzSdo1igj3y', bytes=1127310, created_at=1714826492, filename='batch_tasks_movies.jsonl', object='file', purpose='batch', status='processed', status_details=None)

In [51]:
# Submit a batch job that runs the uploaded requests against the chat completions endpoint
batch_job = client.batches.create(
    endpoint="/v1/chat/completions",
    input_file_id=batch_file.id,
    completion_window="24h",
)

In [52]:
# Re-fetch the job by id to see its current state
current_job_id = batch_job.id
batch_job = client.batches.retrieve(current_job_id)
batch_job

Batch(id='batch_0i61yBdCFHz3m5bh6lbOn3n9', completion_window='24h', created_at=1714826503, endpoint='/v1/chat/completions', input_file_id='file-nlrXrFuvqDZ0ejzSdo1igj3y', object='batch', status='failed', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=Errors(data=[BatchError(code='token_limit_exceeded', line=None, message='Enqueued token limit reached for gpt-3.5-turbo in organization org-hXJO49ZQJ7sspZkcFntfotwX. Limit: 200,000 enqueued tokens. Please try again once some in_progress batches have been completed.', param=None)], object='list'), expired_at=None, expires_at=1714912903, failed_at=1714826503, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

In [None]:
# Download the batch output.
# `output_file_id` is None when the job produced no output (the job retrieved above
# reports status 'failed' with output_file_id=None), so check it before calling the
# Files API and fail with a readable message instead of an opaque client error.
if batch_job.output_file_id is None:
    raise RuntimeError(
        f"Batch {batch_job.id} has no output file (status: {batch_job.status!r}); "
        f"errors: {batch_job.errors}"
    )

result_file_id = batch_job.output_file_id
result = client.files.content(result_file_id).content

In [None]:
# Persist the downloaded batch output to disk unchanged (binary write)
result_file_name = "data/batch_job_results_movies.jsonl"

with open(result_file_name, mode='wb') as out_file:
    out_file.write(result)

In [None]:
# Read the saved results back: each line of the file is parsed as one JSON document
results = []
with open(result_file_name, 'r') as file:
    results.extend(json.loads(raw_line.strip()) for raw_line in file)

#### READING - Example 1: Categorizing Movies


In [None]:
# Show the first five results next to the movie they were generated for
for res in results[:5]:
    # custom_id has the form "task-<index>"; the last piece is the row position
    index = int(res['custom_id'].split('-')[-1])
    result = res['response']['body']['choices'][0]['message']['content']
    movie = data.iloc[index]
    title, description = movie['Series_Title'], movie['Overview']
    print(f"TITLE: {title}\nOVERVIEW: {description}\n\nRESULT: {result}")
    print("\n\n----------------------------\n\n")

### Second Part: Image Captioning

In [55]:
%reset -f # Clearing all variables

In [78]:
import pandas as pd
from openai import OpenAI
from IPython.display import Image, display
import json

In [72]:
# CSV path is relative to the notebook's working directory
dataset_path = "data/amazon_furniture_dataset.csv"
df = pd.read_csv(filepath_or_buffer=dataset_path)
# Preview the first five rows
df.head(n=5)

Unnamed: 0,asin,url,title,brand,price,availability,categories,primary_image,images,upc,...,color,material,style,important_information,product_overview,about_item,description,specifications,uniq_id,scraped_at
0,B0CJHKVG6P,https://www.amazon.com/dp/B0CJHKVG6P,"GOYMFK 1pc Free Standing Shoe Rack, Multi-laye...",GOYMFK,$24.99,Only 13 left in stock - order soon.,"['Home & Kitchen', 'Storage & Organization', '...",https://m.media-amazon.com/images/I/416WaLx10j...,['https://m.media-amazon.com/images/I/416WaLx1...,,...,White,Metal,Modern,[],"[{'Brand': ' GOYMFK '}, {'Color': ' White '}, ...",['Multiple layers: Provides ample storage spac...,"multiple shoes, coats, hats, and other items E...","['Brand: GOYMFK', 'Color: White', 'Material: M...",02593e81-5c09-5069-8516-b0b29f439ded,2024-02-02 15:15:08
1,B0B66QHB23,https://www.amazon.com/dp/B0B66QHB23,"subrtex Leather ding Room, Dining Chairs Set o...",subrtex,,,"['Home & Kitchen', 'Furniture', 'Dining Room F...",https://m.media-amazon.com/images/I/31SejUEWY7...,['https://m.media-amazon.com/images/I/31SejUEW...,,...,Black,Sponge,Black Rubber Wood,[],,['【Easy Assembly】: Set of 2 dining room chairs...,subrtex Dining chairs Set of 2,"['Brand: subrtex', 'Color: Black', 'Product Di...",5938d217-b8c5-5d3e-b1cf-e28e340f292e,2024-02-02 15:15:09
2,B0BXRTWLYK,https://www.amazon.com/dp/B0BXRTWLYK,Plant Repotting Mat MUYETOL Waterproof Transpl...,MUYETOL,$5.98,In Stock,"['Patio, Lawn & Garden', 'Outdoor Décor', 'Doo...",https://m.media-amazon.com/images/I/41RgefVq70...,['https://m.media-amazon.com/images/I/41RgefVq...,,...,Green,Polyethylene,Modern,[],"[{'Brand': ' MUYETOL '}, {'Size': ' 26.8*26.8 ...","['PLANT REPOTTING MAT SIZE: 26.8"" x 26.8"", squ...",,"['Brand: MUYETOL', 'Size: 26.8*26.8', 'Item We...",b2ede786-3f51-5a45-9a5b-bcf856958cd8,2024-02-02 15:15:09
3,B0C1MRB2M8,https://www.amazon.com/dp/B0C1MRB2M8,"Pickleball Doormat, Welcome Doormat Absorbent ...",VEWETOL,$13.99,Only 10 left in stock - order soon.,"['Patio, Lawn & Garden', 'Outdoor Décor', 'Doo...",https://m.media-amazon.com/images/I/61vz1Igler...,['https://m.media-amazon.com/images/I/61vz1Igl...,,...,A5589,Rubber,Modern,[],"[{'Brand': ' VEWETOL '}, {'Size': ' 16*24INCH ...","['Specifications: 16x24 Inch ', "" High-Quality...",The decorative doormat features a subtle textu...,"['Brand: VEWETOL', 'Size: 16*24INCH', 'Materia...",8fd9377b-cfa6-5f10-835c-6b8eca2816b5,2024-02-02 15:15:10
4,B0CG1N9QRC,https://www.amazon.com/dp/B0CG1N9QRC,JOIN IRON Foldable TV Trays for Eating Set of ...,JOIN IRON Store,$89.99,Usually ships within 5 to 6 weeks,"['Home & Kitchen', 'Furniture', 'Game & Recrea...",https://m.media-amazon.com/images/I/41p4d4VJnN...,['https://m.media-amazon.com/images/I/41p4d4VJ...,,...,Grey Set of 4,Iron,X Classic Style,[],,['Includes 4 Folding Tv Tray Tables And one Co...,Set of Four Folding Trays With Matching Storag...,"['Brand: JOIN IRON', 'Shape: Rectangular', 'In...",bdc9aa30-9439-50dc-8e89-213ea211d66a,2024-02-02 15:15:11


In [73]:
# Re-create the API client (the namespace was reset above); no credentials are passed explicitly here
client = OpenAI()

In [74]:
# System prompt for the captioning requests: the model receives an item image plus the
# item's name and is asked for a short (1 sentence) caption.
caption_system_prompt = '''
Your goal is to generate short, descriptive captions for images of items.
You will be provided with an item image and the name of that item and you will output a caption that captures the most important information about the item.
If there are multiple items depicted, refer to the name provided to understand which item you should describe.
Your generated caption should be short (1 sentence), and include only the most important information about the item.
The most important information could be: the type of item, the style (if mentioned), the material or color if especially relevant and/or any distinctive features.
Keep it short and to the point.
'''

def get_caption(img_url, title):
    """Request a caption for an item image from the chat model.

    Args:
        img_url: URL of the item image, sent as an "image_url" content part.
        title: Name of the item, sent as a "text" content part alongside the image.

    Returns:
        The message content of the first choice in the API response.
    """
    text_part = {"type": "text", "text": title}
    # The content type should be "image_url" to use gpt-4-turbo's vision capabilities
    image_part = {"type": "image_url", "image_url": {"url": img_url}}

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0.2,
        max_tokens=300,
        messages=[
            {"role": "system", "content": caption_system_prompt},
            {"role": "user", "content": [text_part, image_part]},
        ],
    )

    return response.choices[0].message.content

In [75]:
# Try the captioner on the first two products, showing each image above its caption
for _, product in df.iloc[:2].iterrows():
    img_url = product['primary_image']
    caption = get_caption(img_url, product['title'])
    display(Image(url=img_url))
    print(f"CAPTION: {caption}\n\n")
CAPTION: White multi-layer metal shoe rack featuring eight double hooks and multiple shelves, ideal for organizing shoes and accessories in a living room, bathroom, or hallway.




CAPTION: A set of two elegant black leather dining chairs with a sleek design and vertical stitching detail on the backrest.




In [76]:
# Build one batch request per product; custom_id carries the row index so that
# captions can be matched back to the DataFrame later.

tasks = [
    {
        "custom_id": f"task-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        # Same payload a direct Chat Completions call would take
        "body": {
            "model": "gpt-4-turbo",
            "temperature": 0.2,
            "max_tokens": 300,
            "messages": [
                {"role": "system", "content": caption_system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": title},
                        {"type": "image_url", "image_url": {"url": img_url}},
                    ],
                },
            ],
        },
    }
    for index, title, img_url in zip(df.index, df['title'], df['primary_image'])
]

In [79]:
# Serialize the tasks as JSON Lines: one request object per line

file_name = "data/batch_tasks_furniture.jsonl"

with open(file_name, 'w') as jsonl_out:
    jsonl_out.writelines(json.dumps(obj) + '\n' for obj in tasks)

In [80]:
# Uploading the file
# Opened in a `with` block so the handle is closed once the upload call returns
# (a bare open(...) passed inline is never closed explicitly).

with open(file_name, "rb") as task_file:
    batch_file = client.files.create(
        file=task_file,
        purpose="batch"
    )

In [81]:
# Submit a batch job that runs the uploaded requests against the chat completions endpoint

batch_job = client.batches.create(
    endpoint="/v1/chat/completions",
    input_file_id=batch_file.id,
    completion_window="24h",
)

In [82]:
# Re-fetch the job by id and print its current state
current_job_id = batch_job.id
batch_job = client.batches.retrieve(current_job_id)
print(batch_job)

Batch(id='batch_8bcs4q6U01aLiro6rNZfEl74', completion_window='24h', created_at=1714827061, endpoint='/v1/chat/completions', input_file_id='file-whWgxiG2JrJmMjCEuBfCOsMI', object='batch', status='failed', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=Errors(data=[BatchError(code='token_limit_exceeded', line=None, message='Enqueued token limit reached for gpt-4-turbo in organization org-hXJO49ZQJ7sspZkcFntfotwX. Limit: 90,000 enqueued tokens. Please try again once some in_progress batches have been completed.', param=None)], object='list'), expired_at=None, expires_at=1714913461, failed_at=1714827061, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


In [None]:
# Download the batch output.
# `output_file_id` is None when the job produced no output (the job printed above
# reports status 'failed' with output_file_id=None), so check it before calling the
# Files API and fail with a readable message instead of an opaque client error.
if batch_job.output_file_id is None:
    raise RuntimeError(
        f"Batch {batch_job.id} has no output file (status: {batch_job.status!r}); "
        f"errors: {batch_job.errors}"
    )

result_file_id = batch_job.output_file_id
result = client.files.content(result_file_id).content

In [84]:
# Persist the downloaded batch output to disk unchanged (binary write)
result_file_name = "data/batch_job_results_furniture.jsonl"

with open(result_file_name, mode='wb') as out_file:
    out_file.write(result)

NameError: name 'result' is not defined

In [85]:
# Read the saved results back: each line of the file is parsed as one JSON document

results = []
with open(result_file_name, 'r') as file:
    results.extend(json.loads(raw_line.strip()) for raw_line in file)

In [86]:
# Show the first five captions under the image they were generated for
for res in results[:5]:
    # custom_id has the form "task-<index>"; the last piece is the row position
    index = int(res['custom_id'].split('-')[-1])
    result = res['response']['body']['choices'][0]['message']['content']
    item = df.iloc[index]
    img_url = item['primary_image']
    display(Image(url=img_url))
    print(f"CAPTION: {result}\n\n")