# GPT-4 Vision Relabeled Test

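Tests the accuracy of GPT-4 Vision on the relabeled test set: each image is classified as SUV, Sedan, Pickup, or Convertible, and the predictions are saved to an Excel file.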

In [222]:
# Packages
import base64
import requests
import openai
import os
import pandas as pd

In [223]:
# Get and set API key
# The key is read from a text file in the user's home directory rather than
# being hardcoded in the notebook.
api_key_path = os.path.join(os.path.expanduser('~'), 'Documents', 'OpenAI', 'computer-vision-project.txt')
with open(api_key_path, 'r') as file:
    # Strip surrounding whitespace: a trailing newline in the file would
    # otherwise become part of the "Authorization: Bearer ..." header value.
    api_key = file.read().strip()
openai.api_key = api_key

In [224]:
# Test-run switch: when True, the data-loading cell keeps only the first 5 rows,
# so only a handful of images are sent to the API. Leave False for a full run.
test_run = False

## Load Data

In [225]:
# Load "relabeled_test_no_blur_old_and_new_labels.xlsx"
relabeled_test_no_blur_old_and_new_labels = pd.read_excel('../../../Data/Relabeled_Test_No_Blur/relabeled_test_no_blur_old_and_new_labels.xlsx')

# Delete if 'New Class' is not 'SUV', 'Sedan', 'Pickup', or 'Convertible'.
# .copy() makes the filtered result an independent frame, so adding the path
# column below does not raise pandas' SettingWithCopyWarning.
relabeled_test_no_blur_old_and_new_labels = relabeled_test_no_blur_old_and_new_labels[
    relabeled_test_no_blur_old_and_new_labels['New Class'].isin(['SUV', 'Sedan', 'Pickup', 'Convertible'])
].copy()

# For testing, limit to 5 rows (copied for the same reason as above)
if test_run:
    relabeled_test_no_blur_old_and_new_labels = relabeled_test_no_blur_old_and_new_labels.head(5).copy()

# Construct image path
relabeled_test_no_blur_old_and_new_labels['non_blurred_image_path'] = '../../../Images/test/No Blur/' + relabeled_test_no_blur_old_and_new_labels['filename']

relabeled_test_no_blur_old_and_new_labels

Unnamed: 0,filename,label,New Class,Old Class,non_blurred_image_path
0,Convertible_test_orig_test_00023_resized_no_bl...,2,Convertible,Convertible,../../../Images/test/No Blur/Convertible_test_...
1,Convertible_test_orig_test_00096_resized_no_bl...,2,Convertible,Convertible,../../../Images/test/No Blur/Convertible_test_...
2,Convertible_test_orig_test_00107_resized_no_bl...,2,Convertible,Convertible,../../../Images/test/No Blur/Convertible_test_...
3,Convertible_test_orig_test_00135_resized_no_bl...,2,Convertible,Convertible,../../../Images/test/No Blur/Convertible_test_...
4,Convertible_test_orig_test_00147_resized_no_bl...,2,Convertible,Convertible,../../../Images/test/No Blur/Convertible_test_...
...,...,...,...,...,...
1650,Sedan_test_orig_train_08018_resized_no_blur.jpg,3,Sedan,Sedan,../../../Images/test/No Blur/Sedan_test_orig_t...
1652,Sedan_test_orig_train_08043_resized_no_blur.jpg,3,Sedan,Sedan,../../../Images/test/No Blur/Sedan_test_orig_t...
1653,Sedan_test_orig_train_08046_resized_no_blur.jpg,3,Sedan,Sedan,../../../Images/test/No Blur/Sedan_test_orig_t...
1654,Sedan_test_orig_train_08098_resized_no_blur.jpg,3,Sedan,Sedan,../../../Images/test/No Blur/Sedan_test_orig_t...


## Image Processing

In [226]:
# Function to encode the image
def encode_image(image_path):
  """Return the contents of the file at ``image_path`` as a base64 text string.

  The file is opened in binary mode, its bytes are base64-encoded, and the
  encoded bytes are decoded as UTF-8 so the result is a ``str``.
  """
  with open(image_path, "rb") as image_file:
    raw_bytes = image_file.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return str(encoded_bytes, 'utf-8')

# Function to classify the image
def classify_image(image_path, timeout=60):
  """Ask GPT-4 Vision to classify one image as SUV, Sedan, Pickup, or Convertible.

  Parameters
  ----------
  image_path : str
      Path of the image file. Its bytes are base64-encoded and sent inline
      as a ``data:image/jpeg`` URL.
  timeout : float or tuple, optional
      Seconds to wait for the API before giving up, passed straight to
      ``requests.post``. Without a timeout a stalled connection would block
      the notebook indefinitely. Defaults to 60.

  Returns
  -------
  dict
      The parsed JSON body of the HTTP response. The HTTP status code is not
      checked here, so callers must verify that the expected keys are present
      (a successful reply carries the label at
      ``['choices'][0]['message']['content']``).

  Raises
  ------
  requests.exceptions.Timeout
      If the API does not answer within ``timeout`` seconds.
  """

  # Getting the base64 string
  base64_image = encode_image(image_path)

  # Setting the headers (api_key is the module-level key loaded earlier)
  headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
  }

  # Create payload: one user message holding the instruction and the image
  payload = {
    "model": "gpt-4-vision-preview",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Please classify the image as one of the following classes: SUV, Sedan, Pickup, or Convertible. Return only the word SUV, Sedan, Pickup, or Convertible."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": f"data:image/jpeg;base64,{base64_image}"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }

  response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=timeout,
  )

  return response.json()

In [227]:
# Smoke test: send only the first image of the test set and show the raw API reply
first_image_path = relabeled_test_no_blur_old_and_new_labels['non_blurred_image_path'].iat[0]
response = classify_image(first_image_path)
response

{'id': 'chatcmpl-95zOwSBZuHSyoM9zg12KQWGd3WK0b',
 'object': 'chat.completion',
 'created': 1711214694,
 'model': 'gpt-4-1106-vision-preview',
 'choices': [{'index': 0,
   'message': {'role': 'assistant', 'content': 'Convertible'},
   'finish_reason': 'stop'}],
 'usage': {'prompt_tokens': 299, 'completion_tokens': 1, 'total_tokens': 300},
 'system_fingerprint': None}

In [228]:
# The predicted label is the text content of the first choice's message
response['choices'][0]['message']['content']

'Convertible'

## Checkpoint - continue with images that have not yet been classified in prior runs

In [229]:
# Location of the predictions saved by earlier runs of this notebook
predictions_path = '../../../Data/Predictions/GPT-4 Vision/GPT-4_Vision_Relabeled_Test_No_Blur_predictions.xlsx'

# Flag: does a predictions file from a prior run exist?
pre_existing_file = os.path.exists(predictions_path)

if pre_existing_file:
    # Read the existing file
    GPT_4_Vision_Relabeled_Test_No_Blur_predictions = pd.read_excel(predictions_path)
    # Keep only the join key and the stored prediction
    GPT_4_Vision_Relabeled_Test_No_Blur_predictions = GPT_4_Vision_Relabeled_Test_No_Blur_predictions[['non_blurred_image_path', 'gpt_4_v_classification']]
    # One row per image (the last one wins) so the left join below cannot
    # multiply rows and cause the same image to be classified more than once
    GPT_4_Vision_Relabeled_Test_No_Blur_predictions = GPT_4_Vision_Relabeled_Test_No_Blur_predictions.drop_duplicates(subset=['non_blurred_image_path'], keep='last')
    # Left join relabeled_test_no_blur_old_and_new_labels and GPT_4_Vision_Relabeled_Test_No_Blur_predictions on 'non_blurred_image_path'
    items_to_classify = pd.merge(relabeled_test_no_blur_old_and_new_labels, GPT_4_Vision_Relabeled_Test_No_Blur_predictions, on='non_blurred_image_path', how='left')
    # Keep items where 'gpt_4_v_classification' is not in ['SUV', 'Sedan', 'Pickup', 'Convertible'],
    # i.e. images with no prediction yet or with an unusable one.
    # .copy() gives an independent frame that the classification loop can write to.
    items_to_classify = items_to_classify[~items_to_classify['gpt_4_v_classification'].isin(['SUV', 'Sedan', 'Pickup', 'Convertible'])].copy()
else:
    # Nothing classified yet: every image still has to be processed.
    # Work on a copy so the predictions column added later does not leak
    # into relabeled_test_no_blur_old_and_new_labels.
    items_to_classify = relabeled_test_no_blur_old_and_new_labels.copy()

items_to_classify

Unnamed: 0,filename,label,New Class,Old Class,non_blurred_image_path,gpt_4_v_classification


## Classify Images

In [230]:
# Make sure the prediction column exists and can hold strings, even when
# there is nothing to classify or the column was read in as all-NaN floats
if 'gpt_4_v_classification' not in items_to_classify.columns:
    items_to_classify['gpt_4_v_classification'] = None
items_to_classify['gpt_4_v_classification'] = items_to_classify['gpt_4_v_classification'].astype(object)

# Iterate over rows of items_to_classify
for index, row in items_to_classify.iterrows():
    try:
        # Classify the image
        response = classify_image(row['non_blurred_image_path'])
        # Get the classification; strip whitespace and a trailing period so
        # replies such as "Sedan." or " SUV\n" still match the class names
        classification = response['choices'][0]['message']['content'].strip().rstrip('.')
    except (requests.RequestException, OSError, ValueError, KeyError, IndexError, TypeError, AttributeError) as error:
        # Results are only written to disk by the export cell, so one failed
        # request, unreadable image, or error payload (no 'choices') must not
        # abort the loop and discard everything classified so far. The row is
        # left without a prediction and is picked up again on the next run.
        print(f"Could not classify {row['non_blurred_image_path']}: {error!r}")
        classification = None
    # Add the classification to the dataframe
    items_to_classify.loc[index, 'gpt_4_v_classification'] = classification

items_to_classify

Unnamed: 0,filename,label,New Class,Old Class,non_blurred_image_path,gpt_4_v_classification


## Export Results

In [231]:
# Destination of the predictions (also the file later runs resume from)
predictions_path = '../../../Data/Predictions/GPT-4 Vision/GPT-4_Vision_Relabeled_Test_No_Blur_predictions.xlsx'
# The only values accepted as a prediction
valid_classes = ['SUV', 'Sedan', 'Pickup', 'Convertible']

if pre_existing_file:
    # Re-read the predictions saved by earlier runs
    GPT_4_Vision_Relabeled_Test_No_Blur_predictions = pd.read_excel(predictions_path)
    # Drop rows that never received a classification
    GPT_4_Vision_Relabeled_Test_No_Blur_predictions = GPT_4_Vision_Relabeled_Test_No_Blur_predictions.dropna(subset=['gpt_4_v_classification'])
    # Stack the old predictions first and this run's results last, so that
    # keep='last' below prefers the newest classification of each image
    concatenated_df = pd.concat([GPT_4_Vision_Relabeled_Test_No_Blur_predictions, items_to_classify], ignore_index=True)
    # Set items to NaN if not 'SUV', 'Sedan', 'Pickup', or 'Convertible'
    concatenated_df.loc[~concatenated_df['gpt_4_v_classification'].isin(valid_classes), 'gpt_4_v_classification'] = None
    # For each non_blurred_image_path, keep the last classification
    concatenated_df = concatenated_df.drop_duplicates(subset=['non_blurred_image_path'], keep='last')
    # Output DF
    concatenated_df.to_excel(predictions_path, index=False)
else:
    # The classification loop creates the column on its first write; if there
    # were no rows to classify it may be missing, so create it empty
    if 'gpt_4_v_classification' not in items_to_classify.columns:
        items_to_classify['gpt_4_v_classification'] = None
    # Set items to NaN if not 'SUV', 'Sedan', 'Pickup', or 'Convertible'
    items_to_classify.loc[~items_to_classify['gpt_4_v_classification'].isin(valid_classes), 'gpt_4_v_classification'] = None
    # No pre-existing file: the output folder may not exist yet on a first run
    os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
    # Just save items_to_classify as the new predictions file
    items_to_classify.to_excel(predictions_path, index=False)
