# GPT VISION API TESTS

In [116]:
import base64
import os
import re
import time

import pandas as pd
import requests
from openai import OpenAI

In [43]:
# Load the image inventory and add two empty (None-filled) columns that the
# vision step fills in later: the model's reply and its token usage.
df = pd.read_csv('image_data.csv').assign(
    **{'Vision Response': None, 'Vision Tokens': None}
)

In [106]:
# Read the key from the OPENAI_API_KEY environment variable so the secret never
# has to be pasted into (and committed with) the notebook. When the variable is
# unset we fall back to the original 'sk-' placeholder, which keeps this cell
# runnable but will be rejected by the API.
api_key = os.environ.get('OPENAI_API_KEY', 'sk-')
client = OpenAI(api_key=api_key)

In [51]:


def vision_request(df, api_key):
  """Ask the GPT-4 Vision chat endpoint for breed guesses for every image in ``df``.

  For each entry of ``df['File Name']`` the file ``./imgs/<file name>`` is read,
  base64-encoded and posted to the chat-completions endpoint together with a
  fixed text prompt. The reply text is stored in ``df['Vision Response']`` and
  the ``usage`` dict of the reply in ``df['Vision Tokens']``, on the same row
  as the file name.

  Args:
    df: DataFrame with a 'File Name' column. It is modified in place.
    api_key: OpenAI API key sent as a Bearer token.

  Returns:
    The same ``df`` object, with the two result columns filled in.

  Raises:
    RuntimeError: if the endpoint answers with a non-success HTTP status.
    OSError: if an image file cannot be opened.
  """
  def encode_image(image_path):
    """Return the content of ``image_path`` as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
      return base64.b64encode(image_file.read()).decode('utf-8')

  # Loop-invariant: the same headers are sent with every request.
  headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
  }

  # Pair every file name with its own index label. Writing through the label
  # (instead of an enumerate() position) keeps results on the right row even
  # when the frame's index is not 0..n-1, e.g. a slice that was not reset.
  for row_label, file_name in zip(df.index, df['File Name']):
    image_path = './imgs/' + file_name

    # Getting the base64 string
    base64_image = encode_image(image_path)

    payload = {
      "model": "gpt-4-vision-preview",
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": "Give me three possible breeds the dog is its okeay if you dont know the breed."
            },
            {
              "type": "image_url",
              "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
              }
            }
          ]
        }
      ],
      "max_tokens": 3000
    }

    # A timeout stops one stalled connection from hanging the whole batch.
    response = requests.post(
      "https://api.openai.com/v1/chat/completions",
      headers=headers,
      json=payload,
      timeout=120,
    )
    # Fail with the server's own explanation rather than a KeyError on 'usage'.
    if not response.ok:
      raise RuntimeError(
        f"Vision request for {file_name!r} failed with HTTP "
        f"{response.status_code}: {response.text}"
      )

    data = response.json()
    print(data)
    df.at[row_label, 'Vision Tokens'] = data['usage']
    df.at[row_label, 'Vision Response'] = data['choices'][0]['message']['content']

  return df



In [None]:
# Split the DataFrame into consecutive chunks of len(df) // 100 rows each.
# Because of the floor division this yields *at least* 100 chunks (more when
# len(df) is not a multiple of 100). max(1, ...) keeps the chunk size valid for
# frames with fewer than 100 rows, where the division would give 0 and
# range(..., 0) would raise ValueError.
group_size = max(1, len(df) // 100)
groups = [
    df.iloc[start:start + group_size].reset_index(drop=True)
    for start in range(0, len(df), group_size)
]

# groups[2:] skips the first two chunks (presumably already processed in an
# earlier run -- confirm); start=2 keeps each file name aligned with the
# chunk's position in `groups`.
for i, group in enumerate(groups[2:], start=2):
    processed_group = vision_request(group, api_key)
    # Persist every chunk as soon as it is done so an interrupted run keeps its progress.
    processed_group.to_csv(f"./output/raw/groups/auto/group_{i}.csv", index=False)
    time.sleep(10)  # Sleep for 10 seconds between chunks to avoid rate limits


In [107]:
# Reassemble the per-group CSV files (group_0.csv ... group_113.csv; the count
# 114 is hard-coded) into one frame with a fresh 0..n-1 index, then add the
# empty (None-filled) columns that the text-model step fills in.
group_frames = [
    pd.read_csv(f"./output/raw/groups/auto/group_{group_no}.csv")
    for group_no in range(114)
]
df = (
    pd.concat(group_frames)
    .reset_index(drop=True)
    .assign(**{
        'Breeds': None,
        'T Tokens Prompt': None,
        'T Tokens Completition': None,
        'T Tokens Total': None,
    })
)


In [103]:

# Write the current frame to disk, without the index column.
df.to_csv(path_or_buf='./output/raw/vision/vision_auto.csv', index=False)

In [66]:
# Spot-check: print the full vision reply stored for row label 474.
print(df.loc[474, 'Vision Response'])

Based on the image you provided, the dog appears to have a long, shaggy coat, which is a characteristic of several breeds. Without seeing the full body, face, and other distinguishing features, it can be challenging to accurately determine the breed. However, here are three breeds that have long, shaggy coats:

1. Old English Sheepdog: Known for their long, shaggy grey and white coats, which can sometimes appear almost like the dog in the image.
2. Bearded Collie: This breed has a long, flowing coat that can also fit the appearance of the dog shown.
3. Polish Lowland Sheepdog: They have a dense, shaggy coat that is similar to what we can see in the photo, although their size may be a bit smaller than the Old English Sheepdog.

It's important to note that these are just guesses and the actual breed could be different, especially if the dog is a mixed breed or if the image doesn't capture defining characteristics clearly.


In [109]:

def get_dog_breeds(client, vision_response):
    """Ask the "gpt-4" chat model to list the dog breeds mentioned in a text.

    Args:
        client: object exposing ``chat.completions.create(model=..., messages=...)``.
        vision_response: the text to scan; it is converted with ``str()`` and
            appended, wrapped in double quotes, to the instruction prompt.

    Returns:
        A 4-tuple ``(breeds, total_tokens, prompt_tokens, completion_tokens)``
        where ``breeds`` is the content of the first choice's message and the
        three counts come from the reply's ``usage`` field.
    """
    instruction = 'give me the dog breeds in order of importance that appear in this text  separated by commas in case of an error write the word "Error"'
    # The quoted text follows the instruction directly, with no separator.
    user_message = {"role": "user", "content": f'{instruction!s}"{vision_response!s}"'}

    reply = client.chat.completions.create(model="gpt-4", messages=[user_message])

    usage = reply.usage
    return (
        reply.choices[0].message.content,
        usage.total_tokens,
        usage.prompt_tokens,
        usage.completion_tokens,
    )


In [110]:
# Run every stored vision reply through the text model and record the breed
# list plus token usage on the matching row. The positional counter is used as
# the row label, which matches because df carries a fresh 0..n-1 index here.
for row, vision_text in enumerate(df['Vision Response']):
    reply = get_dog_breeds(client, vision_text)
    breeds, total_tokens, prompt_tokens, completition_tokens = reply
    print(breeds, total_tokens, prompt_tokens, completition_tokens)

    df.at[row, 'Breeds'] = breeds
    df.at[row, 'T Tokens Prompt'] = prompt_tokens
    df.at[row, 'T Tokens Completition'] = completition_tokens
    df.at[row, 'T Tokens Total'] = total_tokens

    # Short pause between consecutive API calls.
    time.sleep(2)

# The long free-text replies are not needed once the breeds are extracted.
df = df.drop('Vision Response', axis=1)

Pembroke Welsh Corgi, Cardigan Welsh Corgi, A mix involving a Corgi 237 215 22
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Shetland Sheepdog 202 182 20
Error 55 54 1
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Shetland Sheepdog 216 196 20
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Corgi mixes 230 211 19
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Shetland Sheepdog mix 241 220 21
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Corgi mix 172 153 19
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Swedish Vallhund 138 119 19
Error 57 56 1
Error 60 59 1
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Shetland Sheepdog 281 261 20
Pembroke Welsh Corgi, Cardigan Welsh Corgi, Swedish Vallhund 268 249 19
Siberian Husky, Alaskan Malamute, Canadian Eskimo Dog 113 97 16
Alaskan Malamute, Siberian Husky, Tamaskan Dog, Utonagan 262 244 18
Siberian Husky, Alaskan Malamute, Alaskan Klee Kai 162 145 17
Siberian Husky, Alaskan Malamute, Malamute-Husky Mix 229 210 19
Alaskan Malamute, Siberian Husky, Malamute-Husky 

In [111]:
# Preview the first five rows after the breed extraction.
df.head(n=5)

Unnamed: 0,File Name,Dimensions,File Size (MB),Aspect Ratio,Vision Tokens,Breeds,T Tokens Prompt,T Tokens Completition,T Tokens Total
0,@Chompersthecorgi1.jpg,"(1280, 1280)",0.10714,1.0,"{'prompt_tokens': 791, 'completion_tokens': 17...","Pembroke Welsh Corgi, Cardigan Welsh Corgi, A ...",215,22,237
1,@Chompersthecorgi10.jpg,"(2500, 1667)",0.584455,1.4997,"{'prompt_tokens': 1131, 'completion_tokens': 1...","Pembroke Welsh Corgi, Cardigan Welsh Corgi, Sh...",182,20,202
2,@Chompersthecorgi11.jpg,"(259, 194)",0.004376,1.335052,"{'prompt_tokens': 281, 'completion_tokens': 18...",Error,54,1,55
3,@Chompersthecorgi12.jpg,"(225, 225)",0.01402,1.0,"{'prompt_tokens': 281, 'completion_tokens': 16...","Pembroke Welsh Corgi, Cardigan Welsh Corgi, Sh...",196,20,216
4,@Chompersthecorgi13.jpg,"(1200, 800)",0.043723,1.5,"{'prompt_tokens': 1131, 'completion_tokens': 1...","Pembroke Welsh Corgi, Cardigan Welsh Corgi, Co...",211,19,230


In [114]:
# Spot-check: print the stored token-usage record for row label 67.
print(df.loc[67, 'Vision Tokens'])

{'prompt_tokens': 281, 'completion_tokens': 12, 'total_tokens': 293}


In [115]:
# Add empty (None-filled) columns for the vision-request token counts.
for v_token_col in ('V Tokens Prompt', 'V Tokens Completition', 'V Tokens Total'):
    df[v_token_col] = None

In [117]:
# Each 'Vision Tokens' cell holds the textual form of a usage dict, e.g.
# "{'prompt_tokens': 281, 'completion_tokens': 12, 'total_tokens': 293}".
# The pattern captures every "'<name>': <integer>" pair in that text.
token_pair_re = re.compile(r"'(\w+)': (\d+)")

for row, usage_text in enumerate(df['Vision Tokens']):
    # Rebuild a name -> integer count mapping from the matched pairs.
    usage = {}
    for name, count in token_pair_re.findall(usage_text):
        usage[name] = int(count)

    df.at[row, 'V Tokens Prompt'] = usage['prompt_tokens']
    df.at[row, 'V Tokens Completition'] = usage['completion_tokens']
    df.at[row, 'V Tokens Total'] = usage['total_tokens']

In [119]:
# The raw usage text is redundant once its counts live in the 'V Tokens ...'
# columns; drop it and preview the result.
df = df.drop(columns=['Vision Tokens'])
df.head()

Unnamed: 0,File Name,Dimensions,File Size (MB),Aspect Ratio,Breeds,T Tokens Prompt,T Tokens Completition,T Tokens Total,V Tokens Prompt,V Tokens Completition,V Tokens Total
0,@Chompersthecorgi1.jpg,"(1280, 1280)",0.10714,1.0,"Pembroke Welsh Corgi, Cardigan Welsh Corgi, A ...",215,22,237,791,179,970
1,@Chompersthecorgi10.jpg,"(2500, 1667)",0.584455,1.4997,"Pembroke Welsh Corgi, Cardigan Welsh Corgi, Sh...",182,20,202,1131,146,1277
2,@Chompersthecorgi11.jpg,"(259, 194)",0.004376,1.335052,Error,54,1,55,281,18,299
3,@Chompersthecorgi12.jpg,"(225, 225)",0.01402,1.0,"Pembroke Welsh Corgi, Cardigan Welsh Corgi, Sh...",196,20,216,281,160,441
4,@Chompersthecorgi13.jpg,"(1200, 800)",0.043723,1.5,"Pembroke Welsh Corgi, Cardigan Welsh Corgi, Co...",211,19,230,1131,175,1306


In [120]:
# Write the cleaned frame (breeds plus token counts) to disk, without the index column.
df.to_csv(path_or_buf='./output/clean/breeds.csv', index=False)