# Installations

In [None]:
!pip install --upgrade typing-extensions
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


# GPT APIs

In [None]:
import openai
import time
import os

def _ms_since_epoch():
    return time.perf_counter_ns() // 1000000


def set_openai_parameters(engine, max_tokens):
    """Configure the OpenAI client and build the default request parameters.

    The API key is taken from the ``OPENAI_API_KEY`` environment variable, or,
    if that is unset, from an ``openai.api_key`` that was already configured
    in this session. Credentials must never be written into the notebook; any
    key that was previously stored in this file should be treated as leaked
    and revoked.

    Args:
        engine: Name of the model/engine, stored under the ``"engine"`` key.
        max_tokens: Completion length limit, stored under ``"max_tokens"``.

    Returns:
        A ``(parameters, time_of_last_api_call)`` tuple: the request parameter
        dict and the millisecond counter reading taken when it was built.

    Raises:
        RuntimeError: If no API key is available.
    """
    # openai API setup and parameters
    api_key = os.environ.get("OPENAI_API_KEY") or getattr(openai, "api_key", None)
    if not api_key:
        raise RuntimeError(
            "No OpenAI API key found. Set the OPENAI_API_KEY environment variable "
            "(or assign openai.api_key) before calling the API."
        )
    openai.api_key = api_key

    parameters = {
        "max_tokens": max_tokens,
        "top_p": 0,  # greedy
        "temperature": 0.5,
        "logprobs": 5,  # maximal value according to https://beta.openai.com/docs/api-reference/completions/create#completions/create-logprobs, used to be 10...
        "engine": engine,
    }
    time_of_last_api_call = _ms_since_epoch()

    return parameters, time_of_last_api_call


def wait_between_predictions(time_of_last_api_call, min_ms_between_api_calls):
    """Sleep until at least ``min_ms_between_api_calls`` ms have passed since the last call.

    Args:
        time_of_last_api_call: Millisecond reading (from ``_ms_since_epoch``)
            taken at the previous API call.
        min_ms_between_api_calls: Minimum spacing between calls, in milliseconds.

    Returns:
        A fresh millisecond reading taken after any sleeping. Rebinding the
        ``time_of_last_api_call`` argument inside this function cannot update
        the caller's variable, so the new value is returned instead; callers
        that want to keep throttling across calls should store it.
    """
    elapsed_ms = _ms_since_epoch() - time_of_last_api_call
    remaining_ms = min_ms_between_api_calls - elapsed_ms
    if remaining_ms > 0:
        time.sleep(remaining_ms / 1000)
    return _ms_since_epoch()


def predict_sample_openai_gpt(
    example,
    prompt,
    min_ms_between_api_calls: int = 500,
    engine: str = "text-davinci-003",
    max_tokens: int = 100,
):
    """Run one prompt through the OpenAI Completion endpoint.

    Args:
        example: Mapping describing the sample; a copy of it becomes the base
            of the returned metadata.
        prompt: Text sent as the completion prompt.
        min_ms_between_api_calls: Minimum delay (ms) enforced before the call.
        engine: Engine name passed to the API.
        max_tokens: Completion length limit passed to the API.

    Returns:
        A dict with keys ``"input"`` (the prompt), ``"prediction"`` (the first
        choice's text, stripped of surrounding whitespace and periods) and
        ``"metadata"`` (the copied example plus ``"logprobs"``,
        ``"finish_reason"`` and, for non-"opt" engines, ``"index"``).

    Raises:
        Exception: If the API returns ``None``.
    """
    request_kwargs, last_call_ms = set_openai_parameters(engine, max_tokens)
    request_kwargs["prompt"] = prompt

    # OpenAI rate-limits API usage (3000 calls per minute at the time of writing):
    # https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits
    # so a minimum delay (min_ms_between_api_calls, 500 ms by default) is
    # enforced before every request.
    wait_between_predictions(last_call_ms, min_ms_between_api_calls)

    response = openai.Completion.create(**request_kwargs)
    if response is None:
        raise Exception("Response from OpenAI API is None.")

    first_choice = response.choices[0]  # type:ignore

    # Output data: the prompt and the cleaned-up completion text.
    prediction = {
        "input": prompt,
        "prediction": first_choice.text.strip().strip("."),
    }

    # Output metadata: start from the sample itself, then add API details.
    metadata = example.copy()
    metadata["logprobs"] = first_choice["logprobs"]
    if "opt" in engine:
        # For "opt" engines "finish_reason" is nested under "logprobs",
        # and no "index" is recorded.
        metadata["finish_reason"] = first_choice["logprobs"]["finish_reason"]
    else:
        metadata["finish_reason"] = first_choice["finish_reason"]
        # The OpenAI docs don't make clear what "index" is; keep it anyway.
        metadata["index"] = first_choice["index"]

    prediction["metadata"] = metadata
    return prediction

def predict_sample_openai_chatgpt(
    prompt,
    img_url,
    min_ms_between_api_calls: int = 10000,
    engine: str = "gpt-4o",
    max_tokens: int = 100,
):
    """Ask a chat model a question about an image.

    Args:
        prompt: Text of the user message.
        img_url: Base64-encoded image data (not an http URL). It is embedded in
            a ``data:image/jpeg;base64,...`` URL regardless of the encoding the
            caller actually used.
        min_ms_between_api_calls: Currently unused; a fixed 5 second pause is
            applied before every request instead. Kept for interface
            compatibility.
        engine: Chat model name.
        max_tokens: Upper bound on the length of the generated answer.

    Returns:
        A dict with keys ``"input"`` (the prompt) and ``"prediction"`` (the
        content of the first returned message).

    Raises:
        Exception: If the API returns ``None``.
        openai.error.RateLimitError: If the single retry is rate-limited too.
    """
    parameters, _ = set_openai_parameters(engine, max_tokens)

    # Build the request once so that the retry below sends exactly the same
    # payload, image included. Retrying with the text alone would make the
    # model answer without ever seeing the picture.
    request_kwargs = {
        "model": engine,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_url}"},
                    },
                ],
            }
        ],
        "temperature": parameters["temperature"],
        "top_p": parameters["top_p"],
        # Honour the caller's limit; it was previously accepted but never sent.
        "max_tokens": parameters["max_tokens"],
    }

    # OpenAI rate-limits API usage:
    # https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits
    wait_time = 5
    time.sleep(wait_time)
    try:
        response = openai.ChatCompletion.create(**request_kwargs)
    except openai.error.RateLimitError:
        # Back off once and retry; a second rate-limit error propagates.
        wait_time = 10
        print(f"Rate limit reached. Waiting {wait_time} seconds.")
        time.sleep(wait_time)
        response = openai.ChatCompletion.create(**request_kwargs)

    if response is None:
        raise Exception("Response from OpenAI API is None.")

    # build output data
    prediction = dict()
    prediction["input"] = prompt
    prediction["prediction"] = response.choices[0].message['content']  # type:ignore

    return prediction

def gpt4_estimetion(url, question):
    """Ask the chat model a question about an image and return its short answer.

    Args:
        url: Image payload forwarded unchanged to
            ``predict_sample_openai_chatgpt`` as its ``img_url`` argument.
        question: Question text inserted into the prompt.

    Returns:
        The ``"prediction"`` entry of the model response.
    """
    prompt = f"I provided you an image and a question, provide a basic description of the answer to the question: {question}, in 1-3 words."
    return predict_sample_openai_chatgpt(prompt, url)['prediction']

# WHOOPS! Dataset Preparation

In [None]:
!pip install -q git-lfs
!git clone https://huggingface.co/spaces/nlphuji/whoops-explorer-analysis
!pip install -q datasets
from datasets import load_dataset
# import gradio as gr
import os
import random

# The dataset repository ships a custom loading script. Passing
# trust_remote_code=True accepts it up front, so this cell no longer stops at an
# interactive "[y/N]" prompt when the notebook is run top to bottom.
# Only the "test" split is used.
wmtis = load_dataset("nlphuji/wmtis-identify", trust_remote_code=True)['test']

Cloning into 'whoops-explorer-analysis'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Total 26 (delta 6), reused 6 (delta 6), pack-reused 19 (from 1)[K
Unpacking objects: 100% (26/26), 5.40 KiB | 502.00 KiB/s, done.
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/3.55k [00:00<?, ?B/s]

The repository for nlphuji/wmtis-identify contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/nlphuji/wmtis-identify.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/47.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/335M [00:00<?, ?B/s]

Generating test split: 0 examples [00:00, ? examples/s]

dropping natural url


Load the dataset of normal questions

In [None]:
import ast
import pandas as pd
import re
import csv

# Read the table of "normal" questions from disk.
df = pd.read_csv("normal_question.csv")

# Keep only the 'question' column, as a plain Python list.
questions = df["question"].to_list()

print(questions)

['what is the color of the snow plow?', 'what is the shape of the life preserver?', 'what is the ship made of?', 'what is written on the blackboard?', 'what is santa claus wearing?', 'what is the hair color of the pregnant woman?', 'what is the color of the pacifier?', 'how is holding the flowers?', 'where the tank is driving?', 'what is the hair color of the riding woman?', 'what is the color of the coffee mug?', 'what period of time in history the man is from?', 'how many wine glasses are on the table?', 'how many weels does the car have?', 'what is the background color of the pillow?', 'what is the  color of the egg?', 'what is the jar made off?', 'what is on the woman head?', 'on what side of the plate the knife is located?', 'how many windows does the house have?', "what is on Tupac's head?", 'what is the hair color of the woman?', "what is the color of the bird's crest?", 'what is the type of the train?', 'what is the color od the drink inside the cap?', 'what is the vacuuming ma

Run each normal question on both the normal image and the strange image

In [None]:
import base64
from io import BytesIO
from openai.error import InvalidRequestError

def _encode_png_base64(image):
    """Serialize an image object to PNG and return it as a base64 text string."""
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


# For every dataset record, ask the question at the same position about both
# the normal and the strange image. Exactly one entry per record is appended to
# each list (None when the request is rejected), so the two lists stay aligned
# with each other and with `questions` even when individual calls fail.
gpt4_captions = {'normal_caption': [], 'strange_caption': []}
for question_index, record in enumerate(wmtis):
    question = questions[question_index]

    normal_str = _encode_png_base64(record['normal_image'])
    normal_answer = None
    try:
        print(question)
        normal_answer = gpt4_estimetion(normal_str, question)
        print(normal_answer)
    except InvalidRequestError as e:
        print(f"Failed to get caption: {e}")
    gpt4_captions['normal_caption'].append(normal_answer)

    strange_str = _encode_png_base64(record['strange_image'])
    strange_answer = None
    try:
        print(question)
        strange_answer = gpt4_estimetion(strange_str, question)
        print(strange_answer)
    except InvalidRequestError as e:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(f"Failed to get caption: {e}")
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    gpt4_captions['strange_caption'].append(strange_answer)


what is the color of the snow plow?
Yellow
what is the shape of the life preserver?
Circular
what is the ship made of?
Wood.
what is written on the blackboard?
Mathematical equations.
what is santa claus wearing?
Red suit
what is the hair color of the pregnant woman?
Dark brown
what is the color of the pacifier?
Blue
how is holding the flowers?
The bride.
where the tank is driving?
Through water.
what is the hair color of the riding woman?
Brown.
what is the color of the coffee mug?
White
what period of time in history the man is from?
Ancient Egypt
how many wine glasses are on the table?
Two
how many weels does the car have?
Four wheels
what is the background color of the pillow?
Dark blue
what is the  color of the egg?
Light brown
what is the jar made off?
Glass
what is on the woman head?
Helmet with goggles
on what side of the plate the knife is located?
Right side
how many windows does the house have?
Seven windows.
what is on Tupac's head?
Red bandana
what is the hair color of the

In [None]:
# Save the collected answers to CSV files, one file per image variant.
# Both files use the same single column name, 'answer', so they can be read
# the same way (without an explicit name the header would be the literal "0").
normal_df = pd.DataFrame(gpt4_captions['normal_caption'], columns=['answer'])
normal_df.to_csv('normal_answers_new2.csv', index=False)  # index=False to avoid writing row numbers

strange_df = pd.DataFrame(gpt4_captions['strange_caption'], columns=['answer'])
strange_df.to_csv('strange_answers_new5.csv', index=False)  # index=False to avoid writing row numbers