**Installations and imports:**

In [1]:
!pip install -q --upgrade typing-extensions
!pip install -q openai==0.28
!pip install -q git-lfs
!git clone https://huggingface.co/spaces/nlphuji/whoops-explorer-analysis
!pip install -q datasets


fatal: destination path 'whoops-explorer-analysis' already exists and is not an empty directory.


In [3]:
from datasets import load_dataset
import os
import random
import openai
import time
import ast
import pandas as pd
import re
import base64
from io import BytesIO
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
import torch
from PIL import Image
import requests
from peft import get_peft_model, LoraConfig, TaskType
import numpy as np
import traceback
from openai.error import InvalidRequestError

wmtis = load_dataset("nlphuji/wmtis-identify")['test']

  from .autonotebook import tqdm as notebook_tqdm


**Openai and helper functions for using api:**

In [14]:
def _ms_since_epoch():
    return time.perf_counter_ns() // 1000000


def set_openai_parameters(engine, max_tokens):
    # openai API setup and parameters
    openai.api_key = ""
    parameters = {
        "max_tokens": max_tokens,
        "top_p": 0,  # greedy
        "temperature": 0.5,
        "logprobs": 5,  # maximal value accorrding to https://beta.openai.com/docs/api-reference/completions/create#completions/create-logprobs, used to be 10...
        "engine": engine,
    }
    time_of_last_api_call = _ms_since_epoch()

    return parameters, time_of_last_api_call


def wait_between_predictions(time_of_last_api_call, min_ms_between_api_calls):
    if (
        cur_time := _ms_since_epoch()
    ) <= time_of_last_api_call + min_ms_between_api_calls:
        ms_to_sleep = min_ms_between_api_calls - (cur_time - time_of_last_api_call)
        time.sleep(ms_to_sleep / 1000)
    time_of_last_api_call = _ms_since_epoch()


def predict_sample_openai_gpt(
    example,
    prompt,
    min_ms_between_api_calls: int = 500,
    engine: str = "text-davinci-003",
    max_tokens: int = 100,
):
    parameters, time_of_last_api_call = set_openai_parameters(engine, max_tokens)
    parameters["prompt"] = prompt

    # OpenAI limits us to 3000 calls per minute:
    # https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits
    # that is why the default value of min_ms_between_api_calls is 20
    wait_between_predictions(time_of_last_api_call, min_ms_between_api_calls)

    response = openai.Completion.create(**parameters)

    if response is None:
        raise Exception("Response from OpenAI API is None.")

    # build output data
    prediction = dict()
    prediction["input"] = prompt
    prediction["prediction"] = response.choices[0].text.strip().strip(".")  # type:ignore

    # build output metadata
    metadata = example.copy()  # dict()
    metadata["logprobs"] = response.choices[0]["logprobs"]  # type:ignore
    # "finish_reason" is located in a slightly different location in opt
    if "opt" in engine:
        finish_reason = response.choices[0]["logprobs"][  # type:ignore
            "finish_reason"
        ]
    else:
        finish_reason = response.choices[0]["finish_reason"]  # type:ignore
    metadata["finish_reason"] = finish_reason
    if "opt" not in engine:
        # From the OpenAI API documentation it's not clear what "index" is, but let's keep it as well
        metadata["index"] = response.choices[0]["index"]  # type:ignore

    prediction["metadata"] = metadata

    return prediction

def predict_sample_openai_chatgpt(
    prompt,
    img_url,
    min_ms_between_api_calls: int = 10000,
    engine: str = "gpt-4o",
    max_tokens: int = 100,
):
    parameters, time_of_last_api_call = set_openai_parameters(engine, max_tokens)
    parameters["prompt"] = prompt

    # OpenAI limits us to 3000 calls per minute:
    # https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits
    wait_time = 5
    time.sleep(wait_time)
    try:
        response = openai.ChatCompletion.create(model=engine, messages=[{"role": "user", "content": [{"type": "text", "text":prompt},{
          "type": "image_url",
          "image_url": {"url": f"data:image/jpeg;base64,{img_url}"
}}]}], temperature=parameters['temperature'], top_p=parameters['top_p'])
    except openai.error.RateLimitError as e:
        wait_time = 10
        print(f"Rate limit reached. Waiting {wait_time} seconds.")
        time.sleep(wait_time)

        response = openai.ChatCompletion.create(model=engine, messages=[{"role": "user", "content": prompt}],
                                                temperature=parameters['temperature'], top_p=parameters['top_p'])

    if response is None:
        raise Exception("Response from OpenAI API is None.")

    # build output data
    prediction = dict()
    prediction["input"] = prompt
    prediction["prediction"] = response.choices[0].message['content']  # type:ignore

    return prediction

def gpt4_estimetion(url,question):
  prompt = ""
  #prompt with explanation
  if is_explain == True:
      prompt = f"I provided you an image and a question. Provide a basic description of the answer to the question: {question}, in 1-3 words. You can answer 'no answer' if there is absolutely no answer. Add an explanation either way."
  
  #prompt without explanation
  else:
      prompt = f"I provided you an image and a question, provide a basic description of the answer to the question: {question}, in 1-3 words. you can answer 'no answer' if there is absolutely no answer visible in the image or you're uncertain"

  gpt4_prediction = predict_sample_openai_chatgpt(prompt,url)
  return gpt4_prediction['prediction']


**sanity check:**

In [15]:
print(len(wmtis))
print(wmtis[45])

102
{'natural_image': <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=860x614 at 0x7F3DC84A8580>, 'normal_image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1024x1024 at 0x7F3EA03C5F60>, 'strange_image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1024 at 0x7F3EA03C7D00>, 'natural_hash': '2b0852954e5fd658b09586e02cdf6d5b2ed68d68989b427c4688dba3d3e457d7', 'normal_hash': 'da0c39cc7246f2228238fad63ec8a405a9b6a35eaf3441bd89572ba6777950b9', 'strange_hash': 'ffad7c0deee280ce9578dd0f3094e517b0a1fa7d4ba47c09201c09d5c5eb8494', 'natural_image_caption': 'a traffic light on a black stand against a white background', 'normal_image_caption': 'a traffic light on a pole', 'strange_image_caption': 'a green traffic light on a gray background', 'rating_natural': 1, 'rating_normal': 1, 'rating_strange': 0, 'comments_natural': 'nan', 'comments_normal': 'nan', 'comments_strange': 'Missing "all green light"'}


**loading file and extracting data:**

**Choose type of questions**

In [52]:
# (normal question=question that is reasonable to ask, strange question=question with world knowledge contradiction)
type_question = "normal" # "normal" or "strange"

In [54]:
if type_question == "strange":
    df = pd.read_csv("fixedLabels_full.csv")
    questions = []
    
    def extract_embedded_question(full):
        try:
            # Convert string representation of list to actual list
            full_list = ast.literal_eval(full)
            if isinstance(full_list, list) and len(full_list) > 1:
                return full_list[0]
            else:
                raise ValueError("Question does not have the expected format.")
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing question: {full} - {e}")
            return None
    
    # Apply function to extract the embedded answer
    questions = df['full'].apply(extract_embedded_question)


elif type_question == "normal":
    df = pd.read_csv('normal question.csv')
    questions = df['question'].tolist()

# sanity check:
for i in range(len(questions)):
    print(questions[i])
    if i==10:
        break
print(len(questions))


what is the color of the snow plow?
what is the shape of the life preserver?
what is the ship made of?
what is written on the blackboard?
what is santa claus wearing?
what is the hair color of the pregnant woman?
what is the color of the pacifier?
how is holding the flowers?
where the tank is driving?
what is the hair color of the riding woman?
what is the color of the coffee mug?
102


**Choose prompt**

In [73]:
type_prompt = "blip" # "gpt" or "blip" (use the same prompt as in GPT experiment / new one that works better for blip)
is_explain = True # True or False (with explanation or without)
explain = "explain" if is_explain else "no explain"

**Blip Model**

In [21]:
model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", torch_dtype=torch.float16)
print("finish from_pretrained model")

processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
print("finish from_pretrained processor")

Loading checkpoint shards: 100%|██████████| 4/4 [02:12<00:00, 33.07s/it]


finish from_pretrained model


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


finish from_pretrained processor


In [22]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device:", device)

device: cuda


In [23]:
# Define LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=2,                              
    lora_alpha=8,                    
    lora_dropout=0.2,                 
    target_modules=["q_proj", "v_proj"]
)

print("LoRA config created")

# Apply LoRA to the model
model = get_peft_model(model, lora_config)
print("LoRA applied to model")

model.to(device)
print(f"model moved to {device}")

LoRA config created
LoRA applied to model
model moved to cuda


In [99]:
def blip_estimetion(url,question):

  image_data = base64.b64decode(url)
  image = Image.open(BytesIO(image_data)).convert("RGB")
    
  # GPT prompt:
  if type_prompt == "gpt":
      if is_explain:
          prompt = f"I provided you an image and a question. Provide a basic description of the answer to the question: {question}, in 1-3 words. You can answer 'no answer' if there is absolutely no answer. Add an explanation either way."
      else:
            prompt = f"I provided you an image and a question, provide a basic description of the answer to the question: {question}, in 1-3 words. you can answer 'no answer' if there is absolutely no answer visible in the image or you're uncertain"

  # Best prompt:
  elif type_prompt == "blip":
      if is_explain:
        # with explanation:
          prompt = f"Given this image, answer the question: {question} in 1-3 words. Add an explanation."
      else:
        # no explanation:
            prompt = f"Given this image, answer the question: {question} in 1-3 words."

  # Process the image and text together
  inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)

  outputs = model.generate(
          **inputs,
          do_sample=False,
          num_beams=5,
          max_length=200,
          min_length=10,
          top_p=0.9, # the probability of the answer
          repetition_penalty=1.5,
          length_penalty=0.6, # A value greater than 1.0 encourages longer sequences, while a value less than 1.0 encourages shorter sequences.
          temperature=1.2,
  )
    
  generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

  return generated_text

**Choose model**

In [100]:
# model_estimetion = gpt4_estimetion
# model_name = 'gpt'
# or!!!
model_estimetion = blip_estimetion
model_name = 'blip'

**Generating answers according to the prompt (with weird and normal pictures):**

In [None]:
# Initialize dictionaries to store questions and corresponding model answers
questions_dict = {'normal_question': [], 'strange_question': []}
model_answers = {'normal_answer': [], 'strange_answer': []}

# Initialize a counter for the question index
question_index = -1

# Iterate over the records in the dataset
for record in wmtis:
    question_index += 1
    
    if question_index == len(questions):
        break

    # Get the normal and strange images from the current record
    normal_image = record['normal_image']
    strange_image = record['strange_image']

    # Convert the normal image to a base64-encoded string (URL format)
    buffered = BytesIO()
    normal_image.save(buffered, format="PNG")
    normal_url = base64.b64encode(buffered.getvalue()).decode("utf-8")
    
    # Try to generate an answer for the normal image using the model
    try:
        normal_answer = model_estimetion(normal_url, questions[question_index])
        model_answers['normal_answer'].append(normal_answer)  # Store the answer
        questions_dict['normal_question'].append(questions[question_index])  # Store the corresponding question
        
    except InvalidRequestError as e:
        print(f"Failed to get caption: {e}")  # Handle any errors during model inference

    # Convert the strange image to a base64-encoded string (URL format)
    buffered = BytesIO()
    strange_image.save(buffered, format="PNG")
    strange_url = base64.b64encode(buffered.getvalue()).decode("utf-8")
    
    # Try to generate an answer for the strange image using the model
    try:
        strange_answer = model_estimetion(strange_url, questions[question_index])

        model_answers['strange_answer'].append(strange_answer)  # Store the answer
        questions_dict['strange_question'].append(questions[question_index])  # Store the corresponding question
    
    except InvalidRequestError as e:
        print(f"Failed to get caption: {e}")  # Handle any errors during model inference


**Sanity check:**

In [71]:
print(len(model_answers['strange_answer']))

102


**Saving result in csv files:**

In [72]:
normal_df = pd.DataFrame({
    'question': questions_dict['normal_question'],
    'answer': model_answers['normal_answer']
})

# Save the DataFrame to a CSV file
normal_df.to_csv(f'{type_question}_questions_normal_answers_{type_prompt}_prompt_{explain}_{model_name}.csv', index=False)


strange_df = pd.DataFrame({
    'question': questions_dict['strange_question'],
    'answer': model_answers['strange_answer']
})

# Save the DataFrame to a CSV file
strange_df.to_csv(f'{type_question}_questions_strange_answers_{type_prompt}_prompt_{explain}_{model_name}.csv', index=False)
