In [16]:
import os
import time
import zipfile
import json
from PIL import Image
import base64
from mimetypes import guess_type
import tiktoken
from openai import OpenAI

# Do not paste a real key into the notebook: export OPENAI_API_KEY before
# starting the kernel. setdefault() keeps a key that is already present in the
# environment instead of overwriting it with the empty placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "")

### Use local images
#### To use local images, convert them to base64 data URLs with the code below so they can be passed to the API

#### Read image from zip file and convert image to base64 format

In [3]:
# Encode a local image file as a base64 data URL
def local_image_to_data_url(image_path):
    """Return the file at ``image_path`` as a ``data:<mime>;base64,<payload>`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``application/octet-stream`` is used instead.
    """
    guessed_type = guess_type(image_path)[0]
    mime_type = 'application/octet-stream' if guessed_type is None else guessed_type

    # Read the raw bytes first, then turn them into base64 text
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_payload = base64.b64encode(raw_bytes).decode('utf-8')

    return f"data:{mime_type};base64,{encoded_payload}"

In [2]:
# Function to read images from a zip file
def read_images_from_zip(zip_file_path, extract_to):
    """Extract a zip archive and return the paths of the extracted files.

    Args:
        zip_file_path: Path of the zip archive to read.
        extract_to: Directory the archive is extracted into.

    Returns:
        A list of ``os.path.join(extract_to, member_name)`` paths, in archive
        order, with one entry per file member. Directory members (names ending
        in ``/``) are still extracted but are left out of the list, because
        they cannot be opened and encoded like a file.
    """
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
        return [
            os.path.join(extract_to, member.filename)
            for member in zip_ref.infolist()
            if not member.is_dir()
        ]

In [1]:
# Turn every member of a zip archive into a data URL and persist the mapping as JSON
def convert_images_from_zip_to_data_urls(zip_file_path, output_json_path):
    """Extract ``zip_file_path`` into ``./`` and encode each extracted path as a data URL.

    The result maps each path's base name to its data URL. The mapping is
    written to ``output_json_path`` (indented JSON) and also returned.
    """
    extracted_paths = read_images_from_zip(zip_file_path, './')

    # Keyed by base name; a later entry with the same base name replaces an earlier one
    data_urls = {
        os.path.basename(path): local_image_to_data_url(path)
        for path in extracted_paths
    }

    with open(output_json_path, 'w') as json_file:
        json.dump(data_urls, json_file, indent=4)

    return data_urls

In [4]:
# Source archive with the images, and the JSON file their data URLs are saved to
zip_file_path, output_path = "./images_full.zip", "./image_urls_full.json"

In [8]:
# Extract the archive, encode each extracted file, and keep the name -> data URL mapping
image_data = convert_images_from_zip_to_data_urls(
    zip_file_path=zip_file_path,
    output_json_path=output_path,
)

### GPT-4 with Vision for VQA

In [7]:
# Look up the tokenizer that tiktoken associates with this model name.
encoding = tiktoken.encoding_for_model("gpt-4-vision-preview")

# Display the token ids of "YesNo" under that tokenizer; these are the ids a
# logit_bias has to reference to favour those tokens with this tokenizer.
# NOTE(review): token ids are tokenizer-specific -- presumably this should be
# re-run with the model name that is actually queried; confirm.
encoding.encode("YesNo")

[9642, 2822]

In [3]:
# Notebook-level API client; openai_vqa calls client.chat.completions.create on it.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

def openai_vqa(
    question,
    image_url,
    model='gpt-4o',
    top_p=0.1,
    logit_bias=None,
    max_tokens=1,
    return_response=False
):
    """Ask a chat model a yes/no question about an image.

    Args:
        question: The question text; it is embedded in a prompt that asks for
            a 'Yes' or 'No' answer only.
        image_url: URL (or base64 data URL) of the image, sent with
            ``detail='low'``.
        model: Name of the chat model to query.
        top_p: Nucleus-sampling parameter forwarded to the API.
        logit_bias: Mapping of token id -> bias forwarded to the API. When
            ``None`` (the default), a bias of 100 is applied to the tokens of
            "Yes" and "No" as encoded by ``model``'s own tokenizer. Token ids
            are tokenizer-specific: the ids 9642/2822 come from the
            gpt-4-vision-preview tokenizer and are not valid for every model,
            so they are no longer hard-coded.
        max_tokens: Maximum number of tokens to generate.
        return_response: When true, return the raw API response object.

    Returns:
        The raw response if ``return_response`` is true; otherwise the first
        character of the reply text (e.g. 'Y' for "Yes", 'N' for "No").

    Raises:
        KeyError: If ``logit_bias`` is ``None`` and the installed tiktoken
            does not know ``model``.
    """
    if logit_bias is None:
        # Derive the Yes/No token ids from the tokenizer of the model that is
        # actually being called, so the bias always targets the right tokens.
        encoding = tiktoken.encoding_for_model(model)
        logit_bias = {
            token_id: 100
            for word in ("Yes", "No")
            for token_id in encoding.encode(word)
        }

    resp = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text",
                     "text": f"You are a helpful assistant. Please answer the question only with 'Yes' or 'No'. Do not give other outputs. Question: {question}"},
                    {"type": "image_url",
                     "image_url": {
                           "url": image_url,
                           'detail': 'low'
                           },}
                ]
            }
        ],
        top_p=top_p,
        logit_bias=logit_bias,
        max_tokens=max_tokens,
    )

    if return_response:
        return resp

    # Only the first character of the reply is returned to the caller.
    return resp.choices[0].message.content[0]

In [None]:
# Read previously saved evaluation data, if there is any
def load_data(file_path):
    """Return the parsed JSON content of ``file_path``, or ``None`` if the path does not exist."""
    if not os.path.exists(file_path):
        return None

    with open(file_path, 'r') as saved_file:
        return json.load(saved_file)

# Check if an image has already been processed
def is_processed(image_data):
    """Return True when an image record already holds a complete set of answers.

    A record counts as processed when it has both 'qid2answer' and
    'qid2scores' and none of the recorded answers is ``None``. A ``None``
    answer is what the evaluation loop stores when a question raised an
    error, so such records are reported as unprocessed and get retried on the
    next run instead of keeping a permanent score of 0.0.

    Args:
        image_data: One image record (a dict) from the evaluation file.

    Returns:
        bool: Whether the record can be skipped.
    """
    if 'qid2answer' not in image_data or 'qid2scores' not in image_data:
        return False

    answers = image_data['qid2answer']
    if isinstance(answers, dict):
        return all(answer is not None for answer in answers.values())

    # Unexpected shape: fall back to the plain "both keys are present" rule.
    return True

# JSON file holding the image records; it is also where results are saved back.
file_path = 'evaluated_data_gpt4o.json'

partial_data = load_data(file_path)
# load_data returns None when the file is missing. Stop here with a clear
# message rather than with a TypeError once the records are iterated.
if partial_data is None:
    raise FileNotFoundError(f"Evaluation data file not found: {file_path}")

In [None]:
# Answer every question of each not-yet-processed image and checkpoint after each image
for image_data in partial_data:
    if is_processed(image_data):
        continue  # Skip already processed images

    qid2answer = {}
    qid2scores = {}

    # image_id and qid2dependency are read but not used further in this cell.
    image_id = image_data['image_id']
    image_url = image_data['image_url']
    qid2question = image_data['qid2question']
    qid2dependency = image_data['qid2dependency']

    # Named `qid` (not `id`) so the builtin id() is not shadowed at notebook scope.
    for qid, question in qid2question.items():
        try:
            answer = openai_vqa(question, image_url)
            qid2answer[qid] = answer
            # openai_vqa returns the first character of the reply, so 'Y' means "Yes".
            qid2scores[qid] = float(answer == 'Y')
        except Exception as e:
            # Best effort: record the failure and carry on with the next question.
            print(f"Error processing question {qid}: {e}")
            qid2answer[qid] = None
            qid2scores[qid] = 0.0

    # Store results back into the JSON structure
    image_data['qid2answer'] = qid2answer
    image_data['qid2scores'] = qid2scores

    # Save after every image. The data is written to a temporary file and then
    # moved over the real one, so an interrupted write cannot truncate the
    # only copy of the results gathered so far.
    try:
        tmp_path = file_path + '.tmp'
        with open(tmp_path, 'w') as file:
            json.dump(partial_data, file, indent=4)
        os.replace(tmp_path, file_path)
    except Exception as e:
        print(f"Error saving data: {e}")