In [None]:
import os
import json
import base64
import asyncio
import aiofiles
import nest_asyncio
from tqdm import tqdm
from google.colab import drive
from openai import AsyncOpenAI

# Patch asyncio so coroutines can be driven from inside the notebook's
# already-running event loop.
nest_asyncio.apply()

# Mount Google Drive; the dataset and output paths below live under it.
drive.mount('/content/drive', force_remount=True)

dataset_path = '/content/drive/MyDrive/mmml_project/mini_gqa.json'
output_folder = '/content/drive/MyDrive/mmml_project/outputs'
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, 'mini_gqa_with_gpt.json')

# Load the dataset records that the rest of the notebook iterates over.
with open(dataset_path, 'r') as f:
    data = json.load(f)

# Read the API key from the environment instead of embedding it in the
# notebook, so the credential is never saved or shared with the file.
# Set OPENAI_API_KEY before running this cell; a missing variable raises
# KeyError here rather than failing later on the first request.
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])


# Shared limiter: at most 4 holders at a time.
SEMAPHORE = asyncio.Semaphore(4)

async def process_record(record):
    """Ask the model the record's question about its image and store the answer.

    Args:
        record: Dict with at least ``"question"`` (used verbatim as the
            prompt) and ``"image_file"`` (path to the image on disk).
            The dict is updated in place.

    Returns:
        The same ``record`` object. ``record["gpt_response"]`` is the model's
        text answer on success, or a string starting with ``"Error:"`` when
        the image is missing, the image cannot be read, the request fails,
        or the model returns no text. An existing ``"gpt_response"`` value is
        left untouched when the image file is missing.
    """
    question = record["question"]
    image_file = record["image_file"]

    if not os.path.exists(image_file):
        # Always leave a "gpt_response" key behind so later cells can index it;
        # setdefault keeps any answer already stored on the record.
        record.setdefault(
            "gpt_response", f"Error: image file not found: {image_file}"
        )
        return record

    # NOTE(review): the MIME type is hard-coded; this assumes PNG inputs.
    img_type = "image/png"
    prompt = question

    async with SEMAPHORE:
        try:
            # Read and encode inside the semaphore so only the records that
            # are currently in flight hold their image bytes in memory, and
            # inside the try so a read failure is recorded per record instead
            # of propagating out of the task.
            async with aiofiles.open(image_file, "rb") as img_f:
                img_bytes = await img_f.read()
            img_b64_str = base64.b64encode(img_bytes).decode('utf-8')

            response = await client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": f"data:{img_type};base64,{img_b64_str}"}},
                        ],
                    }
                ],
            )
            # Store only the answer text: the response object itself is not
            # JSON serializable and would break saving the dataset.
            answer = response.choices[0].message.content
            if answer is None:
                record["gpt_response"] = "Error: empty response from model"
            else:
                record["gpt_response"] = answer
        except Exception as e:
            record["gpt_response"] = f"Error: {e}"

    return record

async def process_dataset():
    """Run ``process_record`` over every entry in ``data`` and save the results.

    All records are scheduled at once and awaited together; the gathered
    results are then written to ``output_path`` as indented JSON and the
    destination path is printed.
    """
    updated_data = await asyncio.gather(
        *(process_record(entry) for entry in data)
    )

    async with aiofiles.open(output_path, 'w') as out_file:
        serialized = json.dumps(updated_data, indent=2)
        await out_file.write(serialized)

    print(f"Updated dataset saved to: {output_path}")

# Kick off the full run; awaiting at cell scope relies on the notebook's
# support for top-level `await`.
await process_dataset()

Mounted at /content/drive


TypeError: Object of type ChatCompletion is not JSON serializable

In [None]:
import os
import json

def _has_usable_response(item):
    """Return True when ``item`` carries a non-error text ``gpt_response``."""
    response = item.get('gpt_response')
    # Records without a response, or with a non-string one, are skipped
    # rather than crashing the filter.
    return (
        isinstance(response, str)
        and response != "Error parsing string response"
        and not response.startswith("Error:")
    )


# Keep only the records whose response is real answer text.
filtered_data = [item for item in data if _has_usable_response(item)]

filtered_output_path = output_path.replace('.json', '_filtered.json')
with open(filtered_output_path, 'w') as f:
    json.dump(filtered_data, f, indent=2)

print(f"Total records after filtering: {len(filtered_data)}")
# Only show a sample when something survived the filter.
if filtered_data:
    print("\nExample of first record:")
    print(json.dumps(filtered_data[0], indent=2))

Total records after filtering: 7359

Example of first record:
{
  "imageId": "n161313",
  "image_file": "/content/drive/MyDrive/mmml_project/gqa_images/n161313.png",
  "question": "Is it overcast?",
  "answer": "no",
  "fullAnswer": "No, it is clear.",
  "gpt_response": "The sky appears to be clear and sunny in the image, indicating that it is not overcast."
}
