In [1]:
!pip install google-generativeai Pillow



In [2]:
import pandas as pd
# Table of images to curate; the curation cell reads its `image_id`, `path`,
# `product_type` and `metadata` columns, one row per image.
df = pd.read_csv('/kaggle/input/ids-without-oversampling2/ids_without_oversampling2.csv')

In [3]:
# Instruction text sent to the model with every image; the per-image metadata JSON is
# appended to it at call time. Two points are kept deliberately consistent:
#   * metadata-driven questions are capped at 3 and image-driven ones make up the rest of the 6;
#   * the output-format section describes an object with a `questions` array, which is the
#     shape of the response schema enforced on the API call.
prompt = """You are an expert in generating questions and answers for e-commerce product images. Your task is to create questions and one-word answers to train a Visual Question Answering (VQA) model specifically for online shopping scenarios. You will be provided with a single product image and its associated metadata.

**Input:**

With each prompt, you will receive:

* A single e-commerce product image (which I cannot directly include in this text prompt, but assume it will be passed correctly to the API).
* Accompanying metadata (which may be empty or contain product details). The metadata will be structured as a dictionary.  This metadata may include information like:
    * Product name
    * Category
    * Color
    * Material
    * Price
    * Brand
    * Features

**Task:**

For the single product image provided with the prompt, generate 6 questions and their corresponding one-word answers. The questions should be designed to be answerable by a VQA model using only the visual content of the image, **without relying on the metadata** .

* **Metadata-Driven Questions (Up to 3):** If the metadata is present and relevant, formulate up to 3 questions that connect the image to the metadata.  These questions MUST be answerable by looking at the image ONLY even if the model doesn't have the metadata.  Examples:
        * "What is the color of the [product type from metadata]?"
        * "What is the brand of this [product type]?" (if brand is visible)
        * "What is the material of the [part of the product]?" (if material is visually discernible)
* **Image-Driven Questions (Minimum 3, up to 6):** Generate at least 3 questions based purely on the image content. These should focus on visual attributes of the product. If the metadata is insufficient or unsuitable, generate all 6 questions from the image.  Examples:
        * "What is the shape of the [product part]?"
        * "Are there any patterns?"
        * "What is this made of?" (if discernible)
* **Question Types:** Vary the question types to cover relevant e-commerce product attributes:
    * Product Type: "What is this?"
    * Color: "What is the color?"
    * Material: "What is it made of?"
    * Pattern: "Is it striped?"
    * Shape: "What is the shape?"
    * Parts: "What part is this?"
    * Features: "Does it have buttons?"

* **Binary (Yes/No) Questions (Highest Priority):**  
  - If a question starts with **Is**, **Are**, **Does**, **Do**, **Has**, or **Have**, the answer **must** be exactly **Yes** or **No** .  
  - **Do not** output any other word for these questions. 
* **One-Word Answers:**  
  Every answer must be a single word. For binary questions, that word is either **Yes** or **No**. For all others, choose the most concise descriptive term.
* **MATERIAL ABSTRACTION**  
  - When asking **“What is the material…?”**, your answer must be one of these generic categories only:  
  **metal, plastic, wood, glass, ceramic, fabric, leather, rubber, paper, cardboard**.  
  - Never use subtypes (e.g. “copper”, “oak”), only the generic term.  
* **Output Format:** Provide the output as a JSON object with a single key "questions" whose value is an array of 6 objects with the following structure:

    ```json
    {
        "questions": [
            {
                "question": "Question 1 here",
                "answer": "Answer1"
            },
            {
                "question": "Question 2 here",
                "answer": "Answer2"
            },
            {
                "question": "Question 3 here",
                "answer": "Answer3"
            },
            {
                "question": "Question 4 here",
                "answer": "Answer4"
            },
            {
                "question": "Question 5 here",
                "answer": "Answer5"
            },
            {
                "question": "Question 6 here",
                "answer": "Answer6"
            }
        ]
    }
    ```

**Constraints:**

* Do not generate questions that require external knowledge or information not present in the image or metadata.
* Focus on factual and directly observable information.
* Prioritize the image content over the metadata, but use metadata to guide relevant questions.
* Adhere strictly to the one-word answer constraint.
* Ensure questions are relevant to typical e-commerce product attributes.

Provide the output for ONLY the single image provided with this prompt. Each subsequent image will be provided in a new prompt, along with its metadata.
"""

In [4]:
from google import genai
import json
from pydantic import BaseModel, Field, ValidationError
from typing import List, Dict
from PIL import Image
import io
import os
import time

# One generated VQA example: a question about the image plus its answer.
# Both fields are required strings. The `description` texts are part of the schema
# (this model is nested in QAGenerationOutput, which is passed as `response_schema`).
# Documented with `#` comments on purpose: pydantic copies a class docstring into the
# generated JSON schema, which would change what is sent to the API.
class QuestionAnswerPair(BaseModel):
    question: str = Field(..., description="The question about the image")
    answer: str = Field(..., description="One-word answer to the question")

# Top-level shape of one model response: an object whose `questions` field holds the
# list of question/answer pairs. Passed as `response_schema` to the API call and used
# to validate the returned JSON.
# Documented with `#` comments on purpose: pydantic copies a class docstring into the
# generated JSON schema, which would change what is sent to the API.
class QAGenerationOutput(BaseModel):
    questions: List[QuestionAnswerPair] = Field(..., description="List of question-answer pairs")

def generate_qa_json_with_prompt_metadata(img, metadata, prompt: str) -> QAGenerationOutput:
    """Ask Gemini for question/answer pairs about a single product image.

    The metadata is serialised to JSON and appended to ``prompt``; the image and the
    combined prompt are sent to ``gemini-2.0-flash`` with the response constrained to
    JSON matching ``QAGenerationOutput``. The reply is parsed once and validated
    against that schema.

    Relies on a module-level ``client`` (created elsewhere in the notebook) that
    exposes ``client.models.generate_content``; it must exist before the first call.

    Args:
        img: The image to describe; passed through unchanged as the first element of
            ``contents`` (the notebook passes a PIL image).
        metadata: Product metadata to append to the prompt. Values that are not
            JSON-native are stringified instead of aborting the request.
        prompt: Instruction text that precedes the metadata.

    Returns:
        The validated ``QAGenerationOutput``.

    Raises:
        Exception: For any failure (API error, empty reply, invalid JSON, schema
            mismatch). The message is prefixed with
            ``"Error generating or processing Q&A: "`` and the original error is
            attached as ``__cause__``.
    """
    try:
        # default=str: fall back to str() for values json cannot encode natively,
        # so odd metadata types do not cost us the whole request.
        metadata_string = json.dumps(metadata, default=str)
        full_prompt = f"{prompt} Image Metadata: {metadata_string}  "

        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=[img, full_prompt],
            config={
                'response_mime_type': 'application/json',
                'response_schema': QAGenerationOutput,
            },
        )

        response_text = response.text
        # Guard against a reply with no text (presumably possible when the model
        # returns no text part, e.g. a blocked response); otherwise json.loads
        # fails with an unhelpful TypeError.
        if not response_text:
            raise ValueError("Empty response from the API (no text returned).")

        try:
            json_output = json.loads(response_text)
        except json.JSONDecodeError as e:
            raise ValueError("Invalid JSON format in API response.") from e

        # Validate the already-parsed payload rather than parsing the text a second time.
        try:
            return QAGenerationOutput.model_validate(json_output)
        except ValidationError as e:
            raise ValueError(f"JSON does not match expected schema: {e}") from e

    except Exception as e:
        # Single error surface for callers; keep the original error chained for debugging.
        raise Exception(f"Error generating or processing Q&A: {e}") from e


# try:
#     img = Image.open(path)
#     qa_output = generate_qa_json_with_prompt_metadata(img, metadata, prompt)

#     print("Generated Q&A (Pydantic Object):")
#     for qa in qa_output.questions:
#         print(f"- Q: {qa.question}  A: {qa.answer}")

#     print("\nGenerated Q&A (JSON String):")
#     print(qa_output.model_dump_json(indent=2))

# except ValueError as e:
#     print(f"Error: {e}")
# except Exception as e:
#     print(f"An unexpected error occurred: {e}")

  warn(


In [None]:
from google import genai
import tqdm

import ast

# --- Configuration -----------------------------------------------------------
# Never commit a real key in the notebook: read it from the environment
# (on Kaggle, copy it from your secrets store into os.environ before this cell).
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this cell."
    )
client = genai.Client(api_key=GOOGLE_API_KEY)

# df['metadata'] = df['metadata'].apply(ast.literal_eval)

# where your images live on disk
image_root = "/kaggle/input/abo-small/images/small"  # adjust to point at the folder that contains all your `path` subfolders

MAX_PASSES = 5              # total sweeps over the still-failing rows before giving up
REQUEST_PAUSE_SECONDS = 0.5 # pause after every request to be gentle on rate limits
PASS_BACKOFF_SECONDS = 30   # extra wait before retry pass k is k * this value

results = []
failures = df.index.tolist()  # indices still to be processed

# Sweep the pending rows a bounded number of times. Transient errors (timeouts,
# 503s) usually clear on a later pass; permanent ones (missing file, bad key)
# must not keep the cell spinning forever.
for attempt in range(1, MAX_PASSES + 1):
    if not failures:
        break

    if attempt > 1:
        print(f"\nRetrying {len(failures)} failures...\n")
        # Linear back-off so an overloaded service gets time to recover.
        time.sleep(PASS_BACKOFF_SECONDS * (attempt - 1))

    next_failures = []
    for idx in tqdm.tqdm(failures, desc="Curating VQA", unit="img"):
        row = df.loc[idx]
        image_id     = row['image_id']
        rel_path     = row['path']
        product_type = row['product_type']
        metadata     = row['metadata']

        full_path = os.path.join(image_root, rel_path)
        try:
            # Context manager closes the file handle once the request has been sent.
            with Image.open(full_path) as img:
                qa_output = generate_qa_json_with_prompt_metadata(img, metadata, prompt)

            # Flatten each QA pair into its own row. Build the rows first and add
            # them in one step so a failure can never leave partial rows that a
            # retry would then duplicate.
            image_rows = [
                {
                    "image_id":     image_id,
                    "path":         rel_path,
                    "product_type": product_type,
                    "question":     qa.question,
                    "answer":       qa.answer,
                }
                for qa in qa_output.questions
            ]
            results.extend(image_rows)

        except Exception as e:
            print(f" Error for {image_id} (idx={idx}): {e}")
            next_failures.append(idx)

        # tiny pause to be gentle on rate limits
        time.sleep(REQUEST_PAUSE_SECONDS)

    failures = next_failures

# Write whatever succeeded, even if some rows never did.
out_df = pd.DataFrame(results)
out_df.to_csv("/kaggle/working/qa_dataset2.csv", index=False)

if failures:
    print(
        f"\nGave up on {len(failures)} image(s) after {MAX_PASSES} passes; "
        f"unresolved indices: {failures}"
    )
    print("Partial CSV saved to /kaggle/working/qa_dataset2.csv")
else:
    print("\nAll done! CSV savaed to /kaggle/working/qa_dataset2.csv")


Curating VQA:   0%|          | 39/11024 [01:18<5:49:16,  1.91s/img]

 Error for 51AJ8W0Y0lL (idx=39): Error generating or processing Q&A: 408 Request Timeout. {'message': 'Request Timeout', 'status': 'Request Timeout'}


Curating VQA:  37%|███▋      | 4064/11024 [2:35:20<4:28:50,  2.32s/img]

 Error for 81Qgaop9XDL (idx=4064): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}


Curating VQA:  38%|███▊      | 4167/11024 [2:39:15<4:27:58,  2.34s/img]

 Error for 61gCIx+yq9L (idx=4167): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}


Curating VQA:  38%|███▊      | 4168/11024 [2:39:15<3:34:36,  1.88s/img]

 Error for 71c02VBk3TL (idx=4168): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The model is overloaded. Please try again later.', 'status': 'UNAVAILABLE'}}


Curating VQA:  44%|████▍     | 4843/11024 [3:05:48<3:59:53,  2.33s/img]

 Error for 81jluLCyndL (idx=4843): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  44%|████▍     | 4844/11024 [3:05:49<3:06:28,  1.81s/img]

 Error for 71irBLLTUzL (idx=4844): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  46%|████▌     | 5029/11024 [3:13:07<3:58:13,  2.38s/img]

 Error for 61OjwHQlGfL (idx=5029): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  46%|████▌     | 5053/11024 [3:14:02<4:03:10,  2.44s/img]

 Error for 71OPuZPOBuL (idx=5053): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  46%|████▌     | 5063/11024 [3:14:25<3:56:44,  2.38s/img]

 Error for 714g4DVvsEL (idx=5063): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  65%|██████▌   | 7195/11024 [4:37:03<2:28:07,  2.32s/img]

 Error for 61+VkBeIwpL (idx=7195): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  82%|████████▏ | 9068/11024 [5:49:24<1:17:35,  2.38s/img]

 Error for 910HtoepACL (idx=9068): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  85%|████████▍ | 9352/11024 [6:00:20<1:04:40,  2.32s/img]

 Error for 81fKNazYf7L (idx=9352): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  85%|████████▌ | 9372/11024 [6:01:03<58:57,  2.14s/img]  

 Error for 51JviDRFXnL (idx=9372): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  86%|████████▋ | 9512/11024 [6:06:22<54:16,  2.15s/img]

 Error for 51YcLi8PcYL (idx=9512): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  98%|█████████▊| 10787/11024 [6:55:47<09:37,  2.44s/img]

 Error for 51PywSx5qRL (idx=10787): Error generating or processing Q&A: 500 Internal Server Error. {'message': 'Reading the request body timed out due to data arriving too slowly. See MinRequestBodyDataRate.\n', 'status': 'Internal Server Error'}


Curating VQA:  98%|█████████▊| 10800/11024 [6:56:21<08:34,  2.30s/img]

 Error for 81FFxiDh0kL (idx=10800): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  98%|█████████▊| 10802/11024 [6:56:24<07:14,  1.96s/img]

 Error for 715qirjyOFL (idx=10802): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA:  99%|█████████▉| 10897/11024 [7:00:08<04:56,  2.33s/img]

 Error for 81P7HUPyLRL (idx=10897): Error generating or processing Q&A: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}}


Curating VQA: 100%|██████████| 11024/11024 [7:05:05<00:00,  2.31s/img]



Retrying 18 failures...



Curating VQA: 100%|██████████| 18/18 [00:42<00:00,  2.35s/img]



All done! CSV savaed to /kaggle/working/qa_dataset2.csv


In [6]:
# Reload the CSV written by the curation cell to inspect what was actually saved.
# NOTE(review): this rebinds `results` from the list of row dicts built during
# curation to a DataFrame.
results = pd.read_csv('/kaggle/working/qa_dataset2.csv')
results

Unnamed: 0,image_id,path,product_type,question,answer
0,61mlKTXPShL,97/97fbd49f.jpg,KITCHEN_KNIFE,What is this?,Cleaver
1,61mlKTXPShL,97/97fbd49f.jpg,KITCHEN_KNIFE,What color is the handle?,Black
2,61mlKTXPShL,97/97fbd49f.jpg,KITCHEN_KNIFE,What is the blade made of?,Metal
3,61mlKTXPShL,97/97fbd49f.jpg,KITCHEN_KNIFE,Does this have a handle?,Yes
4,61mlKTXPShL,97/97fbd49f.jpg,KITCHEN_KNIFE,What color is blade?,Silver
...,...,...,...,...,...
66139,81P7HUPyLRL,f9/f95942b3.jpg,WRENCH,What are they made of?,metal
66140,81P7HUPyLRL,f9/f95942b3.jpg,WRENCH,What is the color?,Black
66141,81P7HUPyLRL,f9/f95942b3.jpg,WRENCH,Are they long?,Yes
66142,81P7HUPyLRL,f9/f95942b3.jpg,WRENCH,What is the shape?,Hexagonal
