In [None]:
# Import necessary libraries
import os
import pandas as pd
from tqdm import tqdm
import google.generativeai as genai
from pathlib import Path
from PIL import Image

# Load the constants and utils files you provided
import sys
sys.path.append('/kaggle/input/new-da')
import constants  # Your constants.py file
from utils import download_images  # Your utils.py file

# Read the complete test set from the attached Kaggle input dataset.
test_df = pd.read_csv('/kaggle/input/test-amazon/test_amazon.csv')

# Folder under the Kaggle working directory that receives the downloaded images.
image_dir = '/kaggle/working/images'
Path(image_dir).mkdir(parents=True, exist_ok=True)

# Half-open row window [start_index, end_index) of the test set to fetch images for.
start_index = 15000
end_index = start_index + 2000
filtered_df = test_df.iloc[start_index:end_index]

# Fetch the images for the selected rows; the CSV must contain an 'image_link' column.
download_images(filtered_df['image_link'], image_dir)


In [None]:
# Entity unit map provided
from PIL import Image
import matplotlib.pyplot as plt

# Configure the Gemini API. The key is taken from the API_KEY environment variable
# only; credentials must never be written into the notebook itself.
# NOTE(review): the key that was previously hard-coded here as a fallback should be
# treated as compromised and revoked.
_api_key = os.environ.get("API_KEY")
if not _api_key:
    raise RuntimeError(
        "API_KEY environment variable is not set; provide your Gemini API key "
        "through the environment before running this cell."
    )
genai.configure(api_key=_api_key)

# Units the model is allowed to answer with, keyed by entity name.
entity_unit_map = {
    'width': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'depth': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'height': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'item_weight': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'maximum_weight_recommendation': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'voltage': {'kilovolt', 'millivolt', 'volt'},
    'wattage': {'kilowatt', 'watt'},
    'item_volume': {'centilitre', 'cubic foot', 'cubic inch', 'cup', 'decilitre', 'fluid ounce', 'gallon',
                    'imperial gallon', 'litre', 'microlitre', 'millilitre', 'pint', 'quart'}
}

# Union of every unit that appears for any entity.
allowed_units = set().union(*entity_unit_map.values())

import time
import requests


# Default for extract_entity_value's `retries`: the total number of attempts made
# per image (the loop is range(retries)), so 2 means one try plus one retry.
MAX_RETRIES = 2
# Pause between attempts after a rate-limit (429) or server (500) error.
RETRY_DELAY = 6  # seconds

# Helper to create prompt based on the entity_name
def create_prompt(image_path, entity_name):
    """Build the text prompt asking the model for one entity's value.

    Args:
        image_path: Not used when building the text; accepted so existing
            callers that pass it keep working.
        entity_name: Key into ``entity_unit_map`` (e.g. ``'item_weight'``).
            An unknown name yields a prompt with an empty allowed-unit list
            instead of raising ``KeyError``.

    Returns:
        The prompt string, including the allowed units for ``entity_name``
        in alphabetical order and an example answer that uses the first of them.
    """
    # Sets have no stable iteration order for strings across interpreter runs,
    # so sort once and derive both the unit list and the example unit from it.
    sorted_units = sorted(entity_unit_map.get(entity_name, ()))
    allowed_units_str = ', '.join(sorted_units)
    sample_unit = sorted_units[0] if sorted_units else ""

    prompt_text = f"""
Please analyze the product image provided and extract the *{entity_name}* of the product based on any visible text, labels, or markings. Your response should follow these guidelines:

1. *Answer Format*:
   - Provide your answer strictly in the format: *'value unit'*
     - *value*: A numerical measurement (float number) in standard decimal notation (e.g., 10.5).
     - *unit: One of the **allowed units* for *{entity_name}*, listed below.
   - *Example*: 10.5 {sample_unit}

2. *Allowed Units for {entity_name}*:
   - {allowed_units_str}

3. *Instructions*:
   - Use only the units specified above; do *not* use any other units.
   - *Do not* include any additional text, symbols, or punctuation.
   - *Do not* perform any calculations or unit conversions; provide the value exactly as it appears in the image, using the full unit name from the allowed units.
   - If the unit in the image is abbreviated (e.g., 'kg' for 'kilogram'), convert it to the full unit name.
   - If multiple values are present, choose the one that most accurately represents the *{entity_name}*.
   - If a range is given (e.g., '8-12'), select a value within that range, preferably the midpoint.
   - If you cannot find the *{entity_name}* in the image, return an empty string: ''

4. *Examples*:

   - *Item Weight*:
     - Image text: "Net Weight: 500g"
     - Answer: 500 gram

   - *Height*:
     - Image text: "Height: 1.5 m"
     - Answer: 1.5 metre

   - *Voltage*:
     - Image text: "Voltage: 10.5V"
     - Answer: 10.5 volt

   - *Item Volume with Range*:
     - Image text: "Item Volume: 8-12 fl oz"
     - Answer: 10.0 fluid ounce (midpoint of the range)

   - *No Information Available*:
     - Image text: No relevant information
     - Answer: ''

*Please provide only the *{entity_name}* in the specified format without any additional commentary.*
    """

    return prompt_text

# Function to interact with Gemini API and extract entity values from the image
def extract_entity_value(image_path, entity_name, retries=MAX_RETRIES):
    """Ask the Gemini model for ``entity_name`` as shown in one product image.

    Args:
        image_path: Path of the image file to send to the model.
        entity_name: Entity to extract; passed on to ``create_prompt``.
        retries: Total number of API attempts before giving up.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""`` when
        the image cannot be opened, the error is not retryable, or every
        attempt fails. Errors are printed and never propagated to the caller.
    """
    prompt = create_prompt(image_path, entity_name)

    # One-off setup, kept out of the retry loop: a local failure here (missing or
    # unreadable file) can never be cured by retrying. It also must not reach the
    # "429"/"500" substring check below, because the error text contains the file
    # name, which may itself include those digits.
    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash")
        img = Image.open(image_path)
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")
        return ""

    # The context manager closes the underlying file on every exit path.
    with img:
        for attempt in range(retries):
            try:
                # Send request to Gemini API with text and image
                response = model.generate_content([prompt, img])

                # Clean the response and return the extracted value
                return response.text.strip()

            except Exception as e:
                print(f"Error processing image {image_path}: {e}")
                message = str(e)
                if "429" in message:  # Rate limit error
                    reason = "Rate limit hit"
                elif "500" in message:  # Internal server error
                    reason = "Internal server error"
                else:
                    return ""  # Unrecoverable error, skip this image

                # Only announce a retry and wait when another attempt will follow.
                if attempt + 1 < retries:
                    print(f"{reason}, retrying {attempt + 1}/{retries} after delay...")
                    time.sleep(RETRY_DELAY)

    print(f"Failed to process image {image_path} after {retries} attempts.")
    return ""  # Return empty if all retries fail


In [None]:
# Process each image and extract entity values, also display the image and the predictions
start_index = 40000
end_index = start_index + 1000

# Filter the DataFrame for the specified range. Take an explicit copy so that adding
# the prediction column below writes to an independent frame rather than to a slice
# of test_df (which triggers pandas' SettingWithCopyWarning).
filtered_df = test_df.iloc[start_index:end_index].copy()

# The first cell downloads images for a different row window, so fetch the images
# for this window before predicting.
# NOTE(review): whether download_images skips files that already exist is not
# visible here - confirm if repeated downloads are a concern.
download_images(filtered_df['image_link'], image_dir)

predictions = []
for idx, row in tqdm(filtered_df.iterrows(), total=len(filtered_df)):
    image_filename = Path(row['image_link']).name
    image_path = os.path.join(image_dir, image_filename)

    # Extract entity value for each row using the Gemini API
    entity_value = extract_entity_value(image_path, row['entity_name'])
    predictions.append(entity_value)

    # Display image with prediction. Showing the picture is optional: a missing or
    # unreadable file must not abort the loop and discard the predictions so far.
    try:
        with Image.open(image_path) as img:
            plt.imshow(img)
    except OSError as e:
        print(f"Could not display image {image_path}: {e}")
        plt.close()
        continue
    plt.axis('off')  # Hide axes
    plt.title(f"Prediction: {entity_value}")
    plt.show()

# Add predictions to filtered DataFrame (one prediction per row, in row order)
filtered_df['entity_value'] = predictions

In [None]:
# Write the submission CSV: one row per processed sample holding its 'index' and the
# predicted 'entity_value'; the pandas row index itself is not written.
submission_columns = ['index', 'entity_value']
submission_df = filtered_df[submission_columns]
submission_df.to_csv('/kaggle/working/test_out.csv', index=False)

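# Colab only (the rest of this notebook uses Kaggle paths): trigger a browser
# download of the results file. The path below does not match the location the CSV
# is written to above and must be adjusted before enabling these lines.
# from google.colab import files
# files.download('/content/test_out.csv')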