In [1]:
!python -m pip install git+https://github.com/huggingface/transformers -q

[0m

In [2]:
!pip install accelerate>=0.26.0

In [None]:
!pip install pillow pandas

Collecting pillow
  Downloading pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting pandas
  Downloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.7/12.7 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5

In [None]:
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)

processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct"
)

Unrecognized keys in `rope_scaling` for 'rope_type'='default': {'mrope_section'}
Downloading shards: 100%|██████████| 2/2 [00:56<00:00, 28.41s/it]
`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.18it/s]


In [None]:
from PIL import Image
import requests
import pandas as pd
import logging
from tqdm import tqdm

In [None]:
from PIL import Image, ImageEnhance

def resize_image(image, min_dimension=64, max_dimension=1500, algorithm=Image.LANCZOS):
    # Get the width and height of the image
    width, height = image.size
    
    # Check if either dimension is smaller than the minimum allowed size
    if min(width, height) < 64:
        # Calculate the scaling factor to maintain aspect ratio
        scaling_factor = min_dimension / min(width, height)
        
        # Calculate new dimensions
        new_width = max(min_dimension, int(width * scaling_factor))
        new_height = max(min_dimension, int(height * scaling_factor))
        
        try:
            # Attempt to upsample the image using Lanczos
            image = image.resize((new_width, new_height), Image.LANCZOS)
        except Exception as e:
            print(f"Error upsampling with Lanczos: {str(e)}")
            print("Falling back to bicubic interpolation...")
            # Fall back to bicubic interpolation
            image = image.resize((new_width, new_height), Image.BICUBIC)
    
    # Now check if either dimension is larger than the maximum allowed size
    elif max(width, height) > max_dimension:
        # Calculate the scaling factor to maintain aspect ratio
        scaling_factor = max_dimension / max(width, height)
        
        # Calculate new dimensions
        new_width = int(width * scaling_factor)
        new_height = int(height * scaling_factor)
        
        try:
            # Attempt to resize the image using the specified algorithm
            image = image.resize((new_width, new_height), algorithm)
        except Exception as e:
            print(f"Error using {algorithm} algorithm: {str(e)}")
            print("Falling back to simple scale down...")
            # Fall back to simple scale down
            image = image.resize((new_width, new_height), Image.NEAREST)
    
    return image

def downsize_and_enhance_contrast(input_img, contrast_factor=1.5, max_dimension=1500):
    # Open the image
    image = input_img
    
    # Resize the image if its dimensions are larger than the max allowed
    image = resize_image(image, max_dimension=max_dimension)
    
    # Enhance the contrast of the resized image
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(contrast_factor)  # contrast_factor > 1 increases contrast
    
    # Save the enhanced image
    return enhanced_image

In [None]:


# Define the prompt templates based on entity_name
prompt_templates = {    
    "depth": "Identify the depth of the product.",
    
    "height": "Identify the height of the product.",
    
    "item_volume": "Determine the volume of the product by interpreting any dimensions related to width, depth, and height If a volume is not explicitly given, calculate from visible dimensions if possible.",
    
    "item_weight": "Identify the product's item weight if specifically indicated in the image. Focus on labels with 'weight' or similar measurement of units such as mass like kg, g, oz et.",
    
    "maximum_weight_recommendation": "Identify the maximum weight recommendation if shown in the image. Look for labels that suggest maximum load capacity or recommendations.",
    
    "voltage": "Identify the voltage rating of the product. Focus on electrical specifications that might be mentioned in the product illustration.",
    
    "wattage": "Identify the wattage of the product in the product. Look for labels indicating power usage or output near the product.",
    
    "width": "Identify the width of the product."
}


# Define a function to process a single image and generate text based on the prompt
def process_image(image_url, prompt):
    try:
        # Load the image
        image = Image.open(requests.get(image_url, stream=True).raw)
        image = downsize_and_enhance_contrast(image, contrast_factor=1.2, max_dimension=1500)
        
        # Create the prompt for the model
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                    },
                    {
                        "type": "text",
                        "text": prompt
                    }
                ]
            }
        ]

        # Prepare the prompt for the model
        text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

        # Process the input (image + text)
        inputs = processor(
            text=[text_prompt],
            images=[image],
            padding=True,
            return_tensors="pt"
        )

        # Move inputs to the same device as the model
        inputs = inputs.to("cuda")

        # Generate output text from the model
        output_ids = model.generate(**inputs, max_new_tokens=1024)

        # Extract the generated text
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(inputs.input_ids, output_ids)
        ]

        output_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )

        return output_text[0]  # Return the generated text
    
    except Exception as e:
        # Log the error and return None or a default string
        logging.error(f"Error processing image from {image_url}: {str(e)}")
        return "Error"

# Read the CSV file
try:
    df = pd.read_csv('/home/test.csv')
except Exception as e:
    logging.error(f"Error reading CSV file: {str(e)}")
    df = pd.DataFrame()  # Create an empty DataFrame if reading fails

# Create an empty list to store the results temporarily
batch_results = []

# Initialize an empty CSV file to store the final output
output_csv = '/home/generated_output.csv'

# Initialize the progress bar
with tqdm(total=df.shape[0], desc="Processing images") as pbar:
    # Iterate over each row in the DataFrame
    for index_ref, row in df.iloc[:3700].iterrows():
        try:
            index = row['index']
            # Extract the image link and the entity_name for the prompt
            image_link = row['image_link']
            entity_name = row['entity_name']

            # Use the appropriate prompt based on the entity_name
            prompt = prompt_templates.get(entity_name, f"What is the {entity_name}, return as {{{entity_name}}}")

            # Process the image and get the generated output text
            generated_output = process_image(image_link, prompt)

            # Append the index and the generated output to the batch results
            batch_results.append([index, generated_output])

            # Check if batch_results has reached 500 entries, then save to CSV
            if len(batch_results) >= 100:
                batch_df = pd.DataFrame(batch_results, columns=['index', 'generated_output'])

                # Append to CSV file
                try:
                    batch_df.to_csv(output_csv, mode='a', header=not pd.io.common.file_exists(output_csv), index=False)
                    batch_results = []  # Clear the batch after saving
                except Exception as e:
                    logging.error(f"Error saving CSV file: {str(e)}")
                    print("Error occurred while saving the output CSV file.")

        except Exception as e:
            # Log the error related to this row and continue with the next one
            logging.error(f"Error processing row {index}: {str(e)}")
            batch_results.append([index, "Error"])

        # Update the progress bar
        pbar.update(1)

# Save any remaining results that weren't written in the last batch
if batch_results:
    batch_df = pd.DataFrame(batch_results, columns=['index', 'generated_output'])
    try:
        batch_df.to_csv(output_csv, mode='a', header=not pd.io.common.file_exists(output_csv), index=False)
        print(f"Final batch saved to '{output_csv}'.")
    except Exception as e:
        logging.error(f"Error saving final CSV batch: {str(e)}")
        print("Error occurred while saving the final batch to the CSV file.")


Processing images:  46%|████▋     | 3700/8000 [1:59:31<2:18:54,  1.94s/it]
