In [None]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP captioning processor and model once, at module level, so every
# call to generate_caption reuses them. Both are loaded from the same
# checkpoint name.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

def generate_caption(image_url, timeout=30, max_new_tokens=20):
    """Download an image and return a BLIP-generated caption for it.

    The image is fetched over HTTP, converted to RGB, run through the
    module-level ``blip_processor`` / ``blip_model`` pair, and the decoded
    caption is post-processed so that immediately repeated words
    (e.g. ``"a a ruler"``) are collapsed into one.

    Args:
        image_url: URL of the image to caption.
        timeout: Seconds to wait for the HTTP request before giving up.
            Passed straight to ``requests.get``.
        max_new_tokens: Upper bound on the number of tokens generated for
            the caption.

    Returns:
        The caption as a single space-joined string with consecutive
        duplicate words removed.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the downloaded bytes cannot be opened as an image by PIL.
    """
    # Without a timeout a single unresponsive host would block the whole
    # batch indefinitely.
    response = requests.get(image_url, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to PIL, which
    # would surface as a confusing image-decoding error.
    response.raise_for_status()

    image = Image.open(BytesIO(response.content)).convert('RGB')
    inputs = blip_processor(images=image, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=max_new_tokens)
    caption = blip_processor.decode(output[0], skip_special_tokens=True)

    # Pair every word with its predecessor (None for the first word, which a
    # real word never equals) and keep only words that differ from it.
    words = caption.split()
    deduped = [word for prev, word in zip([None] + words, words) if word != prev]
    return ' '.join(deduped)

# Entity names (lower-case) for which add_text_column generates a caption.
text_entities = ["height", "width", "depth", "item_weight"]


def should_generate_text(entity_name):
    """Return True if ``entity_name`` is one of ``text_entities``.

    The comparison is case-insensitive. Non-string values (for example the
    float NaN that pandas uses for a missing cell, or None) are treated as
    "no caption needed" instead of raising ``AttributeError``.

    Args:
        entity_name: The entity name to check; expected to be a string.

    Returns:
        True when the lower-cased name is listed in ``text_entities``,
        False otherwise (including for non-string input).
    """
    if not isinstance(entity_name, str):
        return False
    return entity_name.lower() in text_entities

def add_text_column(row):
    """Compute the ``text`` value for a single DataFrame row.

    Args:
        row: A row-like mapping providing ``'entity_name'`` and
            ``'image_link'``.

    Returns:
        The caption produced by ``generate_caption`` for the row's image when
        ``should_generate_text`` accepts the row's entity name; otherwise a
        single-space placeholder string.
    """
    wants_caption = should_generate_text(row['entity_name'])
    if not wants_caption:
        # Placeholder for rows that do not get a caption.
        return " "
    return generate_caption(row['image_link'])
# Driver: caption one fixed shard (rows 123500-123999) of the test set and
# write the result, with an added 'text' column, to the working directory.
df = pd.read_csv('/kaggle/input/amazon24/test.csv')
# Take an explicit copy of the slice: adding a column to a bare iloc slice
# triggers pandas' SettingWithCopyWarning because the slice may be a view of
# the full frame.
df = df.iloc[123500:124000].copy()

print("started")
# Row-wise apply: each row yields either a generated caption or a placeholder.
df['text'] = df.apply(add_text_column, axis=1)

output_path = '/kaggle/working/test_with_captions.csv'
df.to_csv(output_path, index=False)

print(f"Updated DataFrame saved to: {output_path}")