# In [None]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import numpy as np
import re 
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
# Multiplicative factors that convert one unit of each supported weight unit
# into tons (e.g. 1 gram == 1e-6 ton).
conversion_to_tons = {
    'gram': 1e-6,
    'ton': 1,
    'kilogram': 0.001,
    'ounce': 2.8349523125e-5,
    'pound': 0.00045359237,
    'carat': 2e-7,
    'microgram': 1e-12,
    'milligram': 1e-9,
}


def convert_to_tons(row):
    """Convert a row's ``entity_value`` text (e.g. ``"500 gram"``) to tons.

    Args:
        row: Mapping-like object (such as a DataFrame row) whose
            ``'entity_value'`` entry has the form ``"<number> <unit>"``.

    Returns:
        The weight in tons as a float, or ``None`` when the value is not a
        string, does not split into a number and a unit, the number cannot
        be parsed, or the unit is not a key of ``conversion_to_tons``.
    """
    raw_value = row['entity_value']
    if not isinstance(raw_value, str):
        # e.g. NaN from an empty CSV cell, which has no ``.lower()``.
        return None

    # Split on any run of whitespace so stray leading/double spaces do not
    # end up inside the number or the unit.
    parts = raw_value.lower().split(None, 1)
    if len(parts) != 2:
        return None
    number_text, unit = parts

    try:
        value = float(number_text)
    except ValueError:
        return None

    # An unrecognised unit must not silently be treated as tons (factor 1):
    # that would let e.g. "0.5 kilograms" through as half a ton.
    conversion_factor = conversion_to_tons.get(unit.strip())
    if conversion_factor is None:
        return None
    return value * conversion_factor

# Unit names that mark an ``entity_value`` as a weight measurement.
weight_units = [
    'gram', 'ton', 'kilogram', 'ounce',
    'pound', 'carat', 'microgram', 'milligram',
]

entity_name = 'item_weight'
train_df = pd.read_csv('/kaggle/input/amazon24/train.csv')

# Keep only rows for the target entity whose value is present, contains
# none of the excluded patterns ("[", the word "to", "e+17"), and mentions
# at least one weight unit (case-insensitive substring match).
# NOTE(review): `\be\+17\b` needs a word boundary before the "e", so it
# would not match text such as "1e+17" — confirm the intended pattern.
entity_df = (
    train_df.loc[train_df['entity_name'] == entity_name]
    .copy()
    .dropna(subset=['entity_value'])
    .loc[lambda df: ~df['entity_value'].str.contains(r'\[|\bto\b|\be\+17\b', regex=True, na=False)]
    .loc[lambda df: df['entity_value'].str.contains('|'.join(weight_units), case=False, na=False)]
)

# Replace the textual value with its numeric weight in tons.
entity_df['entity_value'] = entity_df.apply(convert_to_tons, axis=1)

# Keep weights strictly between 5e-8 and 1 (in tons), drop anything that
# failed to convert, and cap the sample at the first 1500 rows.
entity_df = (
    entity_df.loc[lambda df: df['entity_value'].gt(5e-8) & df['entity_value'].lt(1)]
    .dropna(subset=['entity_value'])
    .head(1500)
)


# Second name bound to the same DataFrame object (not a copy).
original_df = entity_df

# Load the BLIP captioning processor and model; both are created from the
# same pretrained checkpoint name.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

def generate_caption(image_url, timeout=30):
    """Download an image and return a BLIP-generated caption for it.

    Args:
        image_url: URL of the image to caption.
        timeout: Seconds to wait on the HTTP server before giving up.

    Returns:
        The decoded caption with runs of consecutive identical words
        collapsed into a single occurrence.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout a stalled server blocks the call forever.
    response = requests.get(image_url, timeout=timeout)
    # Surface HTTP errors directly instead of handing an error page to PIL.
    response.raise_for_status()

    with Image.open(BytesIO(response.content)) as downloaded:
        image = downloaded.convert('RGB')

    inputs = blip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs, max_new_tokens=20)
    caption = blip_processor.decode(output[0], skip_special_tokens=True)

    # Drop a word when it merely repeats the word immediately before it.
    words = caption.split()
    cleaned_words = [
        word
        for previous, word in zip([None] + words, words)
        if word != previous
    ]
    return ' '.join(cleaned_words)

# Caption every selected image and pair each caption with its weight label.
new_data = []
failed_links = []
total = len(entity_df)

for position, (image_url, entity_value) in enumerate(
    zip(entity_df['image_link'], entity_df['entity_value']), start=1
):
    print(f"Processing {position} of {total}")

    try:
        caption = generate_caption(image_url)
    except OSError as error:
        # Network errors from requests and image-decoding errors from PIL
        # both derive from OSError. One bad link should not abort the whole
        # run and discard the captions already produced.
        print(f"Skipping {image_url}: {error}")
        failed_links.append(image_url)
        continue

    new_data.append({
        'image_link': image_url,
        'entity_value': entity_value,
        'caption': caption
    })

# Explicit columns keep the CSV header stable even when no row succeeded.
new_df = pd.DataFrame(new_data, columns=['image_link', 'entity_value', 'caption'])
new_df.to_csv('new_dataframe_with_captions.csv', index=False)

print("New DataFrame created and saved to 'new_dataframe_with_captions.csv'")
print("Columns in the new DataFrame:", new_df.columns.tolist())
if failed_links:
    print(f"Skipped {len(failed_links)} of {total} images that could not be captioned")