In [4]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import pandas as pd
from io import BytesIO


def remove_duplicates(string):
    """Return *string* with repeated words dropped, keeping first occurrences.

    The input is split on whitespace, each word is kept only the first time
    it appears (comparison is exact and case-sensitive), and the surviving
    words are re-joined with single spaces.
    """
    seen = set()
    kept = []
    for token in string.split():
        if token in seen:
            continue  # already emitted earlier in the string
        seen.add(token)
        kept.append(token)
    return ' '.join(kept)


# Input CSV of sampled products; change this path to point at your own file
file_path = 'sampled_products_5_each_img_front.csv'

# Load the product table
df = pd.read_csv(file_path)

# Add the four caption columns in one assignment, each initialised to ''
df[[
    'img_caption_condition',
    'img_caption_uncondition',
    'redu_img_caption_condition',
    'redu_img_caption_uncondition',
]] = ''

# Keep only the rows whose note marks them as front-image entries, and only
# the SKU and document-value columns of those rows
dfImage = df.loc[
    df['ItemDocumentNote'].eq("upc_img_pth_frnt"),
    ["ItemSku", "ItemDocumentValue"],
]
print(dfImage)

# BLIP captioning model and its matching processor, from the same checkpoint
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")


# Browser-like User-Agent sent with every image request; it does not change
# between iterations, so it is built once outside the loop.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'}

# For every front-image row: download the image, generate a conditional and an
# unconditional BLIP caption, store both (plus their de-duplicated variants)
# on all rows of `df` that share the SKU, and rewrite the output CSV.
for index, row in dfImage.iterrows():
    sku = row["ItemSku"]
    img_url = row["ItemDocumentValue"]
    # Skip rows without a usable URL: either the literal 'null' placeholder
    # or an empty CSV cell, which pandas reads as NaN.
    if pd.isna(img_url) or img_url == 'null':
        continue

    try:
        # The context manager releases the connection even if decoding fails.
        with requests.get(img_url, headers=headers, timeout=5) as response:
            # Check the HTTP status first so an error page is reported as an
            # HTTP error instead of being handed to PIL as if it were an image.
            response.raise_for_status()
            # `response.content` has Content-Encoding (e.g. gzip) already
            # decoded, unlike `response.raw`.
            raw_image = Image.open(BytesIO(response.content)).convert('RGB')
    except Exception as e:
        # Deliberately best-effort: any download or decode failure is logged
        # and the row is skipped so one bad URL does not stop the whole run.
        print(f"Error: {e}")
        continue

    print(f"-----\n{index}")
    print(f"ItemSku: {sku}")
    print(img_url)

    # Conditional image captioning: the text prompt seeds the generation
    text = "Image caption: "
    inputs = processor(raw_image, text, return_tensors="pt")

    out_conditional = model.generate(**inputs, max_new_tokens=30)
    conditional_caption = processor.decode(out_conditional[0], skip_special_tokens=True)
    redu_conditional_caption = remove_duplicates(conditional_caption)
    print(conditional_caption)

    # Unconditional image captioning: image only, no text prompt
    inputs = processor(raw_image, return_tensors="pt")

    out_unconditional = model.generate(**inputs, max_new_tokens=30)
    unconditional_caption = processor.decode(out_unconditional[0], skip_special_tokens=True)
    redu_unconditional_caption = remove_duplicates(unconditional_caption)
    print(unconditional_caption)

    # Every row of the full table with this SKU receives the captions;
    # the mask is computed once and reused for the four column writes.
    sku_rows = df['ItemSku'] == sku
    df.loc[sku_rows, ['img_caption_condition']] = conditional_caption
    df.loc[sku_rows, ['img_caption_uncondition']] = unconditional_caption
    df.loc[sku_rows, ['redu_img_caption_condition']] = redu_conditional_caption
    df.loc[sku_rows, ['redu_img_caption_uncondition']] = redu_unconditional_caption

    # The whole CSV is rewritten after each captioned image, so the file on
    # disk always reflects the captions produced so far.
    df.to_csv("output_captions2.csv", index=False)


        ItemSku                                  ItemDocumentValue
0    2822561828  http://static.meijer.com/Media/000/28225/00028...
1    2454321371  http://static.meijer.com/Media/000/24543/00024...
2    4153091149  https://www.meijer.com/content/dam/meijer/prod...
3    5113185914  http://static.meijer.com/Media/000/51131/00051...
4   88796194073  http://static.meijer.com/Media/008/87961/00887...
5    5100018596  https://www.meijer.com/content/dam/meijer/prod...
6   69863908001  http://static.meijer.com/Media/006/98639/00698...
7   71928367273  http://static.meijer.com/Media/007/19283/00719...
8    4190008594  http://static.meijer.com/Media/000/41900/00041...
9   71373392186  http://static.meijer.com/Media/007/13733/00713...
10   3076867229  http://static.meijer.com/Media/000/30768/00030...
11  30997013549  http://static.meijer.com/Media/003/09970/00309...
12   7780236089  http://static.meijer.com/Media/000/77802/00077...
13  71373398185  http://static.meijer.com/Media/007/13733/0071



image caption : letterheads
a white envelope with a red and black check card
-----
4
ItemSku: 88796194073
http://static.meijer.com/Media/008/87961/0088796194073_0_A1C1_0600.png
image caption : star wars the child yoorm plush
the child yo yo yo yo yo yo yo yo yo yo yo yo yo yo yo yo yo yo yo
-----
5
ItemSku: 5100018596
https://www.meijer.com/content/dam/meijer/product/0051/00/0185/96/0051000185969_0_A1C1_0600.jpg
image caption : ve ve ve ve ve ve ve ve ve ve ve ve ve ve ve ve ve
six six six six six six six six six six six six six six six six six six six six six
-----
6
ItemSku: 69863908001
http://static.meijer.com/Media/006/98639/0069863908001_1_A1C1_0600.png
image caption : sauce
sweet chili sauce
-----
7
ItemSku: 71928367273
http://static.meijer.com/Media/007/19283/0071928367273_1_A1C1_0600.png
image caption :s, salted pumpkin seeds, 1 lb
a bag of dried pumpkin seeds
-----
8
ItemSku: 4190008594
http://static.meijer.com/Media/000/41900/0004190008594_1_A1C1_0600.png
image caption :s dai



image caption : xtred allergy congestion relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief relief
xtred allergy congestion tablets
