In [None]:
!pip install safetensors xformers pillow torch torchvision


In [None]:
!pip install diffusers transformers accelerate openai open_clip_torch py-real-esrgan huggingface_hub


In [None]:
# Patch py_real_esrgan/model.py so it uses `hf_hub_download` instead of
# `cached_download` (presumably because the installed huggingface_hub no longer
# exposes `cached_download` -- confirm against the installed version).
#
# The package directory is looked up at runtime instead of hard-coding a
# python3.11 dist-packages path, so the patch keeps working when the
# interpreter version or the install location changes. `find_spec` on the
# top-level package does not execute `model.py`, so the broken import inside it
# is not triggered before the patch is applied.
import importlib
import importlib.util
from pathlib import Path

importlib.invalidate_caches()  # make packages installed earlier in this session visible
_esrgan_spec = importlib.util.find_spec("py_real_esrgan")
if _esrgan_spec is None or not _esrgan_spec.submodule_search_locations:
    raise ImportError("py_real_esrgan is not installed; run the install cells first.")

_esrgan_model_path = Path(list(_esrgan_spec.submodule_search_locations)[0]) / "model.py"
_esrgan_source = _esrgan_model_path.read_text()
_esrgan_patched = _esrgan_source.replace(
    "from huggingface_hub import hf_hub_url, cached_download",
    "from huggingface_hub import hf_hub_download, hf_hub_url",
).replace(
    "cached_download(hf_hub_url(repo_id, filename))",
    "hf_hub_download(repo_id=repo_id, filename=filename)",
)
# Only rewrite the file when something actually changed, so re-running is a no-op.
if _esrgan_patched != _esrgan_source:
    _esrgan_model_path.write_text(_esrgan_patched)
    print(f"Patched {_esrgan_model_path}")
else:
    print(f"No changes needed in {_esrgan_model_path}")


In [None]:
import torch, requests, os
from PIL import Image
from diffusers import StableDiffusionPipeline, DDIMScheduler
from transformers import CLIPModel, CLIPProcessor
import openai
from py_real_esrgan.model import RealESRGAN
from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPProcessor, CLIPModel
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
import numpy as np
import json

In [None]:
# Take the OpenAI key from the OPENAI_API_KEY environment variable, or prompt
# for it interactively when the variable is unset/empty. This keeps the secret
# out of the notebook file (the previous placeholder line had no right-hand
# side and was a SyntaxError until a key was pasted in).
import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

In [None]:
# BLIP captioning stack: processor plus half-precision weights placed on the GPU.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(
    BLIP_CHECKPOINT,
    torch_dtype=torch.float16,
)
blip_model = blip_model.to("cuda")

In [None]:
import pandas as pd
from tqdm import tqdm
import re

def ensure_direct_image_url(url):
    """Turn an imgur page link into a direct ``i.imgur.com`` image link.

    Args:
        url: Any URL string.

    Returns:
        ``url`` unchanged when it does not mention imgur.com or when its path
        already ends in a known image extension; otherwise
        ``https://i.imgur.com/<id>.jpg`` where ``<id>`` is the first path
        segment after an optional ``gallery/`` or ``a/`` prefix. If no such
        segment can be found the URL is returned unchanged.
    """
    if "imgur.com" not in url:
        return url

    # Test the extension on the path only: a trailing query string or fragment
    # (e.g. ".jpg?1") must not make an already-direct link look extension-less,
    # which previously produced ".jpg.jpg" URLs.
    path_only = re.split(r'[?#]', url, maxsplit=1)[0]
    if re.search(r'\.(jpg|jpeg|png|gif|bmp|webp|tiff)$', path_only, re.IGNORECASE):
        return url

    # The optional prefix group makes this pattern cover plain
    # "imgur.com/<id>" links too, so no second fallback pattern is needed.
    match = re.search(r'imgur\.com/(?:gallery/|a/)?([^/?#]+)', url)
    if match:
        return f"https://i.imgur.com/{match.group(1)}.jpg"
    return url

def smart_download_image(url, save_path):
    """Download ``url`` to ``save_path`` when the server answers with an image.

    Dropbox links are first rewritten so that the query string carries
    ``raw=1`` (and no ``dl`` flag), which asks Dropbox for the file itself
    rather than the preview page.

    Args:
        url: Address of the image to fetch.
        save_path: File path the response body is written to on success.

    Returns:
        True if the response had status 200 and a content type starting with
        "image" and the body was written to ``save_path``; False otherwise,
        including when the request raised an exception.
    """
    if "dropbox.com" in url:
        # Function-scope import keeps this helper self-contained.
        from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

        parts = urlsplit(url)
        # Keep every parameter except existing dl/raw flags (share links may
        # carry others, e.g. rlkey), then append raw=1. Building the query from
        # parsed pairs guarantees a correct "?" / "&" separator, which the
        # previous string concatenation did not.
        params = [
            (key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if key not in ("dl", "raw")
        ]
        params.append(("raw", "1"))
        url = urlunsplit(parts._replace(query=urlencode(params)))
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": url,
        "Accept-Encoding": "identity",
        "Connection": "keep-alive"
    }
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        content_type = resp.headers.get('content-type', '')
        if resp.status_code == 200 and content_type.lower().startswith("image"):
            with open(save_path, "wb") as f:
                f.write(resp.content)
            return True
        # Report why the response was rejected instead of failing silently.
        print(f"Download rejected for {url}: status={resp.status_code}, content-type='{content_type}'")
    except Exception as e:
        print(f"Download error for {url}: {e}")
    return False

def generate_blip_caption(image_path):
    """Caption the image at ``image_path`` with the notebook's BLIP model.

    Uses the module-level ``blip_processor`` and ``blip_model``. Decoding is
    deterministic beam search (``do_sample=False``, 11 beams).

    Args:
        image_path: Path of an image file readable by PIL.

    Returns:
        The decoded caption string, or "" if loading the image or running the
        model raised (the error is printed).
    """
    try:
        # The context manager closes the underlying file handle; convert()
        # returns an independent in-memory copy that outlives it.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        # Move the inputs to the model's device and cast floating-point tensors
        # to the model's dtype so they match the weights.
        inputs = blip_processor(images=image, return_tensors="pt").to(blip_model.device, blip_model.dtype)
        output = blip_model.generate(**inputs, max_length=60, num_beams=11,
                                     length_penalty=1.7, repetition_penalty=1.4, early_stopping=True, do_sample=False)
        caption = blip_processor.decode(output[0], skip_special_tokens=True)
        return caption
    except Exception as e:
        print(f"BLIP failed for {image_path}: {e}")
        return ""

# Download the first N source images and caption each with BLIP.
# Results: `image_urls` holds (filename, url) pairs and `original_captions`
# holds the caption at the same position. Only images that downloaded
# successfully AND received a usable caption are recorded.
df = pd.read_csv("RealEdit_train_split_urls.csv")
N = 10
image_urls = []
original_captions = []
os.makedirs("originals", exist_ok=True)
# Phrases that BLIP produces when the downloaded file is a "removed image"
# placeholder rather than real content.
BAD_CAPTION_KEYWORDS = ["image you are requesting", "not available", "doesn’t exist", "doesn't exist", "no longer available", "broken image", "missing image", "404"]

# head(N) limits by row position, so the cap holds even if the frame's index
# labels are not 0..len-1.
rows_to_process = df.head(N)

for _row_label, row in tqdm(rows_to_process.iterrows(), total=len(rows_to_process)):
    filename = row["input_image_name"]
    orig_url = ensure_direct_image_url(str(row["input_url"]))
    save_path = f"originals/{filename}"

    if not smart_download_image(orig_url, save_path):
        print(f"Skipping {filename} due to download failure.")
        continue

    caption = generate_blip_caption(save_path)
    print(f"{filename} BLIP caption: {caption}")

    # Validate BEFORE recording: previously the entry was appended first, so
    # the "skip" below had no effect and bad captions were kept.
    caption_clean = caption.lower()
    if not caption_clean.strip() or any(bad_phrase in caption_clean for bad_phrase in BAD_CAPTION_KEYWORDS):
        print(f"Skipping {filename} due to invalid BLIP caption: '{caption}'")
        continue

    image_urls.append((filename, orig_url))
    original_captions.append(caption)

In [None]:
import openai
from openai import OpenAI
# Chat-completions client, authenticated with the key held in `openai.api_key`.
client = OpenAI(api_key=openai.api_key)

In [None]:
# CLIP ViT-L/14: processor plus model, with the model placed on the GPU.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"
clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to("cuda")

In [None]:
# Ask the chat model for edit instructions (and matching post-edit captions)
# for every captioned image. `generated_edits` maps filename -> list of
# {"instruction": ..., "edited_caption": ...} dicts; an image is only stored
# when the reply parses as JSON and has exactly the expected shape.
client = OpenAI(api_key=openai.api_key)

EXPECTED_EDIT_COUNT = 10  # must match the count requested in the prompt text
REQUIRED_EDIT_FIELDS = ("instruction", "edited_caption")

generated_edits = {}

for (filename, url), caption in zip(image_urls, original_captions):
    # An empty caption gives the model nothing to work from; don't spend an API call on it.
    if not caption or not caption.strip():
        print(f"Skipping {filename}: empty caption.")
        continue

    print(f"\nGenerating edit instructions for image '{filename}' with caption: {caption}")
    prompt = (
        "You are simulating real user editing behavior for a dataset of image edits.\n"
        "Given a description of an image, imagine how actual users would ask to modify it. "
        "These edits should be creative, realistic, and specific; things a person might type into an AI editor, like:\n"
        "- 'Add a dog sitting near the woman'\n"
        "- 'Make the sunset more vibrant'\n"
        "- 'Change the man’s outfit to a business suit'\n"
        "- 'Remove the second person from the left'\n"
        "- 'Make the child look older'\n"
        "Each edit should involve a meaningful visual change to the image, not just generic filters like 'increase contrast'.\n"
        "\n"
        "For each instruction, generate a matching edited image caption that describes the image *after* the edit.\n"
        "Avoid repetitions. The 10 edits must be diverse (e.g. subject, background, object-level, style).\n"
        "\n"
        "Output a JSON array of 10 items, where each item is an object with two fields:\n"
        "- 'instruction': the user's edit request\n"
        "- 'edited_caption': the caption for the image after applying that edit\n"
        "Do not include any explanation. Return only the JSON array.\n\n"
        f"Image Description: \"{caption}\""
    )

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.8
        )
        # `content` may be None; normalise to "" so the string handling below
        # reports a parse failure instead of raising AttributeError.
        gpt_output = response.choices[0].message.content or ""
    except Exception as e:
        print(f"GPT API call failed for {filename}: {e}")
        continue

    # Keep only the outermost [...] span in case the model wrapped the array
    # in extra text; require the closing bracket to come after the opening one.
    start_idx = gpt_output.find('[')
    end_idx = gpt_output.rfind(']')
    if start_idx != -1 and end_idx > start_idx:
        json_str = gpt_output[start_idx:end_idx + 1]
    else:
        json_str = gpt_output.strip()

    try:
        instructions_list = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"JSON parsing failed for {filename}: {e}")
        continue

    if not isinstance(instructions_list, list) or len(instructions_list) != EXPECTED_EDIT_COUNT:
        print(f"Unexpected format or not 10 items returned for {filename}, skipping.")
        continue

    # Check every item before storing, so a malformed entry cannot raise
    # KeyError/TypeError in the printout below after the list was already saved.
    if not all(
        isinstance(item, dict) and all(field in item for field in REQUIRED_EDIT_FIELDS)
        for item in instructions_list
    ):
        print(f"Items missing 'instruction'/'edited_caption' for {filename}, skipping.")
        continue

    generated_edits[filename] = instructions_list
    for idx, item in enumerate(instructions_list, start=1):
        print(f" {idx}. {item['instruction']} -> Edited caption: {item['edited_caption']}")
