In [None]:
import json
import os
from PIL import Image
import requests
from transformers import AutoTokenizer

In [None]:
def load_jsonl_dataset(jsonl_file):
    """Load image-caption pairs from a JSON Lines file.

    Each non-blank line must be a JSON object with ``image_url`` and
    ``caption`` keys. Blank or whitespace-only lines (for example a trailing
    empty line at the end of the file) are skipped instead of raising.

    Args:
        jsonl_file: Path to the JSONL file; it is read as UTF-8.

    Returns:
        A list of ``(image_url, caption)`` tuples in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an entry lacks ``image_url`` or ``caption``.
    """
    dataset = []
    # Read as UTF-8 explicitly so non-ASCII captions do not depend on the
    # platform's default encoding.
    with open(jsonl_file, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # json.loads("") raises, so tolerate empty separator lines.
                continue
            entry = json.loads(line)
            dataset.append((entry['image_url'], entry['caption']))
    return dataset

In [None]:
def download_image(url, save_path, timeout=30):
    """Download an image from a URL and write it to ``save_path``.

    The body is streamed to disk in 1024-byte chunks. If the server answers
    with a status other than 200, a failure message is printed and no file
    is written.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; opened in binary write mode.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the call
            indefinitely.

    Raises:
        requests.RequestException: Propagated from ``requests`` on network
            errors or when the timeout elapses.
    """
    # The context manager releases the streamed connection on every path,
    # including the non-200 branch where the body is never consumed.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download {url}")
            return
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)

In [None]:
def preprocess_image(image_path, size=(512, 512)):
    """Preprocess an image to fit Stable Diffusion input.

    The image is opened, converted to RGB, and resized to ``size`` without
    preserving the aspect ratio.

    Args:
        image_path: Path of the image file to open.
        size: Target ``(width, height)`` passed to ``Image.resize``;
            defaults to 512x512.

    Returns:
        The converted and resized ``PIL.Image.Image``.
    """
    # Image.open is lazy and keeps the file open; convert() produces an
    # independent in-memory image, so the source can be closed right away.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    return image.resize(size)

In [None]:
def tokenize_captions(captions, tokenizer):
    """Run ``tokenizer`` over ``captions`` and return its output.

    The tokenizer is called with padding and truncation enabled and with
    ``return_tensors="pt"``; whatever it returns is handed back unchanged.
    """
    options = {
        "padding": True,
        "truncation": True,
        "return_tensors": "pt",
    }
    return tokenizer(captions, **options)