In [None]:
!pip install diffusers

In [None]:
!pip install bitsandbytes


In [None]:
# Number of synthetic images requested from the generation run.
num_generate = 10_000

In [None]:
from diffusers import AutoPipelineForImage2Image
import random
from PIL import Image
import torch
import os

# Load the SDXL-Turbo image-to-image pipeline once, with float16 weights
# (the "fp16" checkpoint variant), and move it to the CUDA device.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/sdxl-turbo",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.to("cuda")

In [None]:

def load_celebrities(filename):
    """
    Load celebrity names from a text file, one name per line.

    Surrounding whitespace is stripped from every line and lines that are
    empty after stripping are skipped.

    Parameters:
    - filename (str): Path to the text file.

    Returns:
    - list[str]: The names in file order.
    """
    # Names often contain non-ASCII characters, so decode explicitly instead of
    # relying on the platform default. "utf-8-sig" reads plain UTF-8 and also
    # drops a leading byte-order mark, which would otherwise become part of the
    # first name.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        celebrities = [line.strip() for line in file if line.strip()]
    return celebrities

# Load the celebrity names that feed the 'celebrity' prompt attribute.
# The default path is under /kaggle/input; point it at your own copy of the
# names file when running somewhere else.
celebrity_file = '/kaggle/input/real-pops/celebrities.txt'  # Replace with your file path
celebrities = load_celebrities(celebrity_file)

# One row per prompt attribute: (name, candidate options, inclusion probability).
# The probability is the chance that the attribute appears in a given prompt.
_attribute_rows = [
    ('celebrity', celebrities, 0.8),  # 80% chance to include
    ('skin_tone', ["light", "medium", "dark", "olive", "tan"], 0.9),
    ('accessory',
     ["glasses", "hat", "headphones", "backpack", "jacket", "scarf", "watch", "necklace"],
     0.5),
    ('expression',
     ["smiling", "serious", "surprised", "happy", "angry", "winking", "laughing"],
     0.6),
    ('hairstyle',
     ["curly hair", "straight hair", "bald", "ponytail", "short hair", "long hair", "braided hair", "mohawk"],
     0.5),
    # Additional attributes
    ('clothing_style',
     ["casual", "formal", "sporty", "vintage", "punk", "gothic", "hipster", "bohemian"],
     0.5),
    ('pose',
     ["standing", "sitting", "jumping", "dancing", "running", "saluting", "posing heroically"],
     0.4),
    ('theme',
     ["superhero", "musician", "athlete", "actor", "fantasy character", "robot", "alien"],
     0.3),
    ('holding_item',
     ["a sword", "a book", "a microphone", "a camera", "a guitar", "a shield", "a magic wand"],
     0.3),
]

# Mapping of attribute name -> {'options': [...], 'probability': float},
# kept in the row order above.
attributes = {
    row_name: {'options': row_options, 'probability': row_probability}
    for row_name, row_options, row_probability in _attribute_rows
}

# Phrase template for every supported attribute name; "{}" receives the chosen option.
_PROMPT_TEMPLATES = {
    'celebrity': "inspired by {}",
    'skin_tone': "with {} skin",
    'accessory': "wearing {}",
    'expression': "showing a {} expression",
    'hairstyle': "with {}",
    'clothing_style': "wearing {} style clothing",
    'pose': "in a {} pose",
    'theme': "themed as a {}",
    'holding_item': "holding {}",
}


def _describe_attribute(attr_name, options):
    """Return the prompt phrase for one randomly chosen option of `attr_name`."""
    return _PROMPT_TEMPLATES[attr_name].format(random.choice(options))


def generate_random_prompt(attribute_specs=None):
    """
    Build a random Funko Pop! prompt from a set of optional attributes.

    Each attribute is included independently with its own probability. If the
    random draw selects nothing, one attribute is picked at random and
    included, so the prompt always carries at least one description whenever
    any attribute is usable.

    Parameters:
    - attribute_specs (dict | None): Mapping of attribute name to a dict with
      an 'options' list and a 'probability' float. Defaults to the module-level
      `attributes` mapping.

    Returns:
    - str: "A Funko Pop! figure <descriptions>. Set on a white background."
      When no attribute is usable the description part is omitted.
    """
    specs = attributes if attribute_specs is None else attribute_specs

    # Only attributes that have a phrase template and at least one option can
    # contribute text; an empty options list would make random.choice raise.
    usable = {
        name: info
        for name, info in specs.items()
        if name in _PROMPT_TEMPLATES and info['options']
    }

    prompt_parts = [
        _describe_attribute(name, info['options'])
        for name, info in usable.items()
        if random.random() < info['probability']
    ]

    # Nothing was drawn: force exactly one attribute. Every attribute goes
    # through the same template table, so the forced pick always yields text.
    if not prompt_parts and usable:
        forced_name = random.choice(list(usable))
        prompt_parts.append(_describe_attribute(forced_name, usable[forced_name]['options']))

    if not prompt_parts:
        return "A Funko Pop! figure. Set on a white background."

    # Construct the final prompt
    prompt_description = ', '.join(prompt_parts)
    return f"A Funko Pop! figure {prompt_description}. Set on a white background."

# Print a handful of example prompts to eyeball what the generator produces.
NUM_SAMPLE_PROMPTS = 5
for _sample_index in range(NUM_SAMPLE_PROMPTS):
    print(generate_random_prompt())

In [None]:
from tqdm import tqdm

def generate_synthetic_dataset(input_folder, num_images, output_folder, pipe, generate_random_prompt):
    """
    Generates a synthetic dataset by creating images based on random prompts and initial images.

    For every requested image a random file from `input_folder` is opened,
    converted to RGB, resized to 512x512 and passed to `pipe` together with a
    freshly generated prompt. Each result is saved as `synthetic_<index>.png`
    (1-based, zero-padded to five digits) and its prompt is appended to
    `generated_prompts.txt` inside `output_folder`. A failure on one image is
    printed and skipped, so file numbers may have gaps.

    Parameters:
    - input_folder (str): Path to the folder containing initial images.
    - num_images (int): Number of synthetic images to generate.
    - output_folder (str): Path to the folder where generated images will be saved.
    - pipe: The image generation pipeline/model. It is called with keyword
      arguments and must return an object with an `images` sequence.
    - generate_random_prompt (function): Function that generates a random prompt string.

    Returns:
    - None

    Raises:
    - ValueError: If `input_folder` holds no file with a supported image extension.
    """

    # Ensure the output directory exists
    os.makedirs(output_folder, exist_ok=True)

    # Supported image file extensions
    supported_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')

    # List all image files in the input folder. os.listdir gives no ordering
    # guarantee, so sort to make the selection reproducible once `random` is seeded.
    input_images = sorted(
        os.path.join(input_folder, fname)
        for fname in os.listdir(input_folder)
        if fname.lower().endswith(supported_extensions)
    )

    if not input_images:
        raise ValueError(f"No images found in the input folder: {input_folder}")

    print(f"Found {len(input_images)} images in {input_folder}. Starting generation of {num_images} images...")

    # Path for the prompts log file
    prompt_log_path = os.path.join(output_folder, "generated_prompts.txt")

    succeeded = 0
    failed = 0

    # Prompts can contain non-ASCII names, so the log encoding is fixed to UTF-8.
    with open(prompt_log_path, "w", encoding="utf-8") as log_file:
        for i in tqdm(range(num_images), desc="Generating images"):
            try:
                # Randomly select an initial image; the context manager closes
                # the underlying file as soon as the RGB copy has been made.
                init_image_path = random.choice(input_images)
                with Image.open(init_image_path) as source_image:
                    init_image = source_image.convert("RGB").resize((512, 512))

                # Generate a random prompt
                prompt = generate_random_prompt()

                # Randomly select guidance_scale between 0.3 and 0.5
                # NOTE(review): SDXL-Turbo is documented for guidance_scale=0.0 and
                # diffusers presumably applies guidance only above 1.0, so this
                # value may have no effect on the output -- confirm.
                guidance_scale = random.uniform(0.3, 0.5)

                # Generate the image using the pipeline
                # NOTE(review): strength=1 presumably replaces the initial image
                # with pure noise, leaving it little influence -- confirm this is intended.
                with torch.no_grad():
                    generated = pipe(
                        prompt=prompt,
                        image=init_image,
                        num_inference_steps=2,
                        strength=1,
                        guidance_scale=guidance_scale
                    )

                if hasattr(generated, 'images'):
                    image = generated.images[0]
                else:
                    raise AttributeError("The pipeline did not return an image.")

                # Define a unique filename
                output_filename = f"synthetic_{i+1:05d}.png"
                output_path = os.path.join(output_folder, output_filename)

                # Save the generated image
                image.save(output_path)

                # Log the filename and prompt, flushing so the log on disk stays
                # in step with the images even if the run is interrupted.
                log_file.write(f"{output_filename}: {prompt}\n")
                log_file.flush()
                succeeded += 1

            except Exception as e:
                # Best effort: report the failure and continue with the next image.
                failed += 1
                print(f"Error generating image {i+1}: {e}")

    # Report what was actually produced rather than what was requested.
    if failed:
        print(
            f"Generated {succeeded} of {succeeded + failed} images in {output_folder} "
            f"({failed} failed). Prompts saved in {prompt_log_path}."
        )
    else:
        print(f"Successfully generated {succeeded} images in {output_folder}. Prompts saved in {prompt_log_path}.")


In [None]:
# Run the full generation: read the real images, write the synthetic ones
# (and the prompt log) to the working directory.
generate_synthetic_dataset(
    input_folder="/kaggle/input/real-pops/real_hd_pops/real_hd_pops",
    num_images=num_generate,
    output_folder="/kaggle/working/pops_sdxl-turbo",
    pipe=pipe,
    generate_random_prompt=generate_random_prompt,
)