In [1]:
from huggingface_hub import login
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import os
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
login("{huggingface_token}")

In [None]:
# Generate novel images with novel super class and novel sub class

novel_subclasses = [
    "ocelot", "shoebill", "horned viper", "puffin", "salamander with red spots",
    "albino alligator", "maned wolf", "pangolin", "sika deer", "quokka",
    "fossa", "margay", "markhor", "tarsier", "saola",
    "kakapo", "dhole", "binturong", "numbat", "serval",
    "goblin shark", "springhare", "shoat", "dik-dik", "gelada",
    "quetzal", "wolverine", "capercaillie", "okapi", "zorilla",
    "slow loris", "aye-aye", "tree pangolin", "golden takin", "hoatzin",
    "dugong", "babirusa", "vaquita", "platypus", "tenrec",
    "kinkajou", "eastern quoll", "malayan tapir", "giant otter", "bush baby",
    "red uakari", "proboscis monkey", "aardwolf", "pygmy marmoset", "sun bear"
][:20]

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.enable_attention_slicing()

output_dir = "./data/synthetic_novel_images"
os.makedirs(output_dir, exist_ok=True)
annotations = []

prompt_template = "nature photograph of a novel animal, specifically a {}, centered composition, photorealistic, 8k, the animal is fully visible within frame and not cut off"

# image start with index 6288
image_index = 6288

for subclass in tqdm(novel_subclasses, desc="Generating images"):
    for _ in range(15):
        prompt = prompt_template.format(subclass)
        image = pipe(prompt).images[0]

        filename = f"{image_index}.jpg"
        image.save(os.path.join(output_dir, filename), format='JPEG')

        annotations.append({
            "image": filename,
            "superclass_index": 3,
            "subclass_index": 87,
            "description": prompt
        })

        image_index += 1

df = pd.DataFrame(annotations)
df.to_csv("./data/novel_train_data.csv", index=False)

In [None]:
# Generate novel images with seen super class and novel sub class

known_superclass_map = {
    0: "bird",
    1: "dog",
    2: "reptile"
}

known_superclass_novel_subs = {
    0: ["king eider", "sea eagle", "ibis", "bee-eater", "shoebill stork", "horned screamer", "lyrebird", "cassowary", "crowned crane", "hoatzin"],
    1: ["Carolina dog", "kai ken", "New Guinea singing dog", "coonhound", "Tibetan mastiff", "Azawakh", "Norwegian Lundehund", "Thai ridgeback", "Basenji", "Sloughi"],
    2: ["eyelash viper", "frilled dragon", "tokay gecko", "basilisk lizard", "flying gecko", "blue-tongued skink", "boa constrictor", "caiman lizard", "glass lizard", "gila monster"]
}

for super_idx, super_name in known_superclass_map.items():
    subclasses = known_superclass_novel_subs[super_idx]
    for subclass in tqdm(subclasses, desc=f"Generating for superclass: {super_name}"):
        for _ in range(10):
            prompt = f"nature photograph of a {super_name}, specifically a {subclass}, centered composition, photorealistic, 8k, the animal is fully visible within frame and not cut off"
            image = pipe(prompt).images[0]

            filename = f"{image_index}.jpg"
            image.save(os.path.join(output_dir, filename), format='JPEG')

            annotations.append({
                "image": filename,
                "superclass_index": super_idx,
                "subclass_index": 87,
                "description": prompt
            })

            image_index += 1

df = pd.DataFrame(annotations)
df.to_csv("./data/novel_train_data.csv", index=False)

In [2]:
# Input and output directories
input_dir = "./data/synthetic_novel_images"
output_dir = "./data/synthetic_novel_resized"

os.makedirs(output_dir, exist_ok=True)

# Resize each image in the input folder from 512x512 to 64x64
for filename in os.listdir(input_dir):
    if filename.lower().endswith((".png", ".jpg", ".jpeg")):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        try:
            # Load and resize image
            img = Image.open(input_path).convert("RGB")
            img = img.resize((64, 64), Image.BICUBIC)
            img.save(output_path)
            
            print(f"Resized and saved: {filename}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")


Resized and saved: 6400.jpg
Resized and saved: 6366.jpg
Resized and saved: 6372.jpg
Resized and saved: 6414.jpg
Resized and saved: 6428.jpg
Resized and saved: 6399.jpg
Resized and saved: 6602.jpg
Resized and saved: 6616.jpg
Resized and saved: 6819.jpg
Resized and saved: 6825.jpg
Resized and saved: 6831.jpg
Resized and saved: 6776.jpg
Resized and saved: 6762.jpg
Resized and saved: 6789.jpg
Resized and saved: 6574.jpg
Resized and saved: 6560.jpg
Resized and saved: 6548.jpg
Resized and saved: 6549.jpg
Resized and saved: 6561.jpg
Resized and saved: 6575.jpg
Resized and saved: 6788.jpg
Resized and saved: 6763.jpg
Resized and saved: 6777.jpg
Resized and saved: 6830.jpg
Resized and saved: 6824.jpg
Resized and saved: 6818.jpg
Resized and saved: 6617.jpg
Resized and saved: 6603.jpg
Resized and saved: 6398.jpg
Resized and saved: 6429.jpg
Resized and saved: 6373.jpg
Resized and saved: 6415.jpg
Resized and saved: 6401.jpg
Resized and saved: 6367.jpg
Resized and saved: 6417.jpg
Resized and saved: 6