In [None]:
import torch
from diffusers import StableDiffusionPipeline
import os

# Configuration
MODEL_ID = "runwayml/stable-diffusion-v1-5"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
PROMPT = ("A futuristic cityscape at sunset with floating crystalline structures, "
          "holographic wildlife projections, and neon vegetation, "
          "cyberpunk style, intricate details, 8k resolution")
NUM_IMAGES = 3
IMAGE_SIZE = 512  # Optimal for v1-5
NUM_INFERENCE_STEPS = 50
GUIDANCE_SCALE = 8.5
BASE_SEED = 42  # image k (0-based) is generated with seed BASE_SEED + k

# Initialize pipeline
# - Half precision is only requested on CUDA; CPU runs use float32.
# - `use_auth_token=True` was removed: current diffusers releases report the
#   keyword as unexpected and ignore it, so it only produced a warning.
# - `safety_checker=None` disables the safety filter. Do not expose the
#   unfiltered results in a public-facing service.
pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    safety_checker=None,
).to(DEVICE)
pipe.enable_attention_slicing()

# Generate images
for idx in range(NUM_IMAGES):
    # A freshly seeded generator per image keeps every output reproducible
    # independently of how many images were generated before it.
    generator = torch.Generator(DEVICE).manual_seed(BASE_SEED + idx)
    image = pipe(
        prompt=PROMPT,
        num_inference_steps=NUM_INFERENCE_STEPS,
        guidance_scale=GUIDANCE_SCALE,
        generator=generator,
        height=IMAGE_SIZE,
        width=IMAGE_SIZE,
    ).images[0]

    # Save only the original image (file names are 1-based: gen_image_1.png, ...)
    image.save(f"gen_image_{idx+1}.png")
    print(f"Generated image {idx+1}")

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Keyword arguments {'use_auth_token': True} are not expected by StableDiffusionPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/50 [00:00<?, ?it/s]

Generated image 1


  0%|          | 0/50 [00:00<?, ?it/s]

Generated image 2


  0%|          | 0/50 [00:00<?, ?it/s]

Generated image 3


In [None]:
import torch
from torchvision import transforms
from PIL import Image
import os

def preprocess_image(image_path: str, target_size: int = 224) -> torch.Tensor:
    """Convert an image file to a normalized single-channel tensor and save it.

    The image is loaded, forced to RGB, resized, converted to a tensor,
    normalized per channel and finally reduced to one grayscale channel.
    The result is also written to the current working directory as
    ``preprocessed_<basename of image_path>.pt`` via ``torch.save``, bundled
    with the original image size and the source path.

    Args:
        image_path: Path of the image file to load.
        target_size: Value handed to ``transforms.Resize``. Per the
            torchvision documentation an int resizes the shorter edge and
            keeps the aspect ratio, so only square inputs come out as
            ``target_size x target_size``.

    Returns:
        The preprocessed tensor (one channel).
    """
    # Define preprocessing pipeline
    # NOTE(review): Normalize runs on the three RGB channels *before* the
    # grayscale reduction, and the mean/std look like the usual ImageNet
    # statistics. The order is kept as-is so existing outputs do not change;
    # confirm this is the intended order.
    preprocess = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
        transforms.Grayscale(num_output_channels=1),
    ])

    # Load the image inside a context manager so the file handle is released,
    # and force RGB so the 3-channel Normalize above also works for
    # grayscale, palette or RGBA files.
    with Image.open(image_path) as img:
        original_size = img.size
        rgb_image = img.convert("RGB")  # loads pixel data before the file closes

    tensor = preprocess(rgb_image)

    # Save with metadata
    torch.save({
        'tensor': tensor,
        'original_size': original_size,
        'source': image_path
    }, f"preprocessed_{os.path.basename(image_path)}.pt")

    return tensor

if __name__ == "__main__":
    # Preprocess gen_image_1.png .. gen_image_3.png; files that are not
    # present on disk are skipped without any message.
    for image_number in (1, 2, 3):
        input_path = f"gen_image_{image_number}.png"
        if not os.path.exists(input_path):
            continue
        tensor = preprocess_image(input_path)
        print(f"Processed {input_path} -> Tensor shape: {tensor.shape}")

Processed gen_image_1.png -> Tensor shape: torch.Size([1, 224, 224])
Processed gen_image_2.png -> Tensor shape: torch.Size([1, 224, 224])
Processed gen_image_3.png -> Tensor shape: torch.Size([1, 224, 224])
