In [27]:
from diffusers import DiffusionPipeline
from diffusers import UNet2DModel 

In [28]:
repo_id = "google/ddpm-cat-256"

# Load the model
model = UNet2DModel.from_pretrained(repo_id, use_safetensors=True)

In [29]:
print(model)

UNet2DModel(
  (conv_in): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (time_proj): Timesteps()
  (time_embedding): TimestepEmbedding(
    (linear_1): LoRACompatibleLinear(in_features=128, out_features=512, bias=True)
    (act): SiLU()
    (linear_2): LoRACompatibleLinear(in_features=512, out_features=512, bias=True)
  )
  (down_blocks): ModuleList(
    (0-1): 2 x DownBlock2D(
      (resnets): ModuleList(
        (0-1): 2 x ResnetBlock2D(
          (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
          (conv1): LoRACompatibleConv(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (time_emb_proj): LoRACompatibleLinear(in_features=512, out_features=128, bias=True)
          (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
          (dropout): Dropout(p=0.0, inplace=False)
          (conv2): LoRACompatibleConv(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (nonlinearity): SiLU()
        )
      )
      (downsampl

In [30]:
# check all the blocks with 'up' or 'down' in the name
up = []
down = []
for name, blocks in model.named_children():
    if 'up' in name:
        up.append(blocks)
    if 'down' in name:
        down.append(blocks)

In [31]:
print(up)

[ModuleList(
  (0): UpBlock2D(
    (resnets): ModuleList(
      (0-2): 3 x ResnetBlock2D(
        (norm1): GroupNorm(32, 1024, eps=1e-06, affine=True)
        (conv1): LoRACompatibleConv(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (time_emb_proj): LoRACompatibleLinear(in_features=512, out_features=512, bias=True)
        (norm2): GroupNorm(32, 512, eps=1e-06, affine=True)
        (dropout): Dropout(p=0.0, inplace=False)
        (conv2): LoRACompatibleConv(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (nonlinearity): SiLU()
        (conv_shortcut): LoRACompatibleConv(1024, 512, kernel_size=(1, 1), stride=(1, 1))
      )
    )
    (upsamplers): ModuleList(
      (0): Upsample2D(
        (conv): LoRACompatibleConv(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
  )
  (1): AttnUpBlock2D(
    (attentions): ModuleList(
      (0-2): 3 x Attention(
        (group_norm): GroupNorm(32, 512, eps=1e-06, affine=True)
      

In [32]:
print(model.config)
# those parameters are used to initialize the model, so they are not trainable

FrozenDict([('sample_size', 256), ('in_channels', 3), ('out_channels', 3), ('center_input_sample', False), ('time_embedding_type', 'positional'), ('freq_shift', 1), ('flip_sin_to_cos', False), ('down_block_types', ['DownBlock2D', 'DownBlock2D', 'DownBlock2D', 'DownBlock2D', 'AttnDownBlock2D', 'DownBlock2D']), ('up_block_types', ['UpBlock2D', 'AttnUpBlock2D', 'UpBlock2D', 'UpBlock2D', 'UpBlock2D', 'UpBlock2D']), ('block_out_channels', [128, 128, 256, 256, 512, 512]), ('layers_per_block', 2), ('mid_block_scale_factor', 1), ('downsample_padding', 0), ('downsample_type', 'conv'), ('upsample_type', 'conv'), ('dropout', 0.0), ('act_fn', 'silu'), ('attention_head_dim', None), ('norm_num_groups', 32), ('attn_norm_num_groups', None), ('norm_eps', 1e-06), ('resnet_time_scale_shift', 'default'), ('add_attention', True), ('class_embed_type', None), ('num_class_embeds', None), ('_use_default_values', ['downsample_type', 'resnet_time_scale_shift', 'attn_norm_num_groups', 'add_attention', 'upsample_t

In [33]:
import torch

torch.manual_seed(0)

noisy_sample = torch.randn(1, model.config.in_channels, model.config.sample_size, model.config.sample_size)
noisy_sample.shape

torch.Size([1, 3, 256, 256])

In [34]:
with torch.no_grad():
    noisy_residual = model(sample=noisy_sample, timestep=2).sample

In [35]:
from diffusers import DDPMScheduler

scheduler = DDPMScheduler.from_pretrained(repo_id)
print(scheduler)

DDPMScheduler {
  "_class_name": "DDPMScheduler",
  "_diffusers_version": "0.22.3",
  "beta_end": 0.02,
  "beta_schedule": "linear",
  "beta_start": 0.0001,
  "clip_sample": true,
  "clip_sample_range": 1.0,
  "dynamic_thresholding_ratio": 0.995,
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "sample_max_value": 1.0,
  "steps_offset": 0,
  "thresholding": false,
  "timestep_spacing": "leading",
  "trained_betas": null,
  "variance_type": "fixed_small"
}



In [36]:
from diffusers import DiffusionPipeline

pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
print(pipeline) 

unet\diffusion_pytorch_model.safetensors not found
Loading pipeline components...:  57%|█████▋    | 4/7 [00:07<00:04,  1.63s/it]`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
Loading pipeline components...: 100%|██████████| 7/7 [00:10<00:00,  1.47s/it]

StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.22.3",
  "_name_or_path": "runwayml/stable-diffusion-v1-5",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "requires_safety_checker": true,
  "safety_checker": [
    "stable_diffusion",
    "StableDiffusionSafetyChecker"
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}






In [42]:
pipeline.to("cuda")
image = pipeline("Image of a pokemon")[0]

image

100%|██████████| 50/50 [00:36<00:00,  1.38it/s]


[<PIL.Image.Image image mode=RGB size=512x512>]

In [51]:
from diffusers import StableDiffusionPipeline
import torch

model_id = "shibing624/asian-role"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.safety_checker = lambda images, **kwargs: (images, [False])

prompt = "1girl"

negative_prompt = """(((simple background))),monochrome ,lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, ugly, pregnant,vore,duplicate,morbid,mut ilated,tran nsexual, hermaphrodite,long neck,mutated hands,poorly drawn hands,poorly drawn face,mutation, deformed, (((missing arms))),(((missing legs))), (((extra arms))),(((extra legs))),pubic hair, plump,bad legs,error legs, bad feet, loli, little girl"""

image = pipe(prompt, height=1024, width=1024, num_inference_steps=200, guidance_scale=7.5, 
    negative_prompt=negative_prompt, num_images_per_prompt=1).images[0]

image.save("./6girl.png")

text_encoder\model.safetensors not found
Loading pipeline components...:  57%|█████▋    | 4/7 [00:03<00:02,  1.25it/s]`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
Loading pipeline components...: 100%|██████████| 7/7 [00:05<00:00,  1.36it/s]
100%|██████████| 200/200 [09:50<00:00,  2.95s/it]


In [None]:
import torch
from diffusers import (
    StableDiffusionXLPipeline, 
    EulerAncestralDiscreteScheduler,
    AutoencoderKL
)

# Initialize LoRA model and weights
lora_model_id = "Linaqruf/anime-slider-xl-lora"
lora_filename = "anime-slider-xl.safetensors"
lora_scale_slider = 2 # -2 for less detailed result

# Load VAE component
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", 
    torch_dtype=torch.float16
)

# Configure the pipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    "Linaqruf/animagine-xl-2.0", 
    vae=vae,
    torch_dtype=torch.float16, 
    use_safetensors=True, 
    variant="fp16"
)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to('cuda')

# Load and fuse LoRA weights
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora(lora_scale=lora_scale_slider)

# Define prompts and generate image
prompt = "face focus, cute, masterpiece, best quality, 1girl, black hair, sweater, looking at viewer, upper body, beanie, outdoors, day, turtleneck, nazi uniform, standing"
negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"

image = pipe(
    prompt, 
    negative_prompt=negative_prompt, 
    width=1024,
    height=1024,
    guidance_scale=12,
    num_inference_steps=50
).images[0]

# Unfuse LoRA before saving the image
pipe.unfuse_lora()
image.save("anime_girl.png")