In [None]:
# before run this script, please install pytorch
!pip install openmim
!pip install git+https://github.com/okotaku/diffengine.git

In [None]:
# Training with diffengine config
# Runs `mim train` for the diffengine package on the
# stable_diffusion_v15_dreambooth_lora_dog.py config. The key=value pairs
# after `--cfg-options` set individual config entries from the command line:
# number of class images, batch size, iteration budget, an fp16
# AmpOptimWrapper, the optimizer learning rate and the LoRA rank.
!mim train diffengine stable_diffusion_v15_dreambooth_lora_dog.py \
  --cfg-options train_dataloader.dataset.class_image_config.num_images=50 \
  train_dataloader.batch_size=4 \
  train_cfg.max_iters=2000 \
  optim_wrapper.type='AmpOptimWrapper' \
  optim_wrapper.dtype='float16' \
  optim_wrapper.optimizer.lr=1e-5 \
  model.lora_config.rank=8

In [None]:
# Inference with diffengine config
import torch
from diffusers import DiffusionPipeline


def null_safety(images, **kwargs):
    """Pass-through replacement for the pipeline's safety checker.

    Args:
        images: Sized collection of generated images; returned untouched.
        **kwargs: Accepted so the call signature stays flexible; ignored.

    Returns:
        A tuple ``(images, flags)`` where ``flags`` is a list holding one
        ``False`` per image (presumably the "flagged" indicator the pipeline
        expects from a safety checker -- confirm against diffusers).
    """
    flags = [False] * len(images)
    return images, flags

# Inference settings.
device = 'cuda'
# LoRA output directory of the training run above; `step1999` is presumably
# the final iteration of the 2000-iteration schedule -- adjust if max_iters
# is changed.
checkpoint = 'work_dirs/stable_diffusion_v15_dreambooth_lora_dog/step1999'
prompt = 'A photo of sks dog in a bucket'
out = 'demo.png'

# Load the fp16 base pipeline; the LoRA weights are applied on top of it below.
pipe = DiffusionPipeline.from_pretrained(
    'runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16)
try:
    pipe.to(device)
    pipe.load_lora_weights(checkpoint)

    # Replace the safety checker with the pass-through defined above.
    pipe.safety_checker = null_safety

    image = pipe(
        prompt,
        num_inference_steps=50,
    ).images[0]
    image.save(out)
finally:
    # Run the cleanup even when loading/generation/saving fails, so a failed
    # cell does not keep the pipeline referenced (and its GPU memory cached)
    # while later cells run. The original exception still propagates.
    del pipe
    torch.cuda.empty_cache()

In [None]:
%%writefile sdv2.py
# Model section: a `StableDiffusion` model with a LoRA config of rank 8.
model = dict(
    type='StableDiffusion',
    model='stabilityai/stable-diffusion-2-1',  # base model switched to SD v2.1 (the earlier cells use v1.5)
    lora_config=dict(rank=8))

# Per-image preprocessing steps, listed in the order they appear in the
# pipeline: resize, random crop, random horizontal flip, tensor conversion,
# normalisation with mean/std 0.5, and packing of the inputs.
train_pipeline = [  # image size set to 768 (presumably to suit SD v2.1 -- confirm)
    dict(type='torchvision/Resize', size=768, interpolation='bilinear'),
    dict(type='RandomCrop', size=768),
    dict(type='RandomHorizontalFlip', p=0.5),
    dict(type='torchvision/ToTensor'),
    dict(type='torchvision/Normalize', mean=[0.5], std=[0.5]),
    dict(type='PackInputs'),
]
# Training data: the `diffusers/dog-example` dataset with the instance prompt
# 'a photo of sks dog', plus 50 class images for the prompt 'a photo of dog'
# (presumably for DreamBooth prior preservation -- confirm against diffengine).
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(
        type='HFDreamBoothDataset',
        dataset='diffusers/dog-example',
        instance_prompt='a photo of sks dog',
        pipeline=train_pipeline,
        class_prompt='a photo of dog',
        class_image_config=dict(num_images=50)),
    sampler=dict(type='InfiniteSampler', shuffle=True),
)

# No validation dataloader/evaluator is configured; the test settings simply
# alias the validation ones, so they are None as well.
val_dataloader = None
val_evaluator = None
test_dataloader = val_dataloader
test_evaluator = val_evaluator

# Extra training hooks:
# - VisualizationHook: given four copies of the test prompt, iteration-based
#   with interval 500 (presumably renders sample images every 500 iterations).
# - LoRASaveHook: presumably responsible for writing the LoRA weights that the
#   inference cell loads -- confirm against diffengine.
# - UnetEMAHook: momentum 1e-4, registered with priority 'ABOVE_NORMAL'.
custom_hooks = [
    dict(
        type='VisualizationHook',
        prompt=['A photo of sks dog in a bucket'] * 4,
        by_epoch=False,
        interval=500),
    dict(type='LoRASaveHook'),
    dict(type='UnetEMAHook', momentum=1e-4, priority='ABOVE_NORMAL')
]

# Optimizer: AdamW (lr 1e-4, weight decay 1e-2) wrapped in an AmpOptimWrapper
# with dtype float16, and gradient clipping configured with max_norm=1.0.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    dtype='float16',
    optimizer=dict(type='AdamW', lr=1e-4, weight_decay=1e-2),
    clip_grad=dict(max_norm=1.0))

# Train, val and test loop settings: an iteration-based training loop capped
# at 2000 iterations; validation and test loops are disabled (None).
train_cfg = dict(type='IterBasedTrainLoop', max_iters=2000)
val_cfg = None
test_cfg = None

# Checkpointing: iteration-based CheckpointHook with interval 100 and
# max_keep_ckpts=1 (presumably only the most recent checkpoint is retained).
default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook',
        interval=100,
        by_epoch=False,
        max_keep_ckpts=1,
    ), )
# Log processing is iteration-based as well, matching the training loop.
log_processor = dict(by_epoch=False)

# Default registry scope; presumably what lets the `type=` names above
# resolve to diffengine components -- confirm against mmengine.
default_scope = 'diffengine'

# Environment settings: cuDNN benchmark off, 'fork' multiprocessing start
# method with 4 OpenCV threads, and the NCCL backend for distributed runs.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=4),
    dist_cfg=dict(backend='nccl'),
)

# No checkpoint to load and no resume; the seed is left unset and
# deterministic mode is off.
load_from = None
resume = False
randomness = dict(seed=None, deterministic=False)

In [None]:
# Training with custom config
# Remove work_dirs/class_image first (recursive and forced, so a missing
# directory is not an error) -- presumably these are class images cached by
# the earlier v1.5 run that should be regenerated for this config; confirm
# against diffengine.
!rm -r -f work_dirs/class_image
# Train with the sdv2.py config written by the previous cell.
!mim train diffengine sdv2.py

In [None]:
# Inference with custom config
import torch
from diffusers import DiffusionPipeline


def null_safety(images, **kwargs):
    """Safety-checker stand-in that lets every image through.

    Args:
        images: Sized collection of generated images; handed back as-is.
        **kwargs: Any additional keyword arguments; accepted and ignored.

    Returns:
        ``(images, flags)`` with ``flags`` being a list of ``False`` values,
        one per image (presumably read by the pipeline as "not flagged" --
        confirm against diffusers).
    """
    image_count = len(images)
    return images, [False] * image_count

# Inference settings.
device = 'cuda'
# LoRA output directory of the sdv2.py training run; `step1999` is presumably
# the final iteration of the 2000-iteration schedule -- adjust if max_iters
# is changed.
checkpoint = 'work_dirs/sdv2/step1999'
prompt = 'A photo of sks dog in a bucket'
out = 'demo2.png'

# Load the fp16 base pipeline; the LoRA weights are applied on top of it below.
pipe = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-2-1', torch_dtype=torch.float16)
try:
    pipe.to(device)
    pipe.load_lora_weights(checkpoint)

    # Replace the safety checker with the pass-through defined above.
    pipe.safety_checker = null_safety

    image = pipe(
        prompt,
        num_inference_steps=50,
    ).images[0]
    image.save(out)
finally:
    # Run the cleanup even when loading/generation/saving fails, so a failed
    # cell does not keep the pipeline referenced (and its GPU memory cached)
    # for the rest of the session. The original exception still propagates.
    del pipe
    torch.cuda.empty_cache()