# Stable Diffusion on diffusers



**Run this cell after each session restart**

In [None]:
#@title General setup { display-mode: "form", run: "auto" }

!pip install torch torchvision 
!pip install accelerate
!pip install gputil 

# !pip install torchmetrics==0.10.3
# !pip install pytorch-lightning==1.8.3.post0

from IPython.display import HTML, Image, display, clear_output
from moviepy.editor import ImageSequenceClip, ipython_display
import ipywidgets as widgets
import os
import numpy as np

!apt-get -qq install ffmpeg
!pip install ninja
from google.colab import drive
drive.mount('/G', force_remount=True)
gdir = '/G/MyDrive/'
%cd $gdir

#@markdown Copying StyleGAN2 to the directory below on your Google drive (creating it, if it doesn't exist):
work_dir = 'sdfu' #@param {type:"string"}
#@markdown NB: Avoid connecting Google drive manually via the icon in Files section on the left. Doing so may break further operations.

work_dir = gdir + work_dir + '/'
if not os.path.isdir(work_dir):
  !git clone https://github.com/eps696/SDfu $work_dir
%cd $work_dir
!pip install -r requirements.txt
# !pip install git+https://github.com/openai/CLIP.git@main#egg=clip
# !pip install git+https://github.com/facebookresearch/xformers.git

from src.util.text import txt_clean
from src.util.utils import basename
from download import get_model
maindir = '/content/models'
command = ''
args = ''

clear_output()

# Hardware check
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
import GPUtil as GPU
gpu = GPU.getGPUs()[0]
!nvidia-smi -L
print("GPU RAM {0:.0f}MB | Free {1:.0f}MB)".format(gpu.memoryTotal, gpu.memoryFree))
print('\nDone!')

In [None]:
#@title Load model

model = "1.5" #@param ['1.5','1.5-inpainting','2.1', '2-inpainting', '2-depth', '2.1-v']

# Destination directories, one per model family.
dir_v1 = '/content/models/v1'
dir_v2 = '/content/models/v2'
dir_v2v = '/content/models/v2v'

# Base archives shared by several selectable variants.
base_v1 = [
  ("https://www.dropbox.com/s/9wuhum8w0iqs4o7/sdfu-v15-full-fp16.zip?dl=1", dir_v1),
  ("https://www.dropbox.com/s/z9uycihl6tybx9y/sdfu-v1-vaes-fp16.zip?dl=1", dir_v1),
]
base_v2 = [
  ("https://www.dropbox.com/s/38876tjuklvwq82/sdfu-v21-full-fp16.zip?dl=1", dir_v2),
]

# Archives to fetch for every selectable model, listed in download order.
downloads = {
  '1.5': base_v1,
  '1.5-inpainting': base_v1 + [("https://www.dropbox.com/s/bzmjnslk2sgsbt9/sdfu-v15i-unet-fp16.zip?dl=1", dir_v1)],
  '2.1': base_v2,
  '2-inpainting': base_v2 + [("https://www.dropbox.com/s/r5fa1mdxpw9e8k2/sdfu-v2i-unet-fp16.zip?dl=1", dir_v2)],
  '2-depth': base_v2 + [("https://www.dropbox.com/s/4visx4qcscirvob/sdfu-v2d-unet-fp16.zip?dl=1", dir_v2)],
  '2.1-v': [("https://www.dropbox.com/s/10gbecrugca1ydv/sdfu-v21v-full-fp16.zip?dl=1", dir_v2v)],
}

# An unrecognised model name fetches nothing here; only the clipseg weights below.
for url, target_dir in downloads.get(model, []):
  get_model(url, target_dir)

# clipseg for text masking
get_model("https://www.dropbox.com/s/c0tduhr4g0al1cq/rd64-uni.pth?dl=1", '/content/models/clipseg', unzip=False)


## Generation


### Other settings [optional]

In [None]:
#@markdown Run this cell to override settings, if needed
out_dir = '_out' #@param {type:"string"}
sizeX = 512 #@param {type:"integer"} 
sizeY = 512 #@param {type:"integer"}

cfg_scale = 7.5 #@param {type:"number"}
strength = 0.75 #@param {type:"number"}
steps = 50 #@param {type:"integer"}

sampler = 'ddim' #@param ['ddim', 'pndm', 'euler', 'klms', 'euler_a']
VAE = 'ema' #@param ['original', 'ema', 'mse']
batch = 1 #@param {type:"integer"}
seed = 696 #@param {type:"integer"}
ddim_eta = 0. #@param {type:"number"}

unprompt = 'ugly, low quality, poorly drawn, out of focus, deformed, disfigured, disgusting, bad art, blurry, oversaturated' #@param {type:"string"}
parens = True #@param {type:"boolean"}

# Assemble the option string that the generation cells append to their
# `%run src/gen.py` / `%run src/latwalk.py` command lines.
args = ''
args += ' -o %s ' % out_dir
args += ' -sz %d-%d ' % (sizeX, sizeY)
args += ' -C %f ' % cfg_scale
args += ' -f %f ' % strength
args += ' -s %d ' % steps
args += ' -sm %s ' % sampler
args += ' --vae %s ' % VAE
args += ' -b %d ' % batch
args += ' -S %d ' % seed
args += ' --ddim_eta %f ' % ddim_eta
# The negative prompt contains spaces, so it must be quoted to reach the script
# as a single argument (same convention as the text prompt in the Inputs cell).
args += ' -un "%s" ' % unprompt
if parens:
  args += ' -par '



### Inputs

In [None]:
#@markdown All paths below are relative to the work directory on G drive (set during General setup above). 

#@markdown Specify a text string or path to a text file to use **txt2img**:
prompt = '' #@param {type:"string"}

#@markdown Specify path to an image or directory to use **img2img**:
images = '' #@param {type:"string"}
#@markdown Specify mask as an image or directory or text string to use **inpainting**:
mask = '' #@param {type:"string"}
# mask_text = 'human, person' #@param {type:"string"}

#@markdown Load finetuned files if needed:
method = "custom" #@param ['text','lora','custom']
load_file = '' #@param {type:"string"}

maindir = '/content/models'
%cd $work_dir
command = ' -md %s ' % maindir
workname = ''

if len(prompt) > 0:
  if os.path.exists(prompt):
    print('found', prompt)
    command += ' -t %s ' % prompt
  else:
    command += ' -t "%s" ' % prompt
  workname = txt_clean(basename(prompt))[:44]

if len(images) > 0 and os.path.exists(images):
  print('found', images)
  command += ' -im %s ' % images
  if len(workname) == 0:
    workname = txt_clean(basename(images))

if len(mask) > 0:
  if not os.path.exists(mask): 
      mask = '"%s"' % mask
  command += ' -M %s ' % mask

if len(load_file) > 0 and os.path.exists(load_file):
  print('found', load_file)
  cmd = 'rt' if method=='text' else 'rl' if method=='lora' else 'rd'
  command += ' -%s %s ' % (cmd, load_file)

!echo $command $args


### Generate

In [None]:
#@markdown Separate frames

# Runs src/gen.py with the options assembled by the earlier cells:
# `command` (model directory, prompt, images, mask, finetuned file) from the Inputs cell
# and `args` from the optional settings cell (an empty string if that cell was not run).
%run src/gen.py $command $args

In [None]:
#@markdown Interpolations

latent_blending = True #@param {type:"boolean"}
frame_step = 10 #@param {type:"integer"}
num_repeat = 1 #@param {type:"integer"}

command += ' -fs %d -n %d ' % (frame_step, num_repeat)

if latent_blending:
  command += ' --latblend '

%run src/latwalk.py $command $args


Set `latent_blending` if you need smooth transitions.  
`frame_step` = length of the transition between prompts or images (in frames).  
`num_repeat` = number of times the inputs are repeated, e.g. to make an animation from a single prompt or image.

## Fine-tune

In [None]:
#@title Data setup 
#@markdown Put your target images as zip-archive onto Google drive and type its path below (relative to G-drive root). 
#@markdown If you use Custom diffusion, prepare also a bunch of generic reference images of similar class, to start from. 
#@markdown `new_token` will be used in the prompts to summon your target imagery, e.g. as `<mycat1>`. 
new_token = 'mycat1' #@param {type:"string"}
ref_class = 'cat' #@param {type:"string"}
target_data = 'tgt.zip' #@param {type:"string"}
ref_data = 'ref.zip' #@param {type:"string"}

data_dir = os.path.join('/content/data/', new_token)
!rm -rf $data_dir
os.makedirs(data_dir)
%cd $data_dir

tgt_path = os.path.join(gdir, target_data)
!unzip -j -o -q $tgt_path -d tgt
tgt_dir = os.path.join(data_dir, 'tgt')

ref_path = os.path.join(gdir, ref_data)
if len(ref_data) > 0 and os.path.isfile(ref_path):
  !unzip -j -o -q $ref_path -d ref
  ref_dir = os.path.join(data_dir, 'ref')
  with_prior = True
else:
  with_prior = False

%cd $work_dir


Use one of the following methods:
* [Textual inversion](https://textual-inversion.github.io) = adds new token to the text encoder. Generic but stable. Trained embeddings can be combined together on load.
* [LoRA](https://github.com/cloneofsimo/lora) = adds new token + partially finetunes Unet attention layers. Faster, precise, but may interfere with wider spectrum of topics.
* [Custom diffusion](https://github.com/adobe-research/custom-diffusion) = similar to LoRA (in a way). Can achieve impressive reproduction quality (including faces) with simple prompts, but may lose the point with overly complex ones. To train it, you need to provide both the **target** images and **generic** reference images (more varied, of similar subjects) in the Data setup cell above. Apparently, you can generate the latter with SD itself.  

Mark `style` if you're training for a style, rather than an object.  
Mark `low_mem` if you get OOM.  

In [None]:
#@title Run training
method = "custom" #@param ['text','lora','custom']
style = False #@param  {type:"boolean"}
low_mem = True #@param  {type:"boolean"}
train_steps = 2000 #@param  {type:"integer"}
save_steps = 500 #@param  {type:"integer"}
batch = 1 #@param  {type:"integer"}

command = ' -t $s --term %s --data %s ' % (new_token, ref_class, tgt_dir)
command += ' -ts %d --save_step %d -b %d ' %(train_steps, save_steps, batch)
command += ' -val -md %s ' % maindir

if low_mem:
  command += ' --low_mem '
if style
  command += ' --style '

if method == 'text':
  %run src/train.py $command -lr 0.001 --type text
elif method == 'lora':
  %run src/train.py $command -lr 0.0001 --type lora
elif method == 'custom':
  %run src/train.py $command --term_data $ref_dir --type custom
