<a href="https://colab.research.google.com/github/dmarx/notebooks/blob/animate_diff/AnimateDiff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Unofficial AnimateDiff Demo

> "[AnimateDiff](https://animatediff.github.io/): Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning"  
> by [Yuwei Guo](mailto:guoyuwei@pjlab.org.cn), [Ceyuan Yang](https://ceyuan.me/), [Anyi Rao](https://anyirao.com/), [Yaohui Wang](https://wyhsirius.github.io/), [Yu Qiao](https://wyhsirius.github.io/), [Dahua Lin](http://dahua.site/), [Bo Dai](https://daibo.info/)


```
@misc{guo2023animatediff,
  title={AnimateDiff: Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning},
  author={Yuwei Guo and Ceyuan Yang and Anyi Rao and Yaohui Wang and Yu Qiao and Dahua Lin and Bo Dai},
  booktitle={arXiv preprint arxiv:2307.04725},
  year={2023},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```


[Notebook](https://colab.research.google.com/github/dmarx/notebooks/blob/main/AnimateDiff.ipynb) by [David Marx](https://twitter.com/DigThatData), notebook brought to you by [Stability AI](https://stability.ai/)

Setup adapted from Camenduru.  
Rest of code mostly cannibalized from: https://github.com/guoyww/AnimateDiff/blob/main/scripts/animate.py

Shared under MIT License.

To report bugs or offer suggestions regarding the notebook, file an issue here: https://github.com/dmarx/notebooks/

In [None]:
# @title setup

fpath_sd_model = "/content/models/StableDiffusion/" # @param {type:"string"}
fpath_motion_prior = "/content/models/Motion_Module/" # @param {type:"string"}
fpath_dreambooth_lora = "/content/models/DreamBooth_LoRA/" # @param {type:"string"}

#####################

# Install stuff

!pip install napm einops omegaconf safetensors diffusers[torch]==0.11.1 transformers

# TODO: use huggingface hub rust downloader, backout aria dependency
!apt -y install -qq aria2


# Handle uninstallable research dependencies
import napm
napm.pseudoinstall_git_repo("https://github.com/guoyww/animatediff/", package_name='animatediff', add_install_dir_to_path=True)

# Download models as needed

from pathlib import Path

if not Path(fpath_sd_model).exists():
    !mkdir -p {fpath_sd_model}
    !git clone -b fp16 https://huggingface.co/runwayml/stable-diffusion-v1-5 {fpath_sd_model}

if not Path(fpath_motion_prior).exists():
    !mkdir -p {fpath_motion_prior}
    #!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/AnimateDiff/resolve/main/mm_sd_v14.ckpt -d {fpath_motion_prior} -o mm_sd_v14.ckpt
    !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/AnimateDiff/resolve/main/mm_sd_v15.ckpt -d {fpath_motion_prior} -o mm_sd_v15.ckpt

if not Path(fpath_dreambooth_lora).exists():
    !mkdir -p {fpath_dreambooth_lora}
    !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/AnimateDiff/resolve/main/toonyou_beta3.safetensors -d {fpath_dreambooth_lora} -o toonyou_beta3.safetensors

############################################

# import stuff

#import argparse
#import inspect
#import csv #, pdb, glob
#import pdb
#import glob
#import math
#from einops import rearrange, repeat

import datetime
import os
from pathlib import Path
from types import SimpleNamespace

import diffusers
from diffusers import AutoencoderKL, DDIMScheduler
from omegaconf import OmegaConf
from safetensors import safe_open
import torch
from tqdm.auto import tqdm
from transformers import CLIPTextModel, CLIPTokenizer

# napm needs to be imported before animatediff
import napm
from animatediff.models.unet import UNet3DConditionModel
from animatediff.pipelines.pipeline_animation import AnimationPipeline
from animatediff.utils.util import save_videos_grid
from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora

# napm's config records where each pseudo-installed package lives; use the
# animatediff entry to find the inference config shipped inside that checkout.
cfg = napm.config.NapmConfig().load()
animatediff_entry = cfg['packages']['animatediff']
PKG_ROOT = Path(animatediff_entry['install_dir'])

inference_config_path = PKG_ROOT / "configs/inference/inference.yaml"
inference_config = OmegaConf.load(inference_config_path)


In [None]:
# @title SETTINGS (crystal says i should fix this name later)

def _first_match(dirpath, pattern):
    """Return the first file (in sorted order) under `dirpath` matching `pattern`.

    Sorting makes the choice deterministic when several files match.

    Raises:
        FileNotFoundError: if no file in `dirpath` matches `pattern`.
    """
    matches = sorted(Path(dirpath).glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No files matching {pattern!r} found in {dirpath!r}")
    return str(matches[0])


# Equivalent CLI flags of the original script:
# --pretrained_model_path /content/animatediff/models/StableDiffusion --L 16 --W 256 --H 256
args = SimpleNamespace()

#args.pretrained_model_path = "/content/models/StableDiffusion" # @param {type:"string"}
args.pretrained_model_path = fpath_sd_model
# L / W / H are passed to the pipeline as video_length / width / height.
args.L = 16 # @param {type:"integer"}
args.W = 448 # @param {type:"integer"}
args.H = 320 # @param {type:"integer"}


model_config = SimpleNamespace()
# Personalized weights: first *.safetensors file found in the DreamBooth/LoRA directory.
#model_config.path = '/content/models/DreamBooth_LoRA/toonyou_beta3.safetensors' # @param {type:"string"}
model_config.path = _first_match(fpath_dreambooth_lora, '*.safetensors')
# Motion module: first *.ckpt file found in the motion-prior directory.
#model_config.motion_module = "/content/models/Motion_Module/mm_sd_v15.ckpt" # @param {type:"string"}
model_config.motion_module = _first_match(fpath_motion_prior, "*.ckpt")
model_config.seed = 10788741199826055526 # @param {type:"integer"}
model_config.steps = 25 # @param {type:"integer"}
model_config.guidance_scale = 7.5 # @param {type:"number"}
model_config.prompt = "apollo 13, 'houston we have a problem', tom hanks playing an astronaut" # @param {type:"string"}
model_config.n_prompt = "stationary, motionless, boring, watermark, trademark, copyright, text, shutterstock" # @param {type:"string"}

#unet_additional_kwargs = {}


In [None]:
# @title Load Models

def _read_safetensors(path):
    """Read every tensor of a .safetensors file into a dict, on the CPU."""
    with safe_open(path, framework="pt", device="cpu") as f:
        return {key: f.get_tensor(key) for key in f.keys()}


# Load the model components from the Stable Diffusion checkpoint directory.
tokenizer    = CLIPTokenizer.from_pretrained(args.pretrained_model_path, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(args.pretrained_model_path, subfolder="text_encoder")
vae          = AutoencoderKL.from_pretrained(args.pretrained_model_path, subfolder="vae")
unet         = UNet3DConditionModel.from_pretrained_2d(
    args.pretrained_model_path,
    subfolder="unet",
    unet_additional_kwargs=OmegaConf.to_container(inference_config.get("unet_additional_kwargs", {})),
)

pipeline = AnimationPipeline(
    vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, unet=unet,
    scheduler=DDIMScheduler(**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)),
).to("cuda")


# Metadata collected from the checkpoints. This must be a dict instance: the
# bare SimpleNamespace class has no .update(), so the branch below used to fail.
func_args = {}

# 1.1 motion module
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from sources you trust.
motion_module_state_dict = torch.load(model_config.motion_module, map_location="cpu")
if "global_step" in motion_module_state_dict:
    func_args.update({"global_step": motion_module_state_dict["global_step"]})
# strict=False tolerates UNet keys absent from this checkpoint; keys the UNet
# does not know about are still rejected by the assert.
missing, unexpected = pipeline.unet.load_state_dict(motion_module_state_dict, strict=False)
assert len(unexpected) == 0, f"unexpected keys in motion module checkpoint: {unexpected}"


# 1.2 T2I
if model_config.path != "":
    if model_config.path.endswith(".ckpt"):
        # Same trust caveat as above; map to CPU for consistency with the
        # motion-module load.
        state_dict = torch.load(model_config.path, map_location="cpu")
        pipeline.unet.load_state_dict(state_dict)

    elif model_config.path.endswith(".safetensors"):
        state_dict = _read_safetensors(model_config.path)

        # A file whose keys all mention "lora" is treated as LoRA deltas
        # rather than as a full checkpoint.
        is_lora = all("lora" in k for k in state_dict.keys())
        # Optional settings: the SETTINGS cell does not define `base`, so read
        # it defensively instead of raising AttributeError.
        base_path = getattr(model_config, "base", "")
        if not is_lora:
            base_state_dict = state_dict
        elif base_path:
            base_state_dict = _read_safetensors(base_path)
        else:
            # LoRA without a base checkpoint: keep the Stable Diffusion weights
            # already loaded above and only apply the LoRA deltas.
            base_state_dict = None

        if base_state_dict is not None:
            # vae
            converted_vae_checkpoint = convert_ldm_vae_checkpoint(base_state_dict, pipeline.vae.config)
            pipeline.vae.load_state_dict(converted_vae_checkpoint)
            # unet
            converted_unet_checkpoint = convert_ldm_unet_checkpoint(base_state_dict, pipeline.unet.config)
            pipeline.unet.load_state_dict(converted_unet_checkpoint, strict=False)
            # text_model
            pipeline.text_encoder = convert_ldm_clip_checkpoint(base_state_dict)

        if is_lora:
            # Pass alpha only when configured; otherwise convert_lora's own
            # default applies.
            lora_kwargs = {}
            if hasattr(model_config, "lora_alpha"):
                lora_kwargs["alpha"] = model_config.lora_alpha
            pipeline = convert_lora(pipeline, state_dict, **lora_kwargs)

# The text encoder may have been replaced above, so move everything to the GPU again.
pipeline.to("cuda")
### <<< create validation pipeline <<< ###

In [None]:
# Apply the configured seed so that re-running this cell reproduces the same
# animation; without it model_config.seed is never used.
torch.manual_seed(model_config.seed)

# Run the animation pipeline with the prompt and sampling settings from the
# SETTINGS cell; `.videos` holds the generated frames.
sample = pipeline(
    prompt=model_config.prompt,
    negative_prompt     = model_config.n_prompt,
    num_inference_steps = model_config.steps,
    guidance_scale      = model_config.guidance_scale,
    width               = args.W,
    height              = args.H,
    video_length        = args.L,
).videos

# Only one sample is generated here; the concat keeps the list-of-samples
# shape of the code so more samples can be added to the list.
samples = torch.concat([sample])
savedir="."
outpath = f"{savedir}/sample.gif"
save_videos_grid(samples, outpath , n_rows=1)

In [None]:
# @title Show me the gif

from IPython.display import Image

# Wrap the file written by the sampling cell; as the cell's last expression it
# is rendered inline by the notebook.
rendered_gif = Image(outpath)
rendered_gif
