[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/latent-consistency-model-colab/blob/main/wip/lcm_controlnet_canny_video_colab.ipynb)

In [None]:
# Notebook setup cell: install the Python dependencies used below.
# diffusers is installed from the GitHub repository rather than a PyPI release.
!pip install git+https://github.com/huggingface/diffusers -U
# Supporting libraries; gradio is pinned to 3.50.2.
!pip install -q controlnet-aux transformers accelerate peft gradio==3.50.2
# Prebuilt xformers 0.0.22.post4 wheel; the file name targets CUDA 11.8 and CPython 3.10.
# NOTE(review): yt_dlp, moviepy and cv2 are imported later but not installed here —
# presumably they are preinstalled in the runtime; confirm.
!pip install -q https://download.pytorch.org/whl/cu118/xformers-0.0.22.post4%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl

from diffusers import StableDiffusionControlNetPipeline, UNet2DConditionModel, ControlNetModel, LCMScheduler
import torch

# Canny ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny",
    torch_dtype=torch.float16,
)

# Base Stable Diffusion checkpoint wired to the ControlNet above; no safety
# checker is attached (safety_checker=None) and the pipeline is moved to CUDA.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "ckpt/anything-v3-vae-swapped",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")

# Replace the default scheduler with the LCM scheduler (reusing the existing
# scheduler config) and load the LCM LoRA weights.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# NOTE: LoRA fusing is left disabled.
# pipe.fuse_lora()

import os, cv2, torch, time, random
import numpy as np
from moviepy.editor import *
from PIL import Image
from yt_dlp import YoutubeDL
import gradio as gr


def download_video(url):
    """Download a video with yt-dlp and return the path of the saved file.

    The video is always written to ``/content/video.mp4``; an existing file
    at that path is overwritten (``'overwrites': True``).

    Args:
        url: Address of the video to download, as typed into the UI textbox.

    Returns:
        The string ``"/content/video.mp4"``.

    Raises:
        gr.Error: If ``url`` is empty or only whitespace.
    """
    url = (url or "").strip()
    if not url:
        # Fail with a readable UI message instead of an opaque yt-dlp error.
        raise gr.Error("Please enter a video URL to download.")

    output_path = '/content/video.mp4'
    ydl_opts = {
        'overwrites': True,
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
        'outtmpl': output_path,
    }
    with YoutubeDL(ydl_opts) as ydl:
        # download() takes a list of URLs; pass one explicitly instead of a bare string.
        ydl.download([url])
    return output_path

def get_frames(video_in):
    """Resize a video to 512 px height and split it into JPEG frames.

    The source is re-encoded to ``video_resized.mp4`` with its frame rate
    capped at 30, then each frame of that file is written to the current
    working directory as ``in<N>.jpg`` (N starting at 0).

    Args:
        video_in: Path of the source video file.

    Returns:
        A ``(frames, fps)`` tuple: the list of frame file names in playback
        order, and the frame rate OpenCV reports for the resized video.
    """
    resized_path = "video_resized.mp4"

    clip = VideoFileClip(video_in)
    try:
        if clip.fps > 30:
            print("video rate is over 30, resetting to 30")
            target_fps = 30
        else:
            print("video rate is OK")
            target_fps = clip.fps
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile(resized_path, fps=target_fps, verbose=False)
    finally:
        # Release the reader held by the clip even if encoding fails.
        clip.close()
    print("video resized to 512 height")

    frames = []
    cap = cv2.VideoCapture(resized_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print("video fps: " + str(fps))
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_name = 'in' + str(len(frames)) + '.jpg'
            cv2.imwrite(frame_name, frame)
            frames.append(frame_name)
    finally:
        cap.release()
    print("broke the video into frames")
    return frames, fps

def create_video(frames, fps):
    """Assemble image files into ``/content/output.mp4`` and return that path.

    Args:
        frames: Sequence of image file paths, in playback order.
        fps: Frame rate used both for the clip and for the written file.

    Returns:
        The string ``"/content/output.mp4"``.
    """
    output_path = "/content/output.mp4"
    print("building video result")
    sequence = ImageSequenceClip(frames, fps=fps)
    sequence.write_videofile(output_path, fps=fps, verbose=False)
    return output_path

def _canny_control_image(frame_path):
    """Load the frame at *frame_path* and return its Canny edges as a 3-channel PIL image."""
    frame = np.array(Image.open(frame_path).convert("RGB"))
    edges = cv2.Canny(frame, 100, 200)
    # Canny yields a single-channel map; replicate it so the conditioning
    # image handed to the pipeline has three channels.
    return Image.fromarray(np.stack([edges, edges, edges], axis=2))


def infer(prompt, video_in, seed_inp, trim_value, controlnet_conditioning_scale=1.0):
    """Stylize the first ``trim_value`` seconds of a video frame by frame.

    The video is split into frames with ``get_frames``; each selected frame is
    turned into a Canny edge image and passed to the global ``pipe`` together
    with ``prompt``. The generated frames are saved as ``out-<N>.jpg`` and
    joined back into a video with ``create_video``.

    Args:
        prompt: Text prompt passed to the pipeline for every frame.
        video_in: Path of the input video file.
        seed_inp: Integer seed; the same seed is re-applied for each frame.
        trim_value: Number of seconds of video to process.
        controlnet_conditioning_scale: Strength of the ControlNet conditioning
            forwarded to the pipeline. Defaults to 1.0.

    Returns:
        Path of the generated video, as returned by ``create_video``.

    Raises:
        gr.Error: If no video was supplied or no frame falls inside the cut.
    """
    print(prompt)
    if not video_in:
        raise gr.Error("Please upload or download a video first.")

    frames_list, fps = get_frames(video_in)
    n_frame = int(trim_value * fps)
    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)
    print("set stop frames to: " + str(n_frame))
    if n_frame <= 0:
        # An empty frame list cannot be assembled into a video.
        raise gr.Error("No frames to process; increase the cut value or check the video.")

    result_frames = []
    for index, frame_path in enumerate(frames_list[:n_frame]):
        canny_image = _canny_control_image(frame_path)
        # Re-seed for every frame so each frame starts from the same noise.
        generator = torch.Generator(device="cpu").manual_seed(int(seed_inp))
        image = pipe(
            prompt,
            image=canny_image,
            num_inference_steps=4,
            guidance_scale=1.0,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            cross_attention_kwargs={"scale": 0.5},
            generator=generator,
        ).images[0]
        out_name = f"out-{index}.jpg"
        image.convert("RGB").save(out_name)
        result_frames.append(out_name)
        print("frame " + str(index + 1) + "/" + str(n_frame) + ": done;")

    final_vid = create_video(result_frames, fps)
    print("Done!")
    return final_vid

# Gradio UI: the left column holds the inputs (URL download, prompt, source
# video); the right column holds the result video, the seed and cut-length
# sliders, and the generate button.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(show_label=False, value="https://youtu.be/EU3hIXXeiz4")
                input_download_button = gr.Button(value="Download from YouTube or Twitch")
                prompt = gr.Textbox(label="Prompt", placeholder="enter prompt", show_label=False, elem_id="prompt-in")
                video_inp = gr.Video(label="Video source", source="upload", type="filepath", elem_id="input-vid")
                # The downloaded file path is fed into the video input component.
                input_download_button.click(download_video, inputs=[input_text], outputs=[video_inp])
            with gr.Column():
                video_out = gr.Video(label="Pix2pix video result", type="filepath", elem_id="video-output")
                with gr.Row():
                    seed_inp = gr.Slider(label="Seed", minimum=0, maximum=2147483647, step=1, value=69)
                    # "minimum" was misspelled, so the 1-second lower bound was never applied.
                    trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=600, step=1, value=1)
                submit_btn = gr.Button("Generate Pix2Pix video")
        # Order must match the positional parameters of infer().
        inputs = [prompt, video_inp, seed_inp, trim_in]
        submit_btn.click(infer, inputs=inputs, outputs=[video_out])
# Enable request queuing, then start the app with a public share link.
demo.queue().launch(debug=True, share=True, inline=False)