[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/latent-consistency-model-colab/blob/main/wip/lcm_controlnet_canny_video_colab.ipynb)

In [None]:
# Notebook shell magics: install the runtime dependencies for this cell.
# diffusers is installed from the GitHub repository (latest source) rather than a PyPI release.
!pip install git+https://github.com/huggingface/diffusers -U
# Helper libraries; gradio is pinned to 3.50.2.
!pip install -q yt_dlp controlnet-aux transformers accelerate peft gradio==3.50.2
# Prebuilt xformers 0.0.22.post4 wheel; the filename targets CUDA 11.8 (cu118) and CPython 3.10 (cp310).
!pip install -q https://download.pytorch.org/whl/cu118/xformers-0.0.22.post4%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl

from diffusers import StableDiffusionControlNetPipeline, UNet2DConditionModel, ControlNetModel, LCMScheduler
import torch, cv2, os
from PIL import Image

# Build the module-level `pipe` object that generate() calls for every frame.
# Alternative ControlNet checkpoint (ip2p), kept for reference but disabled:
# controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11e_sd15_ip2p", torch_dtype=torch.float16)
# Canny-edge ControlNet, loaded in half precision.
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
# Base checkpoint combined with the ControlNet; the safety checker is disabled and the pipeline is moved to the GPU.
pipe = StableDiffusionControlNetPipeline.from_pretrained("ckpt/anything-v3-vae-swapped", controlnet=controlnet, torch_dtype=torch.float16, safety_checker=None,).to("cuda")
# Replace the default scheduler with an LCMScheduler built from the existing scheduler's config.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
# Load the LCM LoRA weights on top of the base model.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
# Silence the per-call progress bar (the pipeline is invoked once per video frame).
pipe.set_progress_bar_config(disable=True)

def separate_frames_from_video(video_path, output_folder, prefix='in', extension='.jpg'):
    """Split a video into individually numbered image files.

    Frames are written to ``output_folder`` as ``{prefix}{index}{extension}``
    with a zero-based index, in decoding order.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the frames; created if missing.
        prefix: Filename prefix placed before the frame index.
        extension: Filename extension (including the dot); it also selects
            the image format used by ``cv2.imwrite``.

    Raises:
        OSError: If the video cannot be opened.
    """
    video_capture = cv2.VideoCapture(video_path)
    try:
        if not video_capture.isOpened():
            raise OSError(f'Could not open video: {video_path}')
        os.makedirs(output_folder, exist_ok=True)
        # Read until the decoder reports no more frames instead of trusting
        # CAP_PROP_FRAME_COUNT, which comes from container metadata and may
        # be zero or inaccurate.
        frame_number = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break
            output_path = os.path.join(output_folder, f'{prefix}{frame_number}{extension}')
            cv2.imwrite(output_path, frame)
            frame_number += 1
    finally:
        # Always free the capture handle, even if reading or writing fails.
        video_capture.release()

def apply_canny_edge_detection(image_path, output_path, threshold1=0, threshold2=100):
    """Write the Canny edge map of an image to disk.

    The image is loaded as grayscale, passed through ``cv2.Canny`` and the
    resulting edge image is saved to ``output_path``.

    Args:
        image_path: Path of the image to read.
        output_path: Path where the edge map is written.
        threshold1: First hysteresis threshold passed to ``cv2.Canny``.
        threshold2: Second hysteresis threshold passed to ``cv2.Canny``.

    Raises:
        FileNotFoundError: If the input image cannot be read.
        OSError: If the edge map cannot be written.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    edges = cv2.Canny(img, threshold1, threshold2)
    # cv2.imwrite signals failure through its boolean return value.
    if not cv2.imwrite(output_path, edges):
        raise OSError(f'Could not write edge map: {output_path}')

def get_ordered_images(folder_path, prefix='in', extension='.jpg'):
    """Return the frame filenames in a folder, ordered by frame number.

    Only names that start with ``prefix`` and end with ``extension`` are
    kept. The text between the two is interpreted as the frame index: names
    with a numeric index come first in ascending numeric order, followed by
    the remaining names in alphabetical order.

    Args:
        folder_path: Directory to scan.
        prefix: Required filename prefix.
        extension: Required filename suffix; may be empty.

    Returns:
        A list of bare filenames (not full paths).
    """
    def sort_key(name):
        # Slice with an explicit end index so an empty extension keeps the
        # whole remainder (a negative-zero slice would yield '').
        stem = name[len(prefix):len(name) - len(extension)]
        # isdecimal() only accepts characters that int() can parse.
        if stem.isdecimal():
            return (0, int(stem), name)
        # Non-numeric names sort last; the name makes their order deterministic.
        return (1, 0, name)

    matching = [
        name for name in os.listdir(folder_path)
        if name.startswith(prefix) and name.endswith(extension)
    ]
    return sorted(matching, key=sort_key)

def generate(folder_path, prefix='in', extension='.jpg', canny_folder='/content/canny', output_folder='/content/out'):
    """Run the ControlNet pipeline on every frame found in a folder.

    For each frame (in frame-number order) a Canny edge map is written to
    ``canny_folder`` and used as the conditioning image for the module-level
    ``pipe``; the generated image is saved to ``output_folder`` under the
    same filename as the source frame.

    Args:
        folder_path: Directory containing the input frames.
        prefix: Filename prefix of the frames to process.
        extension: Filename extension of the frames to process.
        canny_folder: Directory that receives the intermediate edge maps.
        output_folder: Directory that receives the generated frames.
    """
    os.makedirs(canny_folder, exist_ok=True)
    os.makedirs(output_folder, exist_ok=True)
    for image_file in get_ordered_images(folder_path, prefix, extension):
        # Read from the folder that was listed, not a fixed location.
        source_path = os.path.join(folder_path, image_file)
        canny_path = os.path.join(canny_folder, image_file)
        apply_canny_edge_detection(source_path, canny_path)
        # Keep the edge map open only while the pipeline consumes it.
        with Image.open(canny_path) as canny_image:
            image = pipe("1girl", width=640, height=360, image=canny_image, num_inference_steps=4, guidance_scale=1.0, controlnet_conditioning_scale=0.9, cross_attention_kwargs={"scale": 0.5}).images[0]
        image.save(os.path.join(output_folder, image_file))

def generate_video_from_images(folder_path, output_video_path, prefix='in', extension='.jpg', fps=30):
    """Assemble the numbered frames in a folder into an ``mp4v`` video.

    Frames are selected and ordered by ``get_ordered_images``. The video
    dimensions are taken from the first frame.

    Args:
        folder_path: Directory containing the frames.
        output_video_path: Path of the video file to create.
        prefix: Filename prefix of the frames to include.
        extension: Filename extension of the frames to include.
        fps: Frame rate of the output video.

    Raises:
        ValueError: If no matching frames exist or a frame cannot be read.
    """
    image_files = get_ordered_images(folder_path, prefix, extension)
    if not image_files:
        raise ValueError(
            f'No images matching {prefix}*{extension} found in {folder_path}'
        )

    # The first frame determines the size the writer is opened with.
    first_image_path = os.path.join(folder_path, image_files[0])
    first_image = cv2.imread(first_image_path)
    if first_image is None:
        raise ValueError(f'Could not read image: {first_image_path}')
    height, width = first_image.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    try:
        for image_file in image_files:
            image_path = os.path.join(folder_path, image_file)
            frame = cv2.imread(image_path)
            # cv2.imread returns None on failure instead of raising.
            if frame is None:
                raise ValueError(f'Could not read image: {image_path}')
            video_writer.write(frame)
    finally:
        # Release the writer even on error so the file handle is closed.
        video_writer.release()

# End-to-end run: video -> frames -> stylised frames -> video.
# Create the working directories for input frames, edge maps and generated frames.
!mkdir /content/in /content/canny /content/out
# 1. Dump every frame of the source video into /content/in.
separate_frames_from_video('/content/video.mp4', '/content/in')
# 2. Run Canny edge detection and the ControlNet pipeline on each frame.
generate('/content/in')
# 3. Stitch the generated frames back together (default fps=30).
generate_video_from_images('/content/out', '/content/output_video.mp4')