In [2]:
# Install imageio with its ffmpeg extra (pulls in imageio-ffmpeg).
# `%pip` targets the running kernel's environment; the requirement is quoted so
# the square brackets are never interpreted by the shell.
%pip install "imageio[ffmpeg]"

Collecting imageio-ffmpeg (from imageio[ffmpeg])
  Downloading imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Downloading imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl (29.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.5/29.5 MB 13.6 MB/s eta 0:00:00
Installing collected packages: imageio-ffmpeg
Successfully installed imageio-ffmpeg-0.6.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from deep_translator import GoogleTranslator
import numpy as np
import imageio
from tqdm import tqdm
import os
import gc

def safe_image_list_from_array(arr):
    """Convert a frame array into a list of PIL images.

    Args:
        arr: Array-like with 3 dimensions (treated as one image) or
            4 dimensions (treated as a stack of images along the first axis).
            Non-``uint8`` data is scaled by 255 and clipped to 0..255, i.e. it
            is assumed to hold values in the 0..1 range (TODO confirm against
            the pipeline's output type). ``uint8`` data is taken to be
            ready-made pixel values and is used as-is.

    Returns:
        list[PIL.Image.Image]: One image for a 3-D input, one per leading
        index for a 4-D input.

    Raises:
        ValueError: If ``arr`` is neither 3-D nor 4-D.
    """
    arr = np.asarray(arr)
    if arr.ndim not in (3, 4):
        raise ValueError(f"Unsupported array shape: {arr.shape}")

    if arr.dtype == np.uint8:
        # Already 8-bit pixels: multiplying by 255 would wrap around or
        # saturate the values and destroy the image.
        pixels = arr
    else:
        pixels = (arr * 255).clip(0, 255).astype(np.uint8)

    if pixels.ndim == 3:
        return [Image.fromarray(pixels)]
    return [Image.fromarray(frame) for frame in pixels]

def translate_prompt(prompt_text, fallback="Two cats playing in the park"):
    """Translate a prompt to English, always returning a usable string.

    Args:
        prompt_text: Prompt in any language (source language is auto-detected).
        fallback: Prompt returned when translation fails and the original
            prompt is not usable either.

    Returns:
        str: The translated text when the translator yields a non-blank
        string. If the translator raises, ``fallback`` is returned. If the
        translator returns no text (``None`` or blank), the original
        ``prompt_text`` is returned when it is a non-blank string, otherwise
        ``fallback``.
    """
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(prompt_text)
    except Exception as e:
        # Best-effort: translation problems must not abort the generation run.
        print(f"[Translation Error] {e}")
        return fallback

    if isinstance(translated, str) and translated.strip():
        return translated

    # The translator answered without raising but produced nothing usable;
    # passing None/blank text on would break the pipeline call downstream.
    # NOTE(review): this presumably happens for text the service leaves
    # untranslated — confirm against deep_translator's behaviour.
    if isinstance(prompt_text, str) and prompt_text.strip():
        print("[Translation Warning] Translator returned no text; using the original prompt.")
        return prompt_text

    print("[Translation Warning] Translator returned no text; using the fallback prompt.")
    return fallback

def resize_frames(frames):
    """Resize every frame to the dimensions of the first frame.

    Args:
        frames: Iterable of PIL images.

    Returns:
        list: New images, all with the first frame's ``(width, height)``,
        produced with bicubic resampling. An empty input yields ``[]``.
    """
    frames = list(frames)
    if not frames:
        return []

    # PIL reports size as (width, height) for every image mode, so there is no
    # need to copy the pixels into an array just to read the dimensions.
    target_size = frames[0].size
    return [
        frame.resize(target_size, Image.BICUBIC)
        for frame in tqdm(frames, desc="Resizing frames...")
    ]

def save_mp4(frames, output_path, fps=8, bitrate="5M"):
    """Encode ``frames`` as an H.264 (libx264) video at ``output_path``.

    Args:
        frames: Sequence of frames handed to ``imageio.mimsave``.
        output_path: Destination file path.
        fps: Frames per second of the encoded video.
        bitrate: Target bitrate string passed to the encoder.
    """
    encoder_options = {"fps": fps, "codec": 'libx264', "bitrate": bitrate}

    # tqdm.write keeps log lines from clobbering any active progress bar.
    tqdm.write(f"Saving MP4 to {output_path}...")
    imageio.mimsave(output_path, frames, **encoder_options)
    tqdm.write("MP4 saved.")

def save_gif(frames, output_path, fps=8):
    """Write ``frames`` to ``output_path`` as an endlessly looping GIF.

    Args:
        frames: Non-empty sequence of PIL images; the first one drives the
            save and the rest are appended to it.
        output_path: Destination file path.
        fps: Playback rate; converted to a whole number of milliseconds per
            frame.
    """
    tqdm.write(f"Saving GIF to {output_path}...")

    lead_frame = frames[0]
    trailing_frames = frames[1:]
    frame_duration_ms = int(1000 / fps)  # per-frame display time in milliseconds

    lead_frame.save(
        output_path,
        save_all=True,
        append_images=trailing_frames,
        duration=frame_duration_ms,
        loop=0,  # 0 = repeat forever
        optimize=True,
    )

    tqdm.write("GIF saved.")

def cleanup():
    """Free unreferenced Python objects, then release cached CUDA memory.

    The garbage collector runs first so that memory belonging to objects it
    frees (e.g. tensors kept alive only by reference cycles) is already back
    in PyTorch's cache when ``empty_cache()`` hands that cache back to the
    device. In the opposite order that memory would stay cached.
    """
    gc.collect()
    torch.cuda.empty_cache()

def _generate_rgb_frames(prompt, model_id, num_frames, inference_steps, guidance):
    """Run the text-to-video pipeline and return its frames as RGB PIL images.

    The pipeline and its raw output are locals of this helper, so they are no
    longer referenced by the caller when it runs ``cleanup()`` afterwards.
    """
    print("Loading model...")
    # Model CPU offload manages device placement itself, so the pipeline is
    # deliberately NOT moved to CUDA beforehand (doing both is contradictory
    # and costs an extra full transfer of the weights).
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
    pipe.enable_model_cpu_offload()

    print("Generating video frames...")
    result = pipe(prompt=prompt, num_inference_steps=inference_steps, num_frames=num_frames, guidance_scale=guidance)
    video_frames = result["frames"]

    # Convert and standardize to RGB PIL images. Each item may be an array
    # (a single frame or a stack of frames) or an object exposing .convert().
    frames_rgb = []
    for frame in tqdm(video_frames, desc="Converting frames..."):
        if isinstance(frame, np.ndarray):
            imgs = safe_image_list_from_array(frame)
            frames_rgb.extend([img.convert("RGB") for img in imgs])
        else:
            frames_rgb.append(frame.convert("RGB"))
    return frames_rgb


def main():
    """Translate the prompt, generate a short video and save it as MP4 and GIF.

    Steps: translate the configured prompt to English, run the text-to-video
    pipeline, normalise the frames to same-sized RGB images, write both output
    files and print their absolute paths. ``cleanup()` is always invoked after
    generation, including when generation raises.
    """
    # --- Configurable parameters ---
    prompt_id = "Tampilkan video dua ekor kucing sedang bermain di taman"
    model_id = "damo-vilab/text-to-video-ms-1.7b"
    mp4_output_path = "cats_playing.mp4"
    gif_output_path = "cats_playing.gif"
    fps = 8
    num_frames = 48
    guidance = 10.0
    inference_steps = 25
    bitrate = "5M"
    # -------------------------------

    print(f"Original prompt: {prompt_id}")
    prompt = translate_prompt(prompt_id)
    print(f"Translated prompt: {prompt}")

    try:
        frames_rgb = _generate_rgb_frames(
            prompt,
            model_id,
            num_frames=num_frames,
            inference_steps=inference_steps,
            guidance=guidance,
        )
    finally:
        # Runs on success and on failure; on success the pipeline is already
        # out of scope here, so its memory can actually be reclaimed.
        cleanup()

    frames_resized = resize_frames(frames_rgb)

    # Save as both MP4 and GIF
    save_mp4(frames_resized, mp4_output_path, fps=fps, bitrate=bitrate)
    save_gif(frames_resized, gif_output_path, fps=fps)

    print(f"MP4 saved at: {os.path.abspath(mp4_output_path)}")
    print(f"GIF saved at: {os.path.abspath(gif_output_path)}")

# Entry point: run the demo when this cell/script is executed directly
# (it is skipped if the code is imported as a module).
if __name__ == "__main__":
    main()


Original prompt: Tampilkan video dua ekor kucing sedang bermain di taman
Translated prompt: Show videos of two cats playing in the park
Loading model...


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

The TextToVideoSDPipeline has been deprecated and will not receive bug fixes or feature updates after Diffusers version 0.33.1. 


Generating video frames...


  0%|          | 0/25 [00:00<?, ?it/s]

Converting frames...: 100%|██████████| 1/1 [00:00<00:00, 16.49it/s]
Resizing frames...: 100%|██████████| 48/48 [00:00<00:00, 8263.62it/s]


Saving MP4 to cats_playing.mp4...
MP4 saved.
Saving GIF to cats_playing.gif...
GIF saved.
MP4 saved at: /home/alif_ahmad/work/Script/cats_playing.mp4
GIF saved at: /home/alif_ahmad/work/Script/cats_playing.gif
