In [1]:
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, LTXVideoTransformer3DModel, LTXPipeline
from diffusers.utils import export_to_video
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

# torch.backends.cuda.matmul.allow_tf32 = True

# --- Load the text encoder with 8-bit bitsandbytes quantization -------------
# `BitsAndBytesConfig` here is the transformers class (see the imports above).
quant_config = BitsAndBytesConfig(load_in_8bit=True)
text_encoder_8bit = T5EncoderModel.from_pretrained(
    "Lightricks/LTX-Video",
    subfolder="text_encoder",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

# --- Load the video transformer with 8-bit bitsandbytes quantization --------
# The diffusers model takes the diffusers flavour of the config, so
# `quant_config` is rebound to a `DiffusersBitsAndBytesConfig` from here on.
quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = LTXVideoTransformer3DModel.from_pretrained(
    "Lightricks/LTX-Video",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

# --- Assemble the pipeline around the two quantized components --------------
# NOTE(review): device_map="balanced" presumably lets the library place the
# components across the available devices - confirm against the diffusers docs.
pipeline = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    text_encoder=text_encoder_8bit,
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

# --- Prompt selection --------------------------------------------------------
# Previously `prompt` was assigned four times in a row, so only the last text
# was ever used and the earlier ones were dead stores. The candidates are kept
# here for experimentation; change the index below to pick a different one.
PROMPT_CANDIDATES = (
    "An elderly gentleman, with a serene expression, sits at the water's edge, a steaming cup of tea by his side. He is engrossed in his artwork, brush in hand, as he renders an oil painting on a canvas that's propped up against a small, weathered table. The sea breeze whispers through his silver hair, gently billowing his loose-fitting white shirt, while the salty air adds an intangible element to his masterpiece in progress. The scene is one of tranquility and inspiration, with the artist's canvas capturing the vibrant hues of the setting sun reflecting off the tranquil sea.",
    "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
    "A man walks towards a window, looks out, and then turns around. He has short, dark hair, dark skin, and is wearing a brown coat over a red and gray scarf. He walks from left to right towards a window, his gaze fixed on something outside. The camera follows him from behind at a medium distance. The room is brightly lit, with white walls and a large window covered by a white curtain. As he approaches the window, he turns his head slightly to the left, then back to the right. He then turns his entire body to the right, facing the window. The camera remains stationary as he stands in front of the window. The scene is captured in real-life footage.",
    # "eys" -> "eyes": the typo was being fed verbatim to the text encoder.
    "a garden scene with park benches and a man sitting on a bench, closed eyes, short, dark hair, dark skin, wearing a brown coat over a red and gray scarf. camera zooms into person and stops with him in focus. then he calmly opens his eyes, smiles and stands up. The scene is captured in real-life footage.",
)
# The last candidate is the one the original cell effectively ran with.
prompt = PROMPT_CANDIDATES[-1]

# --- Generate and export -----------------------------------------------------
# `.frames[0]` takes the first entry of the pipeline's `frames` output
# (a single prompt is passed, so one video is expected).
video = pipeline(prompt=prompt, num_frames=161, num_inference_steps=50).frames[0]
# Left as the final bare expression so the cell still displays the output path.
export_to_video(video, "ship.mp4", fps=24)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

'ship.mp4'

In [1]:
import torch
from diffusers import AutoencoderKLLTXVideo, LTXImageToVideoPipeline, LTXVideoTransformer3DModel
from diffusers.utils import export_to_video, load_image # Import load_image

# --- Checkpoint and device ---------------------------------------------------
# One single-file checkpoint is used for both the transformer and the VAE.
# `single_file_url` could also be https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.1.safetensors
single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.safetensors"

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- Model loading -----------------------------------------------------------
# Each component is loaded in bfloat16 and moved to the selected device.
transformer = LTXVideoTransformer3DModel.from_single_file(
    single_file_url, torch_dtype=torch.bfloat16
).to(device)
vae = AutoencoderKLLTXVideo.from_single_file(single_file_url, torch_dtype=torch.bfloat16).to(device)
pipe = LTXImageToVideoPipeline.from_pretrained(
    "Lightricks/LTX-Video", transformer=transformer, vae=vae, torch_dtype=torch.bfloat16
).to(device)

# --- Inputs ------------------------------------------------------------------
# Typos fixed ("eys" -> "eyes", "standup" -> "stands up"): the prompt text is
# passed to the pipeline as-is, so misspellings become part of the conditioning.
prompt = "A person meditating in a garden, calmly opens his eyes, smiles and stands up, goes to his car and opens door."

# Conditioning image for the image-to-video pipeline.
image_path = "./instagram_content/scene_0_keyframe.png"
try:
    input_image = load_image(image_path)
except Exception as err:
    # Raise instead of calling exit(): in a notebook exit() ends the kernel
    # session (discarding the loaded models) rather than just failing the cell.
    # A single handler is used because load_image may report a missing local
    # file with an exception type other than FileNotFoundError.
    raise RuntimeError(
        f"Error loading image from {image_path}: {err}. "
        "Please ensure the image exists at the specified path."
    ) from err

print(f"Successfully loaded image from {image_path} of type: {type(input_image)}")

# --- Generation --------------------------------------------------------------
# height/width are not passed, so the pipeline's own defaults apply; pass them
# explicitly (e.g. height=480, width=704) to control the output resolution.
video = pipe(
    image=input_image,  # the loaded image object, not the path
    prompt=prompt,
    num_frames=161,
    num_inference_steps=50,
).frames[0]  # first entry of `frames`; one prompt/image pair is passed

# --- Export ------------------------------------------------------------------
output_video_path = "ship.mp4"
export_to_video(video, output_video_path, fps=24)
print(f"Video exported to {output_video_path}")

Using device: cuda


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Successfully loaded image from ./instagram_content/scene_0_keyframe.png of type: <class 'PIL.Image.Image'>


  0%|          | 0/50 [00:00<?, ?it/s]

Video exported to ship.mp4
