# Music Video Synthesis
* Extract lyrics from song with timestamps
* Compose scenes, include timestamps
* Generate images for each scene
* A human should evaluate the photos and scenes, creating a curated set with the desired characteristics
* Construct video text prompt for each scene
* Build videos for each scene; use this referral link to sign up: https://www.segmind.com/invite/773118b7-41f4-4154-87f4-49326d973ec3
* Stitch together

# We will use OpenAI Whisper for stability

In [1]:
#!pip install --quiet --upgrade pip
#!pip3 install torch torchvision torchaudio optimum-quanto torchao --index-url https://download.pytorch.org/whl/cu126
#!pip install --quiet --upgrade openai-whisper openai
# Ubuntu or Debian
#!sudo apt update && sudo apt install ffmpeg
#!pip install setuptools-rust
#!pip install -U diffusers imageio imageio_ffmpeg opencv-python moviepy transformers huggingface-hub optimum pillow safetensors optimum-quanto accelerate
#!pip install git+https://github.com/xhinker/sd_embed.git@main
#!pip install accelerate flash_attention numba -U
#!pip install flash_attn --no-build-isolation
#!pip install -r requirements.txt -U

In [2]:
import argparse
import base64
import cv2
import diffusers
import gc
import imageio
import imageio_ffmpeg
import json
import math
import moviepy as mp
import numpy as np
import os
import psutil
import random
import requests
import sys
import tempfile
import time
import transformers
import torch
import torch.multiprocessing as mp
import whisper

from contextlib import contextmanager
from datetime import datetime, timedelta
from diffusers import AutoencoderKL, AutoPipelineForText2Image
from diffusers import FlowMatchEulerDiscreteScheduler
from diffusers import EulerDiscreteScheduler, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler, PNDMScheduler, DDIMScheduler
from diffusers.image_processor import VaeImageProcessor
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.utils import export_to_video, load_video, load_image
from hyvideo.utils.file_utils import save_videos_grid
from hyvideo.config import parse_args
from hyvideo.inference import HunyuanVideoSampler
from hyvideo.constants import NEGATIVE_PROMPT
from mmgp import offload, profile_type
from huggingface_hub import hf_hub_download, snapshot_download
from mmgp import offload, profile_type
from numba import cuda
from openai import OpenAI
from optimum.quanto import freeze, qfloat8, quantize, requantize
from pathlib import Path
from PIL import Image
from safetensors.torch import load_file as load_safetensors, save_file as save_safetensors
from sd_embed.embedding_funcs import get_weighted_text_embeddings_flux1
from torchao.quantization import quantize_, int8_weight_only, int8_dynamic_activation_int8_weight
from transformers import CLIPTextModel, CLIPTokenizer, T5TokenizerFast, T5EncoderModel
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

# Set the tokenizers parallelism switch before any tokenizer is created
# (presumably to silence the fork/parallelism warning -- TODO confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# NOTE(review): the original comment here announced paths for quantized
# weights, but no such paths are defined in this cell.

# Shared runtime defaults used by the pipeline loaders below.
dtype = torch.bfloat16  # torch_dtype handed to from_pretrained() in load_flux_pipe
MAX_SEED = np.iinfo(np.int32).max  # upper bound for randomly drawn seeds
device = "cuda" if torch.cuda.is_available() else "cpu"
retry_limit = 3  # copied into CONFIG["retry_limit"]
quantization = int8_weight_only  # torchao factory; invoked as quantization() in load_flux_pipe

# Default frame size in pixels for image and video generation.
WIDTH = 848
HEIGHT = 480

2025-02-02 11:01:31.000494: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-02 11:01:31.153081: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738515691.213136     905 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738515691.230773     905 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-02 11:01:31.383592: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [3]:
# Configuration
# Secrets are read from the environment so they never need to be pasted into
# the notebook. When a variable is unset the value is "" exactly as before.
CONFIG = {
    "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
    "openai_model": "gpt-4o-mini",
    "openai_model_large": "gpt-4o",
    "hf_token": os.environ.get("HF_TOKEN", ""),
    "base_working_dir": "./images",
    "base_video_dir": "./output",
    # Each entry produces one independent run; listing the same file several
    # times yields several variations of the same song.
    "audio_files": [
        #"/mnt/d/Share/Audio/DidgeridooUnity.mp3",
        "/mnt/d/Share/Audio/WingsOfLight.mp3",
        "/mnt/d/Share/Audio/WingsOfLight.mp3",
        "/mnt/d/Share/Audio/WingsOfLight.mp3",
        # Add more audio file paths here
    ],
    "device": device,
    "dtype": dtype,
    "retry_limit": retry_limit,
    "MAX_SEED": MAX_SEED,
}

# Ensure base directories exist
os.makedirs(CONFIG["base_working_dir"], exist_ok=True)
os.makedirs(CONFIG["base_video_dir"], exist_ok=True)

# Hand-built replacement for command-line parsing: this namespace is passed as
# `args=args` to HunyuanVideoSampler.from_pretrained() in
# load_hunyuan_video_sampler(). The field names presumably mirror the options
# of hyvideo.config.parse_args (imported above but never called) -- TODO confirm.
# NOTE(review): model_resolution is '540p' while dit_weight points at a 720p
# checkpoint, and video_size is (720, 1280) while generate_video() requests
# 480x848 -- confirm which of these the sampler actually honours.
args = argparse.Namespace(
    quantize_transformer=False,
    lora_weight=[],
    lora_multiplier=[],
    profile=-1,
    verbose=1,
    server_port=0,
    server_name='',
    open_browser=False,
    model='HYVideo-T/2-cfgdistill',
    latent_channels=16,
    precision='bf16',
    rope_theta=256,
    vae='884-16c-hy',
    vae_precision='fp16',
    vae_tiling=True,
    text_encoder='llm',
    text_encoder_precision='fp16',
    text_states_dim=4096,
    text_len=256,
    tokenizer='llm',
    prompt_template='dit-llm-encode',
    prompt_template_video='dit-llm-encode-video',
    hidden_state_skip_layer=2,
    apply_final_norm=False,
    text_encoder_2='clipL',
    text_encoder_precision_2='fp16',
    text_states_dim_2=768,
    tokenizer_2='clipL',
    text_len_2=77,
    denoise_type='flow',
    flow_shift=7.0,
    flow_reverse=True,
    flow_solver='euler',
    use_linear_quadratic_schedule=False,
    linear_schedule_end=25,
    model_base='ckpts',
    dit_weight='ckpts/hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt',
    model_resolution='540p',
    load_key='module',
    use_cpu_offload=False,
    batch_size=1,
    infer_steps=50,
    disable_autocast=False,
    save_path='./results',
    save_path_suffix='',
    name_suffix='',
    num_videos=1,
    video_size=(720, 1280),
    video_length=129,
    prompt=None,
    seed_type='auto',
    seed=None,
    neg_prompt=None,
    cfg_scale=1.0,
    embedded_cfg_scale=6.0,
    reproduce=False,
    ulysses_degree=1,
    ring_degree=1
)

In [4]:
class SamplerArgs:
    """
    Bare-bones holder for sampler options.

    Only ``flow_reverse`` is defined; add further attributes here when the
    sampler or pipeline needs them.
    """

    def __init__(self) -> None:
        # Flow reversal is switched on by default.
        self.flow_reverse = True

def reset_memory(device):
    """
    Run the garbage collector and, when CUDA is usable, release cached GPU
    memory and reset the allocator statistics for ``device``.

    Args:
        device: Device whose CUDA memory statistics are reset (for example
            ``"cuda"``). On a CPU-only host, or for a non-CUDA device, the
            statistic resets are skipped instead of raising.
    """
    gc.collect()
    if not torch.cuda.is_available():
        # Nothing GPU-side to clean up; the CUDA stat calls would fail here.
        return

    # Wait for pending kernels so their buffers can actually be released.
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

    # The statistic resets only accept CUDA devices.
    if device is None or torch.device(device).type == "cuda":
        torch.cuda.reset_peak_memory_stats(device)
        torch.cuda.reset_accumulated_memory_stats(device)
    
def get_openai_prompt_response(
    prompt: str,
    config: dict,
    max_tokens: int = 6000,
    temperature: float = 0.33,
    openai_model: str = "",
):
    """
    Send a prompt to the OpenAI chat completions API and return the reply text.

    The request itself is retried: every attempt re-issues the API call, so
    transient API or network errors are covered by the retry logic.

    Args:
        prompt: User message sent to the model.
        config: Settings dict; reads ``openai_api_key``, ``openai_model`` and
            ``retry_limit``.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        openai_model: Model name; falls back to ``config["openai_model"]``
            when empty.

    Returns:
        The reply text, or ``""`` when every attempt failed or the model
        returned no content.
    """
    client = OpenAI(api_key=config["openai_api_key"])
    retry_limit = config["retry_limit"]
    messages = [
        {
            "role": "system",
            "content": """Act as a helpful assistant, you are an expert editor.""",
        },
        {"role": "user", "content": prompt},
    ]

    for attempt in range(1, retry_limit + 1):
        try:
            response = client.chat.completions.create(
                max_tokens=max_tokens,
                messages=messages,
                model=openai_model or config["openai_model"],
                temperature=temperature,
            )
            message_content = response.choices[0].message.content
            # The API may return no text at all; callers expect a string.
            return message_content if message_content is not None else ""
        except Exception as e:
            print(f"Error occurred: {e}")
            if attempt == retry_limit:
                print("Retry limit reached. Moving to the next iteration.")
                return ""
            print(f"Retrying... (Attempt {attempt}/{retry_limit})")
            time.sleep(1)  # brief pause before re-sending the request

    # Only reached when retry_limit is zero or negative.
    return ""


def load_flux_pipe():
    """
    Assemble the FLUX.1-dev text-to-image pipeline from its components,
    quantize the transformer and the T5 text encoder, move the pipeline to
    CUDA and attach the Turbo-Alpha LoRA adapter.

    Returns:
        The ready-to-use ``FluxPipeline``.
    """
    clip_repo = "openai/clip-vit-large-patch14"
    bfl_repo = "black-forest-labs/FLUX.1-dev"
    revision = "refs/pr/3"
    adapter_id = "alimama-creative/FLUX.1-Turbo-Alpha"

    # Keyword arguments shared by every component loaded from the FLUX repo.
    flux_kwargs = {"torch_dtype": dtype, "revision": revision}

    # Dict order is the load order: scheduler, CLIP, T5, VAE, transformer.
    components = {
        "scheduler": FlowMatchEulerDiscreteScheduler.from_pretrained(
            bfl_repo, subfolder="scheduler", revision=revision
        ),
        "text_encoder": CLIPTextModel.from_pretrained(clip_repo, torch_dtype=dtype),
        "tokenizer": CLIPTokenizer.from_pretrained(clip_repo, torch_dtype=dtype),
        "text_encoder_2": T5EncoderModel.from_pretrained(
            bfl_repo, subfolder="text_encoder_2", **flux_kwargs
        ),
        "tokenizer_2": T5TokenizerFast.from_pretrained(
            bfl_repo, subfolder="tokenizer_2", **flux_kwargs
        ),
        "vae": AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", **flux_kwargs),
        "transformer": FluxTransformer2DModel.from_pretrained(
            bfl_repo, subfolder="transformer", **flux_kwargs
        ),
    }

    # Quantize the two largest modules in place before building the pipeline.
    for heavy_module in ("transformer", "text_encoder_2"):
        quantize_(components[heavy_module], quantization())

    flux = FluxPipeline(**components).to('cuda')
    flux.load_lora_weights(adapter_id)
    return flux


def gen_flux_image(pipe, prompt, config: dict, height=1024, width=1024, guidance_scale=3.5, num_inference_steps=8, max_sequence_length=512, seed=-1):
    """
    Render one image for ``prompt`` with the Flux pipeline.

    The prompt is turned into weighted text embeddings first, which are then
    passed to the pipeline. A ``seed`` of -1 picks a random seed in
    ``[0, MAX_SEED]``. ``config`` is accepted but not read by this function.

    Returns:
        The first generated image (PIL output type).
    """
    chosen_seed = random.randint(0, MAX_SEED) if seed == -1 else seed

    with torch.no_grad():
        embeds, pooled_embeds = get_weighted_text_embeddings_flux1(pipe=pipe, prompt=prompt)

        # CPU generator seeded with the chosen seed.
        rng = torch.Generator("cpu").manual_seed(chosen_seed)
        picture = pipe(
            prompt_embeds=embeds,
            pooled_prompt_embeds=pooled_embeds,
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            output_type="pil",
            num_inference_steps=num_inference_steps,
            max_sequence_length=max_sequence_length,
            generator=rng,
        ).images[0]

        # Drop the embeddings and release cached GPU memory before returning.
        del embeds, pooled_embeds
        torch.cuda.empty_cache()

    return picture


def image_file_to_base64(image_path):
    """Read the file at ``image_path`` as bytes and return its base64 text (UTF-8 decoded)."""
    with open(image_path, 'rb') as handle:
        encoded = base64.b64encode(handle.read())
    return encoded.decode('utf-8')

# Use this function to fetch an image from a URL and convert it to base64
def image_url_to_base64(image_url, timeout=30):
    """
    Download ``image_url`` and return the response body as base64 text.

    Args:
        image_url: URL of the image to download.
        timeout: Seconds to wait for the server before giving up; without it
            a stalled connection would block forever.

    Returns:
        The base64-encoded body, decoded as UTF-8 text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never encoded as if it were an image.
    """
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    return base64.b64encode(response.content).decode('utf-8')
    
def load_hunyuan_video_sampler(
    server_config_filename: str = "gradio_config.json",
    forced_profile_no: int = -1,
    verbose_level: int = 1,
    quantize_transformer: bool = True,
    lora_weight: list = None,
    lora_multiplier: list = None,
    device: str = "cpu",
) -> HunyuanVideoSampler:
    """
    Loads the HunyuanVideo pipeline according to settings in `server_config_filename`.
    If `forced_profile_no` is >= 0, that overrides the 'profile' field in the server config.
    This version does NOT parse any command-line arguments.

    Side effects visible in this function:
      * writes `server_config_filename` with default settings when it does not exist;
      * rewrites ./ckpts/hunyuan-video-t2v-720p/vae/config.json whenever that file exists;
      * sets `offload.default_verboseLevel`.

    Args:
        server_config_filename: JSON file holding transformer/text-encoder paths,
            attention mode, compile mode and profile.
        forced_profile_no: Profile number overriding the config value when >= 0.
        verbose_level: Value assigned to `offload.default_verboseLevel`.
        quantize_transformer: Forwarded to `offload.profile` as `quantizeTransformer`.
        lora_weight: LoRA files to load into the transformer (None means none).
        lora_multiplier: Multipliers passed alongside `lora_weight`.
        device: Device forwarded to `HunyuanVideoSampler.from_pretrained`.

    Returns:
        The loaded `HunyuanVideoSampler`.
    """
    if lora_weight is None:
        lora_weight = []
    if lora_multiplier is None:
        lora_multiplier = []

    # -----------------------------------------------------------------------
    # Read or create server_config
    # -----------------------------------------------------------------------
    if not Path(server_config_filename).is_file():
        # Default config if none present
        server_config = {
            "attention_mode": "sage",
            "transformer_filename": "ckpts/hunyuan-video-t2v-720p/transformers/hunyuan_video_720_quanto_int8.safetensors",
            "text_encoder_filename": "ckpts/text_encoder/llava-llama-3-8b-v1_1_quanto_int8.safetensors",
            "compile": "",
            "profile": profile_type.HighRAM_LowVRAM,
        }
        # Persist the defaults so later runs read the same settings back.
        with open(server_config_filename, "w", encoding="utf-8") as writer:
            writer.write(json.dumps(server_config))
    else:
        with open(server_config_filename, "r", encoding="utf-8") as reader:
            text = reader.read()
        server_config = json.loads(text)

    # Pull out config
    transformer_filename = server_config["transformer_filename"]
    text_encoder_filename = server_config["text_encoder_filename"]
    attention_mode = server_config["attention_mode"]
    profile = forced_profile_no if forced_profile_no >= 0 else server_config["profile"]
    compile_mode = server_config.get("compile", "")

    # -----------------------------------------------------------------------
    # Download any missing models from HF or any other source (if needed)
    # -----------------------------------------------------------------------
    def download_models(transformer_filename, text_encoder_filename):
        """
        Stub: Implement your huggingface_hub logic here if needed.
        """
        pass

    # Currently a no-op: the checkpoints must already be present on disk.
    download_models(transformer_filename, text_encoder_filename)

    # -----------------------------------------------------------------------
    # Optional: tweak VAE config, etc.
    # -----------------------------------------------------------------------
    offload.default_verboseLevel = verbose_level

    vae_config_path = "./ckpts/hunyuan-video-t2v-720p/vae/config.json"
    if os.path.isfile(vae_config_path):
        with open(vae_config_path, "r", encoding="utf-8") as reader:
            vae_config = json.loads(reader.read())
        # Example: reduce time window used by the VAE for temporal splitting
        if vae_config.get("sample_tsize", 64) == 64:
            vae_config["sample_tsize"] = 32
        # NOTE(review): the file is written back even when nothing changed.
        with open(vae_config_path, "w", encoding="utf-8") as writer:
            writer.write(json.dumps(vae_config))

    # -----------------------------------------------------------------------
    # Decide how to pin memory, partial pin, etc.
    # -----------------------------------------------------------------------
    if profile == 5:
        pinToMemory = False
        partialPinning = False
    else:
        pinToMemory = True
        physical_memory = psutil.virtual_memory().total
        # E.g. partial pin if <= 32 GB of RAM
        partialPinning = physical_memory <= (2**30) * 32

    # -----------------------------------------------------------------------
    # Load the pipeline
    # -----------------------------------------------------------------------
    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(
        transformer_filename,
        text_encoder_filename,
        attention_mode=attention_mode,
        pinToMemory=pinToMemory,
        partialPinning=partialPinning,
        args=args,      # the module-level `args` namespace (not a SamplerArgs instance)
        device=device,
    )

    pipe = hunyuan_video_sampler.pipeline

    # -----------------------------------------------------------------------
    # Optionally load LoRAs
    # -----------------------------------------------------------------------
    if len(lora_weight) > 0:
        offload.load_loras_into_model(pipe.transformer, lora_weight, lora_multiplier)

    # -----------------------------------------------------------------------
    # Profile, compile, or quantize
    # -----------------------------------------------------------------------
    offload.profile(
        pipe,
        profile_no=profile,
        compile=compile_mode,
        quantizeTransformer=quantize_transformer,
    )

    return hunyuan_video_sampler


def generate_video(
    hunyuan_video_sampler,
    height=HEIGHT,
    width=WIDTH,
    video_length=121,
    infer_steps=50,
    prompt="A cat walks on the grass, realistic style.",
    negative_prompt="Aerial view, overexposed, low quality, deformation",
    flow_shift=7.0,
    filename="./output.mp4",
    seed=42,
    cfg_scale=7.5,
    batch_size=1,
    embedded_cfg_scale=1.0,
):
    """
    Generates and saves a video using the provided sampler, based on the specified parameters.
    The result is written to 'filename'.

    Args:
        hunyuan_video_sampler: Loaded sampler exposing ``pipeline`` and ``predict``.
        height, width: Frame size in pixels, forwarded to ``predict``.
        video_length: Number of frames, forwarded to ``predict``.
        infer_steps: Number of denoising steps, forwarded to ``predict``.
        prompt, negative_prompt: Text conditioning.
        flow_shift: Forwarded to ``predict``.
        filename: Output path of the saved video.
        seed: Seed forwarded to ``predict``.
        cfg_scale, embedded_cfg_scale: Accepted for interface compatibility but
            NOT forwarded; see the note at the ``predict`` call.
        batch_size: Forwarded to ``predict``.

    Returns:
        ``filename``.
    """
    # TeaCache (disabled). The state is set on the transformer *class*.
    trans = hunyuan_video_sampler.pipeline.transformer.__class__
    trans.enable_teacache = False
    if trans.enable_teacache:
        trans.num_steps = infer_steps
        trans.cnt = 0
        trans.rel_l1_thresh = 0.15 # 0.1 for 1.6x speedup, 0.15 for 2.1x speedup
        trans.accumulated_rel_l1_distance = 0
        trans.previous_modulated_input = None
        trans.previous_residual = None

    outputs = hunyuan_video_sampler.predict(
        prompt=prompt,
        height=height,
        width=width,
        video_length=video_length,
        seed=seed,
        negative_prompt=negative_prompt,
        infer_steps=infer_steps,
        # NOTE(review): the guidance values stay pinned to 1.0 / 6.0, matching
        # cfg_scale / embedded_cfg_scale in the module-level `args`. The
        # existing caller passes cfg_scale=7.5 and embedded_cfg_scale=1.0;
        # forwarding those would silently change every generated video, so
        # decide on the intended values before wiring the parameters through.
        guidance_scale=1.0,
        num_videos_per_prompt=1,
        flow_shift=flow_shift,
        batch_size=batch_size,
        embedded_guidance_scale=6.0,
    )

    samples = outputs["samples"]
    # One video per prompt is requested; every sample is written to the same
    # 'filename', so with more samples only the last one would survive.
    for sample in samples:
        # shape is (C, T, H, W)
        sample = sample.unsqueeze(0)  # (1, C, T, H, W)
        save_videos_grid(sample, filename, fps=24)

    return filename


def unload_hunyuan_video_sampler(hunyuan_video_sampler):
    """
    Best-effort release of the memory used by the pipeline.

    Only this function's own reference is dropped; the caller must also let
    go of its reference (for example by returning or using ``del``) before
    the model memory can really be reclaimed. A garbage-collection pass runs
    before the CUDA cache is emptied so that unreachable reference cycles no
    longer hold on to GPU tensors.
    """
    del hunyuan_video_sampler
    gc.collect()
    torch.cuda.empty_cache()


def _parse_scene_list(raw: str):
    """Extract and decode the JSON list embedded in a language-model reply."""
    if not raw:
        raise ValueError("empty response from the language model")
    # Ignore anything around the list (markdown code fences, "json" tags,
    # explanatory prose) instead of deleting substrings from the payload.
    begin = raw.find("[")
    end = raw.rfind("]")
    if begin == -1 or end <= begin:
        raise ValueError("no JSON list found in the language model response")
    # strict=False tolerates raw newlines inside string values.
    return json.loads(raw[begin:end + 1], strict=False)


def create_scenes(text: str, video_summary: str, config: dict):
    """
    Creates scenes based on the extracted lyrics using OpenAI's API.

    Args:
        text: Lyrics with their start times, one segment per line.
        video_summary: Short description of the video theme; when non-empty
            it is added to the prompt so scenes can stay consistent with it.
        config: Settings dict forwarded to ``get_openai_prompt_response``.

    Returns:
        The decoded list of scene dicts.

    Raises:
        ValueError: If the reply contains no decodable JSON list
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Empty summary -> the prompt is identical to the version without a theme line.
    theme_line = f"Video theme summary: {video_summary}\n" if video_summary else ""

    # Generate scenes JSON
    prompt = f'''Create a json list of diverse, unique scenes (groupings of text), scene_description (200 words or less), and action_sequence (30 words or less) from the following text.  Scenes should be groups of lyrics with new scenes when the lyric context changes.  Text: {text}   
{theme_line}The json list should have the start value for the first item in the scene and the text that is combined for all items in the same scene.  
The scene_description should include sensory rich details such as attire, setting, mood, lighting, and changing compositions, painting a clear visual scene consistent with the video theme and different from other scenes.  Use theme descriptions, such as graphic novel, water color, render, oil painting, etc.  Scenes should avoid depictions of literal people, unless they are close up of a single person.  Favor symbolism and artistic illustrations of concepts and feeling, emotion.  Avoid depections of literal people. Evoke strong emotions with atmospheric lighting and tone. Use descriptive phrases to capture the mood (e.g., flickering neon, golden twilight, ethereal glow).
Avoid scenes with many people moving.
The action_sequence should describe the action in the scene.  Scenes should be unique, creative, imaginative, and awe-inspiring to create an amazing video.  Create beautiful and mesmerizing scene descriptions that are creative, unique, artistic, and imaginative. Each scene must be unique, imaginative, and visually captivating, blending creativity with artistic flair. Use powerful, descriptive language to craft scenes that are awe-inspiring and leave the audience in wonder. These scenes should evoke a sense of beauty, grandeur, mystery, or anything emotional, drawing from both realistic and fantastical elements. Ensure the descriptions are immersive, emotionally resonant, and filled with unexpected twists that engage the senses and imagination, suitable for creating a stunning, cinematic video experience.  Use descriptions of special effects in the scenes.  
Action should avoid sudden or fast movement or zooms, avoid any fast camera movement.  Avoid human movements like walking, dancing, shopping, etc.
Return only the json list, less jargon. The json list fields should be: start, text, scene_description, action_sequence'''

    result = get_openai_prompt_response(prompt, config, openai_model=config["openai_model"], temperature=0.66)
    return _parse_scene_list(result)

def revise_scenes(scenes, config: dict):
    """
    Revise scenes based on the extracted scenes.

    Args:
        scenes: List of scene dicts; sent to the model serialized as JSON.
        config: Settings dict forwarded to ``get_openai_prompt_response``.

    Returns:
        The revised list of scene dicts as decoded from the reply.

    Raises:
        ValueError: If the reply contains no decodable JSON list.
    """
    # Send real JSON (double-quoted) rather than the Python repr of the list.
    scenes_json = json.dumps(scenes, ensure_ascii=False)

    # Generate scenes JSON
    prompt = f'''Revise the JSON scenes to update the scene_description and action_sequence to engage the senses and imagination, suitable for creating a stunning, cinematic video experience.  We want unique scenes, even ones in the same sequence. Use descriptions of special effects in the scenes.  JSON scenes: {scenes_json}   
The scene_description (200 words or less) should include details such as attire, setting, mood, lighting, and any significant movements or expressions, painting a clear visual scene consistent with the video theme and different from other scenes. Use theme descriptions, such as graphic novel, water color, render, oil painting, etc.  Scenes should avoid depictions of literal people, unless they are close up of a single person.  Favor symbolism and artistic illustrations of concepts and feeling, emotion.  Avoid depections of literal people. Evoke strong emotions with atmospheric lighting and tone. Use descriptive phrases to capture the mood (e.g., flickering neon, golden twilight, ethereal glow).
The action_sequence (30 words or less) should describe the action in the scene.  The goal is to create input to create a stunning, cinematic video experience.   
Action should avoid sudden or fast movement or zooms, avoid any fast camera movement. Avoid human movements like walking, dancing, shopping, etc.
Only update the scene_description and action_sequence. We do not want to have similar scene_descriptions and action_sequences for consecutive scenes, we want unique scenes that tell a brilliant, cohesive story.  Please update the scene_description and action_sequence to be differemt, creative, and consistent.  
Do not delete any items as having scenes with the given start times are important. 
Return only the json list, less jargon. The json list fields should be: start, text, scene_description, action_sequence'''

    result = get_openai_prompt_response(prompt, config, openai_model=config["openai_model"], temperature=0.33)
    return _parse_scene_list(result)


def _split_long_scenes(scenes, last_end_value, max_duration_seconds):
    """Return a new scene list in which no scene lasts longer than max_duration_seconds."""
    split_scenes = []
    for i, scene in enumerate(scenes):
        # The first scene always starts at the beginning of the audio.
        start_time = 0 if i == 0 else scene['start']
        # A scene ends where the next one starts; the last one ends with the audio.
        end_time = scenes[i + 1]['start'] if i < len(scenes) - 1 else last_end_value

        remaining = end_time - start_time
        while remaining > max_duration_seconds:
            piece = scene.copy()
            piece['start'] = start_time
            split_scenes.append(piece)
            start_time += max_duration_seconds
            remaining = end_time - start_time
        # Append the remaining part of the scene
        if remaining > 0:
            piece = scene.copy()
            piece['start'] = start_time
            split_scenes.append(piece)
    return split_scenes


def process_audio_scenes(audio_file: str, config: dict):
    """
    Transcribe one audio file and turn its lyrics into a list of scenes.

    Steps: create per-run output directories, transcribe with Whisper, ask
    OpenAI for a video summary and scenes, split over-long scenes, ask for a
    revision, and save the result to ``scenes.json`` in the images directory.

    Args:
        audio_file: Path of the audio file to process.
        config: Settings dict; reads ``base_working_dir`` and
            ``base_video_dir`` and is forwarded to the OpenAI helpers.

    Returns:
        A 5-tuple ``(scenes, audio_images_dir, audio_videos_dir,
        last_end_value, timestamp)`` on every path. ``scenes`` is an empty
        list when the transcription is empty or scene creation fails.
    """
    # set maximum duration for an image basis, should be in intervals of video generation length
    video_gen_length = 5
    max_duration_seconds = video_gen_length * 3
    create_attempts = 3
    revise_attempts = 2

    # Create unique identifier based on audio file name
    audio_basename = os.path.splitext(os.path.basename(audio_file))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    unique_id = f"{audio_basename}_{timestamp}"

    # Create unique directories for images and videos
    print(f"Create unique directories for images and videos")
    audio_images_dir = os.path.join(config["base_working_dir"], unique_id)
    audio_videos_dir = os.path.join(config["base_video_dir"], unique_id)
    os.makedirs(audio_images_dir, exist_ok=True)
    os.makedirs(audio_videos_dir, exist_ok=True)

    # Step 1: Transcribe audio using Whisper
    print(f"Transcribe audio using Whisper")
    model = whisper.load_model("turbo")
    result = model.transcribe(audio_file)

    # Cleanup Whisper model memory
    del model
    gc.collect()
    torch.cuda.empty_cache()

    segments = result['segments']
    if not segments:
        # Nothing was transcribed; there are no lyrics to build scenes from.
        print("No segments were transcribed. Skipping scene generation.")
        return [], audio_images_dir, audio_videos_dir, 0.0, timestamp

    # Combine start times and texts into the lyric listing sent to the model
    text = "".join(
        f"Start: {segment['start']}, Text: {segment['text'].strip()}\n"
        for segment in segments
    )

    last_end_value = segments[-1]['end']

    # Path to scenes.json file
    scenes_file_path = os.path.join(audio_images_dir, "scenes.json")

    # Check if scenes.json exists
    # NOTE(review): the directory name contains a fresh timestamp, so this
    # cache is only hit when the same second is reused -- confirm the intent.
    if os.path.exists(scenes_file_path):
        print(f"Scenes file already exists at {scenes_file_path}. Skipping scene generation.")
        with open(scenes_file_path, "r", encoding="utf-8") as scenes_file:
            scenes = json.load(scenes_file)
        return scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp

    # Step 2: Generate video summary using OpenAI
    print(f"Generate video summary using OpenAI")
    video_summary_prompt = f'Create a short summary that describes a music video based on these lyrics: {text}'
    video_summary = get_openai_prompt_response(video_summary_prompt, config, openai_model=config["openai_model"])

    # Step 3: Create scenes based on lyrics (the model reply may not be valid JSON, so retry)
    print(f"Create scenes based on lyrics")
    scenes = None
    for attempt in range(1, create_attempts + 1):
        try:
            scenes = create_scenes(text, video_summary, config)
            break
        except Exception as e:
            print(f"Scene creation failed (attempt {attempt}/{create_attempts}): {e}")
    if scenes is None:
        return [], audio_images_dir, audio_videos_dir, last_end_value, timestamp

    # we don't want scenes longer than max_duration_seconds (15 seconds)
    scenes = _split_long_scenes(scenes, last_end_value, max_duration_seconds)

    # improve the scenes with a revision; keep the unrevised scenes on failure
    for attempt in range(1, revise_attempts + 1):
        try:
            scenes = revise_scenes(scenes, config)
            print(f'revised scenes')
            break
        except Exception as e:
            print(f"Scene revision failed (attempt {attempt}/{revise_attempts}): {e}")
    else:
        print('cannot revise scenes')

    # Save the scenes to scenes.json
    with open(scenes_file_path, "w", encoding="utf-8") as scenes_file:
        json.dump(scenes, scenes_file)

    return scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp

def process_audio_images(config: dict, scenes, audio_images_dir):
    """
    Render one JPEG per scene with the Flux pipeline.

    Each scene's ``scene_description`` is the image prompt. Files are saved
    in ``audio_images_dir`` as ``image_01.jpg``, ``image_02.jpg``, ... The
    pipeline is moved to the CPU and released even if generation fails.
    """
    # Step 4: Load Flux pipeline and generate images
    print(f"Load Flux pipeline and generate images")
    flux_pipe = load_flux_pipe()

    try:
        # Generate images for each scene
        for image_num, scene in enumerate(scenes, start=1):
            image = gen_flux_image(
                flux_pipe,
                scene['scene_description'],
                config,
                height=HEIGHT,
                width=WIDTH,
                guidance_scale=3.9,
                num_inference_steps=8,
                max_sequence_length=512,
                seed=-1,  # -1 makes gen_flux_image draw a random seed
            )
            image_path = os.path.join(audio_images_dir, f"image_{image_num:02d}.jpg")
            image.save(image_path, dpi=(300, 300))
            del image
            torch.cuda.empty_cache()
    finally:
        # Move the pipeline back to CPU and delete it
        flux_pipe.to('cpu')
        del flux_pipe
        gc.collect()
        torch.cuda.empty_cache()

def process_audio_video(config: dict, scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp, skip_first):
    """
    Generate the video clips for every scene with the HunyuanVideo sampler.

    A scene lasts from its ``start`` (0 for the first scene) to the next
    scene's ``start`` (``last_end_value`` for the last one) and is covered by
    ``int((duration + 2) // 5)`` clips. Clips are written to
    ``audio_videos_dir``; the first ``skip_first`` clips are counted but not
    generated. The sampler is unloaded even when generation raises.

    Args:
        config: Settings dict (not read here).
        scenes: Scene dicts with ``start``, ``scene_description`` and
            ``action_sequence``.
        audio_images_dir: Images directory (not read here).
        audio_videos_dir: Directory receiving the clips.
        last_end_value: End time of the audio in seconds.
        timestamp: Run identifier embedded in each file name.
        skip_first: Number of leading clips to skip.
    """
    video_num = 1
    negative_prompt = "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion"

    hunyuan_video_sampler = load_hunyuan_video_sampler(
        server_config_filename="gradio_config.json",
        forced_profile_no=-1,
        verbose_level=1,
        quantize_transformer=True,
        lora_weight=[],
        lora_multiplier=[],
        device="cuda",  # or "cpu"
    )
    try:
        # Step 7: Generate video sequences
        for i, scene in enumerate(scenes):
            prompt = scene["scene_description"] + " " + scene["action_sequence"]

            # Work out the scene duration so it can be covered in 5-second clips
            if i + 1 < len(scenes):
                next_start_time = scenes[i + 1]["start"]
            else:
                next_start_time = last_end_value  # Use the final ending time for the last scene

            if i == 0:
                duration = next_start_time
            else:
                duration = next_start_time - scene["start"]
            # Adding 2 before the floor division rounds a remainder of 3s or more up to one more clip.
            num_video_segments = int((duration + 2) // 5)

            print(f"Scene {i+1} has {num_video_segments} segments")
            for j in range(num_video_segments):
                video_name = f"video_{str(video_num).zfill(2)}_{str(i+1).zfill(2)}_{str(j+1).zfill(2)}_{timestamp}.mp4"
                video_output_path = os.path.join(audio_videos_dir, video_name)
                if video_num > skip_first:
                    seed = random.randint(0, MAX_SEED)
                    generate_video(hunyuan_video_sampler=hunyuan_video_sampler, height=HEIGHT, width=WIDTH, video_length=121, infer_steps=50,
                        prompt=prompt, negative_prompt=negative_prompt, flow_shift=7.0, filename=video_output_path,
                        seed=seed, cfg_scale=7.5, batch_size=1, embedded_cfg_scale=1.0)

                    time.sleep(1)  # Pause for 1 second

                video_num += 1  # Increment video number for the next segment
    finally:
        # Release the sampler even when a clip fails, so a retry in the same
        # session does not start with the model still resident.
        unload_hunyuan_video_sampler(hunyuan_video_sampler)
    return


def process_all_audios(audio_file, config: dict):
    """
    Run scene extraction and image generation for a single audio file.

    Returns:
        ``(config, scenes, audio_images_dir, audio_videos_dir,
        last_end_value, timestamp)``.
    """
    print(f"Processing audio file: {audio_file}")

    scene_outputs = process_audio_scenes(audio_file, config)
    scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp = scene_outputs

    scene_dump = json.dumps(scenes, indent=4)
    print(f'{len(scenes)} scenes:\n{scene_dump}')

    # Create starting images for scenes
    process_audio_images(config, scenes, audio_images_dir)

    return config, scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp

def create_video(images_only, audio_file=None, config=None):
    """
    Run the workflow for one audio file and print a summary of its outputs.

    Scenes and starting images are always produced (via process_all_audios);
    the per-scene videos are produced only when images_only is falsy.

    Args:
        images_only: When truthy, skip process_audio_video and stop after the
            scenes and images have been created.
        audio_file: Audio file to process. When omitted, the module-level
            variable ``audio_file`` is used, which is how this function
            originally obtained its input (from the caller's loop variable).
        config: Configuration dict passed to process_all_audios. Defaults to
            the module-level CONFIG.

    Raises:
        NameError: If audio_file is omitted and no module-level ``audio_file``
            is defined.
    """
    if audio_file is None:
        # Backward compatibility: existing callers invoke create_video(flag)
        # from inside `for audio_file in ...` and rely on the global lookup.
        try:
            audio_file = globals()["audio_file"]
        except KeyError:
            raise NameError("name 'audio_file' is not defined") from None
    if config is None:
        config = CONFIG

    config, scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp = process_all_audios(audio_file, config)
    if not images_only:
        # NOTE(review): the trailing 0 is presumably the number of leading
        # video segments to skip -- confirm against process_audio_video.
        process_audio_video(config, scenes, audio_images_dir, audio_videos_dir, last_end_value, timestamp, 0)

    print(f'audio_images_dir: {audio_images_dir}')
    print(f'audio_videos_dir: {audio_videos_dir}')
    print(f'last_end_value: {last_end_value}')
    print(f'timestamp: {timestamp}')
    


In [5]:
# Run the workflow for every configured audio file.
# human_in_loop is passed as create_video's images_only flag: True stops after
# the scene images are generated (so they can be curated by hand), False goes
# on to generate the videos as well.
human_in_loop = False
# NOTE: create_video(human_in_loop) picks up the current file from the
# module-level name `audio_file`, so the loop variable must keep that name.
for audio_file in CONFIG["audio_files"]:
    create_video(human_in_loop)

# reset_memory is defined elsewhere in the notebook; presumably it releases
# memory held on `device` once all files are done -- confirm.
reset_memory(device)

Processing audio file: /mnt/d/Share/Audio/WingsOfLight.mp3
Create unique directories for images and videos
Transcribe audio using Whisper


  checkpoint = torch.load(fp, map_location=device)


Generate video summary using OpenAI
Create scenes based on lyrics
revised scenes
25 scenes:
[
    {
        "start": 0,
        "text": "Angels. Some see them as baby-faced protectors. Others envision them locked in battle with demons. But what if they're more than our eyes can handle?",
        "scene_description": "A vast celestial expanse unfolds, where soft, golden twilight mingles with swirling iridescent clouds. Ethereal beings, adorned in flowing robes of shimmering light, hover gracefully, their wings glistening like dew-kissed petals. The air is thick with a palpable serenity, as gentle whispers of hope echo through the atmosphere. Shadows flicker playfully, hinting at unseen conflicts, yet a profound tranquility reigns. The palette is a harmonious blend of pastel blues and warm yellows, creating a watercolor dreamscape that invites exploration of divine mysteries.",
        "action_sequence": "Angels glide softly through the air, their luminous forms illuminating hidden pathw

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (115 > 77). Running this sequence through the model will result in indexing errors


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

[32m2025-02-02 11:05:26.444[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mfrom_pretrained[0m:[36m153[0m - [1mGot text-to-video model root path: ckpts/hunyuan-video-t2v-720p/transformers/hunyuan_video_720_quanto_int8.safetensors[0m
[32m2025-02-02 11:05:26.444[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mfrom_pretrained[0m:[36m187[0m - [1mBuilding model...[0m
[32m2025-02-02 11:05:26.478[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mfrom_pretrained[0m:[36m201[0m - [1mLoading torch model ckpts/hunyuan-video-t2v-720p/transformers/hunyuan_video_720_quanto_int8.safetensors...[0m


Pinning data of 'ckpts/hunyuan-video-t2v-720p/transformers/hunyuan_video_720_quanto_int8.safetensors' to reserved RAM


[32m2025-02-02 11:05:35.359[0m | [1mINFO    [0m | [36mhyvideo.vae[0m:[36mload_vae[0m:[36m29[0m - [1mLoading 3D VAE model (884-16c-hy) from: ./ckpts/hunyuan-video-t2v-720p/vae[0m


The whole model was pinned to reserved RAM: 54 large blocks spread across 12580.24 MB


[32m2025-02-02 11:05:36.353[0m | [1mINFO    [0m | [36mhyvideo.vae[0m:[36mload_vae[0m:[36m55[0m - [1mVAE to dtype: torch.float16[0m
[32m2025-02-02 11:05:36.743[0m | [1mINFO    [0m | [36mhyvideo.text_encoder[0m:[36mload_tokenizer[0m:[36m64[0m - [1mLoading tokenizer (llm) from: ./ckpts/text_encoder[0m
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.
[32m2025-02-02 11:05:36.911[0m | [1mINFO    [0m | [36mhyvideo.text_encoder[0m:[36mload_text_encoder[0m:[36m

[1m[95m************ Memory Management for the GPU Poor (mmgp 3.1) by DeepBeepMeep ************[0m[0m
You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption.
Model 'transformer' is already quantized to format 'qint8'
Pinning data of 'vae' to reserved RAM
The whole model was pinned to reserved RAM: 2 large blocks spread across 470.12 MB
Pinning data of 'text_encoder' to reserved RAM
The whole model was pinned to reserved RAM: 34 large blocks spread across 7661.63 MB
Model 'transformer' already pinned to reserved memory
Pinning data of 'text_encoder_2' to reserved RAM
The whole model was pinned to reserved RAM: 1 large blocks spread across 234.72 MB


[32m2025-02-02 11:05:44.984[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 11:05:44.990[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vast celestial expanse unfolds, where soft, golden twilight mingles with swirling iridescent clouds. Ethereal beings, adorned in flowing robes of shimmering light, hover gracefully, their wings glistening like dew-kissed petals. The air is thick with a palpable serenity, as gentle whispers of hope echo through the atmosphere. Shadows flicker playfully, hinting at unseen conflicts, yet a profound tranquility reigns. The palette is a harmonious blend of pastel blues and warm yellows, creating a watercolor dreamscape that invites exploration of divin

Scene 1 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 11:22:37.944[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1012.9533848762512[0m
[32m2025-02-02 11:22:39.468[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 11:22:39.478[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vast celestial expanse unfolds, where soft, golden twilight mingles with swirling iridescent clouds. Ethereal beings, adorned in flowing robes of shimmering light, hover gracefully, their wings glistening like dew-kissed petals. The air is thick with a palpable serenity, as gentle whispers of hope echo through the atmosphere. Shadows flicker playfully, hinting at unseen conflicts, yet a profound

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 11:39:25.696[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.2170414924622[0m
[32m2025-02-02 11:39:27.240[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 11:39:27.248[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vast celestial expanse unfolds, where soft, golden twilight mingles with swirling iridescent clouds. Ethereal beings, adorned in flowing robes of shimmering light, hover gracefully, their wings glistening like dew-kissed petals. The air is thick with a palpable serenity, as gentle whispers of hope echo through the atmosphere. Shadows flicker playfully, hinting at unseen conflicts, yet a profound

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 11:56:15.491[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.2424352169037[0m
[32m2025-02-02 11:56:16.957[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 11:56:16.961[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['In a twilight realm, a tapestry of stars twinkles against a deep indigo sky. Ethereal figures, cloaked in flowing silks that shimmer like starlight, drift among clouds of silver mist. The atmosphere hums with a gentle melody, each note resonating with the heartbeat of the universe. Shadows weave through the mist, creating a dance of light and dark, while the colors shift subtly, evoking a sense of

Scene 2 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 12:13:03.771[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.8104362487793[0m
[32m2025-02-02 12:13:05.216[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 12:13:05.221[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['In a twilight realm, a tapestry of stars twinkles against a deep indigo sky. Ethereal figures, cloaked in flowing silks that shimmer like starlight, drift among clouds of silver mist. The atmosphere hums with a gentle melody, each note resonating with the heartbeat of the universe. Shadows weave through the mist, creating a dance of light and dark, while the colors shift subtly, evoking a sense of

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 12:29:54.610[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.3885638713837[0m
[32m2025-02-02 12:29:56.078[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 12:29:56.087[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['In a twilight realm, a tapestry of stars twinkles against a deep indigo sky. Ethereal figures, cloaked in flowing silks that shimmer like starlight, drift among clouds of silver mist. The atmosphere hums with a gentle melody, each note resonating with the heartbeat of the universe. Shadows weave through the mist, creating a dance of light and dark, while the colors shift subtly, evoking a sense of

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 12:46:53.887[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1017.7991952896118[0m
[32m2025-02-02 12:46:55.353[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 12:46:55.360[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden glow, where flowers sway gently in a warm breeze. Ethereal beings, dressed in robes of translucent light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves, castin

Scene 3 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 13:03:50.500[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1015.1390159130096[0m
[32m2025-02-02 13:03:52.006[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 13:03:52.012[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden glow, where flowers sway gently in a warm breeze. Ethereal beings, dressed in robes of translucent light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves, castin

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 13:20:42.495[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1010.4824481010437[0m
[32m2025-02-02 13:20:44.032[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 13:20:44.035[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden glow, where flowers sway gently in a warm breeze. Ethereal beings, dressed in robes of translucent light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves, castin

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 13:37:31.726[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.6903343200684[0m
[32m2025-02-02 13:37:33.201[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 13:37:33.204[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A serene lake reflects the twilight sky, where soft hues of pink and lavender blend seamlessly. Ethereal figures, adorned in flowing garments that ripple like water, glide above the surface, their presence creating gentle ripples that shimmer in the fading light. The atmosphere is tranquil, filled with the soothing sounds of nature, as fireflies dance in the air, adding a touch of magic. This scen

Scene 4 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 13:54:20.113[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.9080157279968[0m
[32m2025-02-02 13:54:21.596[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 13:54:21.599[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A serene lake reflects the twilight sky, where soft hues of pink and lavender blend seamlessly. Ethereal figures, adorned in flowing garments that ripple like water, glide above the surface, their presence creating gentle ripples that shimmer in the fading light. The atmosphere is tranquil, filled with the soothing sounds of nature, as fireflies dance in the air, adding a touch of magic. This scen

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 14:11:09.323[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.723305940628[0m
[32m2025-02-02 14:11:11.743[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 14:11:11.751[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A serene lake reflects the twilight sky, where soft hues of pink and lavender blend seamlessly. Ethereal figures, adorned in flowing garments that ripple like water, glide above the surface, their presence creating gentle ripples that shimmer in the fading light. The atmosphere is tranquil, filled with the soothing sounds of nature, as fireflies dance in the air, adding a touch of magic. This scene

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 14:28:01.949[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1010.1968483924866[0m
[32m2025-02-02 14:28:03.442[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 14:28:03.450[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ["An enchanting forest emerges, where ancient trees stretch towards the heavens, their leaves whispering secrets in the breeze. Ethereal beings, cloaked in robes of emerald and gold, weave through the branches, their presence illuminating the darkened pathways. The air is alive with the sounds of rustling leaves and distant chimes, creating a symphony of nature's beauty. Soft beams of light filter t

Scene 5 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 14:44:50.487[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.0360462665558[0m
[32m2025-02-02 14:44:52.023[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 14:44:52.028[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ["An enchanting forest emerges, where ancient trees stretch towards the heavens, their leaves whispering secrets in the breeze. Ethereal beings, cloaked in robes of emerald and gold, weave through the branches, their presence illuminating the darkened pathways. The air is alive with the sounds of rustling leaves and distant chimes, creating a symphony of nature's beauty. Soft beams of light filter t

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 15:01:40.751[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.7224991321564[0m
[32m2025-02-02 15:01:42.268[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 15:01:42.272[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ["An enchanting forest emerges, where ancient trees stretch towards the heavens, their leaves whispering secrets in the breeze. Ethereal beings, cloaked in robes of emerald and gold, weave through the branches, their presence illuminating the darkened pathways. The air is alive with the sounds of rustling leaves and distant chimes, creating a symphony of nature's beauty. Soft beams of light filter t

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 15:18:37.420[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1015.1474347114563[0m
[32m2025-02-02 15:18:38.924[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 15:18:38.927[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A celestial ocean glimmers under a starlit sky, where waves of light ripple gently across the surface. Ethereal beings, adorned in flowing garments of silver and blue, glide above the water, their movements fluid and serene. The atmosphere is tranquil, with the soft sound of waves lapping against the shore creating a soothing backdrop. Wisps of fog drift lazily, enhancing the dreamlike quality of 

Scene 6 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 15:35:52.563[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1033.6350102424622[0m
[32m2025-02-02 15:35:54.146[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 15:35:54.154[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A celestial ocean glimmers under a starlit sky, where waves of light ripple gently across the surface. Ethereal beings, adorned in flowing garments of silver and blue, glide above the water, their movements fluid and serene. The atmosphere is tranquil, with the soft sound of waves lapping against the shore creating a soothing backdrop. Wisps of fog drift lazily, enhancing the dreamlike quality of 

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 15:53:03.772[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1029.6165308952332[0m
[32m2025-02-02 15:53:05.610[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 15:53:05.613[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A celestial ocean glimmers under a starlit sky, where waves of light ripple gently across the surface. Ethereal beings, adorned in flowing garments of silver and blue, glide above the water, their movements fluid and serene. The atmosphere is tranquil, with the soft sound of waves lapping against the shore creating a soothing backdrop. Wisps of fog drift lazily, enhancing the dreamlike quality of 

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 16:10:08.582[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1022.9682924747467[0m
[32m2025-02-02 16:10:10.120[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 16:10:10.126[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A majestic mountain range rises against a twilight sky, where hues of orange and purple blend harmoniously. Ethereal figures, cloaked in robes that mimic the colors of the sunset, stand atop the peaks, their wings unfurling like banners of hope. The air is crisp and invigorating, filled with the scent of pine and earth. A gentle breeze carries whispers of ancient wisdom, creating a sense of connec

Scene 7 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 16:27:16.233[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1026.1063694953918[0m
[32m2025-02-02 16:27:17.830[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 16:27:17.844[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A majestic mountain range rises against a twilight sky, where hues of orange and purple blend harmoniously. Ethereal figures, cloaked in robes that mimic the colors of the sunset, stand atop the peaks, their wings unfurling like banners of hope. The air is crisp and invigorating, filled with the scent of pine and earth. A gentle breeze carries whispers of ancient wisdom, creating a sense of connec

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 16:44:42.879[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1045.0342240333557[0m
[32m2025-02-02 16:44:44.529[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 16:44:44.532[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A majestic mountain range rises against a twilight sky, where hues of orange and purple blend harmoniously. Ethereal figures, cloaked in robes that mimic the colors of the sunset, stand atop the peaks, their wings unfurling like banners of hope. The air is crisp and invigorating, filled with the scent of pine and earth. A gentle breeze carries whispers of ancient wisdom, creating a sense of connec

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 17:01:52.517[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1027.9845027923584[0m
[32m2025-02-02 17:01:54.390[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 17:01:54.397[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant cityscape glows under a canopy of stars, where ethereal beings weave through the streets, their robes shimmering like the night sky. The atmosphere is alive with the sounds of laughter and music, as colorful lights flicker like fireflies. Angels, adorned in garments of light, interact with the environment, their presence infusing the city with warmth and joy. This scene captures the esse

Scene 8 has 1 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 17:18:53.761[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1019.3631725311279[0m
[32m2025-02-02 17:18:55.629[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 17:18:55.640[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical night unfolds, where shadows stretch and twist against a backdrop of shimmering stars. Ethereal figures, draped in flowing silks that glimmer like moonlight, emerge from the darkness, cradling delicate orbs of light. The air is filled with the haunting strains of violins, weaving a melody that resonates deep within the soul. Wisps of silver mist swirl around, creating an atmosphere of e

Scene 9 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 17:35:53.211[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1017.5704500675201[0m
[32m2025-02-02 17:35:55.596[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 17:35:55.599[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical night unfolds, where shadows stretch and twist against a backdrop of shimmering stars. Ethereal figures, draped in flowing silks that glimmer like moonlight, emerge from the darkness, cradling delicate orbs of light. The air is filled with the haunting strains of violins, weaving a melody that resonates deep within the soul. Wisps of silver mist swirl around, creating an atmosphere of e

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 17:52:46.517[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1010.9174227714539[0m
[32m2025-02-02 17:52:48.130[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 17:52:48.138[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical night unfolds, where shadows stretch and twist against a backdrop of shimmering stars. Ethereal figures, draped in flowing silks that glimmer like moonlight, emerge from the darkness, cradling delicate orbs of light. The air is filled with the haunting strains of violins, weaving a melody that resonates deep within the soul. Wisps of silver mist swirl around, creating an atmosphere of e

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 18:09:36.500[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.3612308502197[0m
[32m2025-02-02 18:09:38.039[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 18:09:38.046[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal landscape emerges, where soft, billowing clouds float above a tranquil sea of stars. Ethereal beings, adorned in robes of shimmering light, drift gracefully, their forms merging with the mist. The air is thick with emotion, as the gentle strains of violins echo through the night, creating a hauntingly beautiful atmosphere. Shadows dance playfully, while the colors blend seamlessly, evoki

Scene 10 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 18:26:25.683[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.636066198349[0m
[32m2025-02-02 18:26:27.172[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 18:26:27.176[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal landscape emerges, where soft, billowing clouds float above a tranquil sea of stars. Ethereal beings, adorned in robes of shimmering light, drift gracefully, their forms merging with the mist. The air is thick with emotion, as the gentle strains of violins echo through the night, creating a hauntingly beautiful atmosphere. Shadows dance playfully, while the colors blend seamlessly, evokin

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 18:43:15.072[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.8964469432831[0m
[32m2025-02-02 18:43:16.620[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 18:43:16.627[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal landscape emerges, where soft, billowing clouds float above a tranquil sea of stars. Ethereal beings, adorned in robes of shimmering light, drift gracefully, their forms merging with the mist. The air is thick with emotion, as the gentle strains of violins echo through the night, creating a hauntingly beautiful atmosphere. Shadows dance playfully, while the colors blend seamlessly, evoki

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 19:00:05.068[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.4404366016388[0m
[32m2025-02-02 19:00:06.574[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 19:00:06.584[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A dreamlike meadow unfurls under a canopy of twinkling stars, where soft grasses sway gently in the night breeze. Ethereal figures, cloaked in robes of soft pastels, dance among the wildflowers, their movements fluid and graceful. The air is fragrant with the scent of blooming jasmine, as the haunting melody of violins fills the atmosphere, creating a sense of longing and nostalgia. The scene is b

Scene 11 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 19:17:00.687[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1014.1029622554779[0m
[32m2025-02-02 19:17:02.268[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 19:17:02.275[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A dreamlike meadow unfurls under a canopy of twinkling stars, where soft grasses sway gently in the night breeze. Ethereal figures, cloaked in robes of soft pastels, dance among the wildflowers, their movements fluid and graceful. The air is fragrant with the scent of blooming jasmine, as the haunting melody of violins fills the atmosphere, creating a sense of longing and nostalgia. The scene is b

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 19:33:58.421[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1016.1454222202301[0m
[32m2025-02-02 19:33:59.936[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 19:33:59.945[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A dreamlike meadow unfurls under a canopy of twinkling stars, where soft grasses sway gently in the night breeze. Ethereal figures, cloaked in robes of soft pastels, dance among the wildflowers, their movements fluid and graceful. The air is fragrant with the scent of blooming jasmine, as the haunting melody of violins fills the atmosphere, creating a sense of longing and nostalgia. The scene is b

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 19:50:48.581[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.6348102092743[0m
[32m2025-02-02 19:50:50.087[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 19:50:50.096[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil glade appears, where ancient trees stand sentinel under a shimmering moonlight. Ethereal beings, adorned in flowing garments of silver and blue, gather around a crystal-clear pond, their reflections shimmering on the surface. The air is filled with the soft sounds of nature, as the gentle strains of violins resonate in harmony with the rustling leaves. This scene captures a moment of se

Scene 12 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 20:07:43.601[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1013.504429101944[0m
[32m2025-02-02 20:07:46.510[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 20:07:46.517[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil glade appears, where ancient trees stand sentinel under a shimmering moonlight. Ethereal beings, adorned in flowing garments of silver and blue, gather around a crystal-clear pond, their reflections shimmering on the surface. The air is filled with the soft sounds of nature, as the gentle strains of violins resonate in harmony with the rustling leaves. This scene captures a moment of ser

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 20:24:42.536[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1016.0187361240387[0m
[32m2025-02-02 20:24:44.074[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 20:24:44.076[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil glade appears, where ancient trees stand sentinel under a shimmering moonlight. Ethereal beings, adorned in flowing garments of silver and blue, gather around a crystal-clear pond, their reflections shimmering on the surface. The air is filled with the soft sounds of nature, as the gentle strains of violins resonate in harmony with the rustling leaves. This scene captures a moment of se

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 20:41:32.161[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.0844993591309[0m
[32m2025-02-02 20:41:33.642[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 20:41:33.651[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal dreamscape unfolds, where layers of soft clouds blend into deep, vibrant hues. Ethereal figures, cloaked in flowing robes of deep indigo and gold, drift through the mist, their presence a gentle reminder of guidance. The atmosphere is introspective, with reflections shimmering on the surface of a tranquil lake. Wisps of fog drift lazily, enhancing the sense of tranquility, while the ligh

Scene 13 has 4 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 20:58:25.396[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1011.7450828552246[0m
[32m2025-02-02 20:58:26.958[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 20:58:26.961[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal dreamscape unfolds, where layers of soft clouds blend into deep, vibrant hues. Ethereal figures, cloaked in flowing robes of deep indigo and gold, drift through the mist, their presence a gentle reminder of guidance. The atmosphere is introspective, with reflections shimmering on the surface of a tranquil lake. Wisps of fog drift lazily, enhancing the sense of tranquility, while the ligh

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 21:15:19.827[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1012.8651037216187[0m
[32m2025-02-02 21:15:21.334[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 21:15:21.344[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal dreamscape unfolds, where layers of soft clouds blend into deep, vibrant hues. Ethereal figures, cloaked in flowing robes of deep indigo and gold, drift through the mist, their presence a gentle reminder of guidance. The atmosphere is introspective, with reflections shimmering on the surface of a tranquil lake. Wisps of fog drift lazily, enhancing the sense of tranquility, while the ligh

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 21:32:10.835[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.4897844791412[0m
[32m2025-02-02 21:32:12.355[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 21:32:12.357[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A surreal dreamscape unfolds, where layers of soft clouds blend into deep, vibrant hues. Ethereal figures, cloaked in flowing robes of deep indigo and gold, drift through the mist, their presence a gentle reminder of guidance. The atmosphere is introspective, with reflections shimmering on the surface of a tranquil lake. Wisps of fog drift lazily, enhancing the sense of tranquility, while the ligh

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 21:49:03.857[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1011.4990420341492[0m
[32m2025-02-02 21:49:05.359[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 21:49:05.367[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical valley emerges, where soft, rolling hills are cloaked in a blanket of mist. Ethereal beings, adorned in robes of shimmering silver, wander through the landscape, their movements fluid and graceful. The air is filled with the gentle sounds of nature, as the soft glow of twilight bathes the scene in warmth. Shadows dance playfully, creating a magical atmosphere that invites introspection 

Scene 14 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 22:05:53.007[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.6390855312347[0m
[32m2025-02-02 22:05:54.467[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 22:05:54.472[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical valley emerges, where soft, rolling hills are cloaked in a blanket of mist. Ethereal beings, adorned in robes of shimmering silver, wander through the landscape, their movements fluid and graceful. The air is filled with the gentle sounds of nature, as the soft glow of twilight bathes the scene in warmth. Shadows dance playfully, creating a magical atmosphere that invites introspection 

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 22:22:39.864[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1005.3914721012115[0m
[32m2025-02-02 22:22:41.369[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 22:22:41.372[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A mystical valley emerges, where soft, rolling hills are cloaked in a blanket of mist. Ethereal beings, adorned in robes of shimmering silver, wander through the landscape, their movements fluid and graceful. The air is filled with the gentle sounds of nature, as the soft glow of twilight bathes the scene in warmth. Shadows dance playfully, creating a magical atmosphere that invites introspection 

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 22:39:30.114[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.7413830757141[0m
[32m2025-02-02 22:39:31.666[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 22:39:31.676[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ["A serene landscape unfolds, where a tranquil river flows gently through a lush forest. Ethereal figures, cloaked in robes of deep green and gold, stand by the water's edge, their reflections shimmering in the current. The air is filled with the scent of fresh pine and blooming flowers, creating an atmosphere of peace and harmony. Soft beams of sunlight filter through the leaves, casting a golden h

Scene 15 has 2 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 22:56:18.131[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.4545087814331[0m
[32m2025-02-02 22:56:19.649[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 22:56:19.658[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ["A serene landscape unfolds, where a tranquil river flows gently through a lush forest. Ethereal figures, cloaked in robes of deep green and gold, stand by the water's edge, their reflections shimmering in the current. The air is filled with the scent of fresh pine and blooming flowers, creating an atmosphere of peace and harmony. Soft beams of sunlight filter through the leaves, casting a golden h

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 23:13:09.003[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.3444700241089[0m
[32m2025-02-02 23:13:10.737[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 23:13:10.745[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, th

Scene 16 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 23:29:57.866[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.119546175003[0m
[32m2025-02-02 23:29:59.397[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 23:29:59.409[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, the

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-02 23:46:45.395[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1005.9859437942505[0m
[32m2025-02-02 23:46:49.248[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-02 23:46:49.257[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, th

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 00:03:36.718[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.4598152637482[0m
[32m2025-02-03 00:03:38.215[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 00:03:38.222[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A lush meadow stretches beneath a radiant sun, where vibrant flowers sway gently in the breeze. Ethereal figures, cloaked in robes of soft colors, move gracefully among the blossoms, their wings creating a delicate rustle. The air is filled with the sweet scent of blooming petals, and the atmosphere hums with a sense of joy and abundance. Sunlight filters through the leaves, casting playful shadow

Scene 17 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 00:20:25.176[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.9535834789276[0m
[32m2025-02-03 00:20:26.673[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 00:20:26.680[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A lush meadow stretches beneath a radiant sun, where vibrant flowers sway gently in the breeze. Ethereal figures, cloaked in robes of soft colors, move gracefully among the blossoms, their wings creating a delicate rustle. The air is filled with the sweet scent of blooming petals, and the atmosphere hums with a sense of joy and abundance. Sunlight filters through the leaves, casting playful shadow

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 00:37:14.641[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.9605386257172[0m
[32m2025-02-03 00:37:16.127[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 00:37:16.136[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A lush meadow stretches beneath a radiant sun, where vibrant flowers sway gently in the breeze. Ethereal figures, cloaked in robes of soft colors, move gracefully among the blossoms, their wings creating a delicate rustle. The air is filled with the sweet scent of blooming petals, and the atmosphere hums with a sense of joy and abundance. Sunlight filters through the leaves, casting playful shadow

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 00:54:04.296[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.1590254306793[0m
[32m2025-02-03 00:54:05.819[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 00:54:05.823[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden light, where flowers sway gently in a warm breeze. Ethereal beings, adorned in flowing robes of shimmering light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves

Scene 18 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 01:10:52.453[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.6292922496796[0m
[32m2025-02-03 01:10:53.954[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 01:10:53.957[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden light, where flowers sway gently in a warm breeze. Ethereal beings, adorned in flowing robes of shimmering light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 01:27:43.784[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.8260288238525[0m
[32m2025-02-03 01:27:45.303[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 01:27:45.313[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A tranquil garden blooms under a soft, golden light, where flowers sway gently in a warm breeze. Ethereal beings, adorned in flowing robes of shimmering light, tend to the blossoms, their hands brushing against petals that shimmer like gemstones. The air is fragrant with the scent of blooming jasmine, and the atmosphere is infused with a sense of nurturing love. Sunlight filters through the leaves

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 01:44:29.889[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1004.575159072876[0m
[32m2025-02-03 01:44:31.380[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 01:44:31.389[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, the

Scene 19 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 02:01:19.912[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.5223784446716[0m
[32m2025-02-03 02:01:21.408[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 02:01:21.416[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, th

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 02:18:10.095[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.6776804924011[0m
[32m2025-02-03 02:18:11.617[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 02:18:11.624[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of wildflowers dances under a soft, golden sun, each petal capturing the light like precious gems. Ethereal beings, adorned in flowing robes of pastel colors, drift gracefully among the blossoms, their presence nurturing and warm. The air is fragrant with the scent of blooming flowers, creating an atmosphere of joy and compassion. As the light casts a golden hue over everything, th

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 02:34:57.477[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1005.8519771099091[0m
[32m2025-02-03 02:34:58.948[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 02:34:58.955[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

Scene 20 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 02:51:48.387[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.4319794178009[0m
[32m2025-02-03 02:51:49.932[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 02:51:49.940[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 03:08:35.530[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1005.5888245105743[0m
[32m2025-02-03 03:08:40.155[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 03:08:40.160[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 03:25:27.350[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.1900134086609[0m
[32m2025-02-03 03:25:28.771[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 03:25:28.777[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

Scene 21 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 03:42:18.513[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.7354643344879[0m
[32m2025-02-03 03:42:20.001[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 03:42:20.010[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 03:59:08.159[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.1485204696655[0m
[32m2025-02-03 03:59:09.666[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 03:59:09.672[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 04:15:57.406[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.7329790592194[0m
[32m2025-02-03 04:15:58.889[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 04:15:58.896[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

Scene 22 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 04:32:45.929[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.0320868492126[0m
[32m2025-02-03 04:32:47.409[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 04:32:47.416[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 04:49:33.053[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1005.63574051857[0m
[32m2025-02-03 04:49:34.542[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 04:49:34.547[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light cast

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 05:06:24.043[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.4943416118622[0m
[32m2025-02-03 05:06:25.483[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 05:06:25.487[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

Scene 23 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 05:23:14.960[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.4727871417999[0m
[32m2025-02-03 05:23:16.421[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 05:23:16.428[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 05:40:03.418[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.9901716709137[0m
[32m2025-02-03 05:40:04.888[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 05:40:04.895[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 05:56:54.083[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.1875014305115[0m
[32m2025-02-03 05:56:55.571[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 05:56:55.577[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

Scene 24 has 3 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 06:13:42.526[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.9484024047852[0m
[32m2025-02-03 06:13:43.974[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 06:13:43.977[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 06:30:33.199[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1009.2225511074066[0m
[32m2025-02-03 06:30:34.709[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 06:30:34.713[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 06:47:22.853[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1008.13964676857[0m
[32m2025-02-03 06:47:24.369[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 06:47:24.375[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light cast

Scene 25 has 2 segments


  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 07:04:12.349[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1007.9732949733734[0m
[32m2025-02-03 07:04:13.868[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m587[0m - [1mInput (height, width, video_length) = (480, 848, 121)[0m
[32m2025-02-03 07:04:13.872[0m | [34m[1mDEBUG   [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m647[0m - [34m[1m
                        height: 480
                         width: 848
                  video_length: 121
                        prompt: ['A vibrant field of flowers sways gently under a soft, warm breeze, each petal capturing the sunlight like jewels. The colors are rich and varied, creating an atmosphere of joy and compassion. Angels drift through the blossoms, their wings creating a soft rustle, as they tend to the flowers, symbolizing care and nurturing. The scene is infused with a sense of abundance and warmth, with the light ca

  0%|          | 0/50 [00:00<?, ?it/s]

[32m2025-02-03 07:21:00.810[0m | [1mINFO    [0m | [36mhyvideo.inference[0m:[36mpredict[0m:[36m681[0m - [1mSuccess, time: 1006.9379081726074[0m


audio_images_dir: ./images/WingsOfLight_20250202_110141
audio_videos_dir: ./output/WingsOfLight_20250202_110141
last_end_value: 365.04
timestamp: 20250202_110141
Processing audio file: /mnt/d/Share/Audio/WingsOfLight.mp3
Create unique directories for images and videos
Transcribe audio using Whisper


  checkpoint = torch.load(fp, map_location=device)


Generate video summary using OpenAI
Create scenes based on lyrics
revised scenes
26 scenes:
[
    {
        "start": 0,
        "text": "Angels, some see them as baby-faced protectors, others envision them locked in battle with demons.",
        "scene_description": "In a dreamlike watercolor world, soft pastels swirl into a twilight sky where luminous angels glide through ethereal clouds. Their wings shimmer with a celestial glow, casting gentle light on the encroaching shadows of demons below. The atmosphere is thick with tension, as beams of hope pierce the darkness, illuminating the serene yet haunting landscape, where chaos and tranquility coexist.",
        "action_sequence": "Angels unfurl their wings, radiating soft light that encircles the lurking shadows, creating a dance of illumination and darkness."
    },
    {
        "start": 15,
        "text": "Angels, some see them as baby-faced protectors, others envision them locked in battle with demons.",
        "scene_descripti

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

HfHubHTTPError: 503 Server Error: Service Temporarily Unavailable for url: https://huggingface.co/api/models/black-forest-labs/FLUX.1-dev/revision/refs%2Fpr%2F3

## Video generation is expensive; only run it after curating the scenes and images

In [None]:
# Resume the pipeline from a curated run: reload that run's scenes.json and
# pass it to process_audio_video together with the run's directories.
human_in_loop = True
skip_first = 0  # forwarded as the final argument to process_audio_video

if human_in_loop:
    # Identifiers and locations of the run being resumed.
    timestamp = '20250122_202916'
    last_end_value = 156.0
    audio_images_dir = './images/RememberUs_20250122_202916'
    audio_videos_dir = './output/RememberUs_20250122_202916'
    scenes_file_path = './images/RememberUs_20250122_202916/scenes.json'

    # Load the scene list from disk.
    with open(scenes_file_path, "r") as scenes_file:
        scenes = json.load(scenes_file)

    process_audio_video(
        CONFIG,
        scenes,
        audio_images_dir,
        audio_videos_dir,
        last_end_value,
        timestamp,
        skip_first,
    )