In [1]:
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont

def split_text_into_segments(text, font, max_width):
    words = text.split()
    segments = []
    current_segment = ""
    for word in words:
        test_line = current_segment + (" " if current_segment else "") + word
        w = font.getbbox(test_line)[2]
        if w <= max_width:
            current_segment = test_line
        else:
            if current_segment:
                segments.append(current_segment)
            current_segment = word
    if current_segment:
        segments.append(current_segment)
    print(f"print segments: {segments}")    
    return segments

def zoom_in(image, num_frames=30, zoom_factor=0.5):
    frames = []
    h, w = image.shape[:2]
    for i in range(num_frames):
        scale = 1.0 + (i / num_frames) * zoom_factor
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, 0, scale)
        frame = cv2.warpAffine(image, M, (w, h))
        frames.append(frame)
    return frames

def zoom_out(image, num_frames=30, zoom_factor=0.5):
    frames = []
    h, w = image.shape[:2]
    for i in range(num_frames):
        scale = 1.0 + ((num_frames - i - 1) / num_frames) * zoom_factor
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, 0, scale)
        frame = cv2.warpAffine(image, M, (w, h))
        frames.append(frame)
    return frames

def create_video_with_typewriter_effect(scene_data, output_filename, fontsize=60, video_fps=15):
    font = ImageFont.truetype("arial.ttf", fontsize)
    video_width = 1080
    video_height = 1920
    video_size = (video_width, video_height)
    video = cv2.VideoWriter(output_filename, 
                            cv2.VideoWriter_fourcc(*'mp4v'), 
                            video_fps, 
                            video_size)
    
    for idx, scene in enumerate(scene_data):
        narration = scene['Narration']
        scene_duration = scene['Audio_duration']
        image_path = scene.get('Save_image_path', None)
        num_frames = int(scene_duration * video_fps)
        
        if image_path:
            image = cv2.imread(image_path)
            image = cv2.resize(image, (video_width, video_height))
            frames = zoom_in(image, num_frames) if idx % 2 == 0 else zoom_out(image, num_frames)
        else:
            frames = [np.zeros((video_height, video_width, 3), dtype=np.uint8) for _ in range(num_frames)]
        
        segments = split_text_into_segments(narration, font, video_width - 40)
        num_segments = len(segments)
        if num_segments == 0:
            continue
        
        total_chars = sum(len(segment) for segment in segments)
        
        # Allocate time to each segment proportionally to its length
        for segment in segments:
            seg_chars = len(segment)
            segment_duration = (seg_chars / total_chars) * scene_duration
            segment_frames = int(segment_duration * video_fps)
            total_chars_in_segment = len(segment)
            for frame_idx in range(segment_frames):
                frame = frames[frame_idx % num_frames].copy()
                frame_pil = Image.fromarray(frame)
                draw = ImageDraw.Draw(frame_pil)
                
                # Ensure final frame shows full text
                char_count = int(((frame_idx + 1) / segment_frames) * total_chars_in_segment)
                displayed_text = segment[:char_count]
                
                text_width, text_height = font.getbbox(displayed_text)[2:4]
                x = (video_width - text_width) // 2
                y = (video_height - text_height) // 2
                
                # Draw a black rectangle as background behind the text
                padding = 10
                rect_coords = [(x - padding, y - padding), (x + text_width + padding, y + text_height + padding)]
                draw.rectangle(rect_coords, fill=(0, 0, 0))
                
                # Now draw the text on top
                draw.text((x, y), displayed_text, font=font, fill=(255, 255, 255))
                
                video.write(np.array(frame_pil))
    
    video.release()
    print(f"Video saved as {output_filename}")

scene_data = [
    {'id': '1',
     'Scene': 'Sunny Meadow',
     'Description': 'Sunny Meadow where Barnaby and Sheldon meet near the big oak tree.',
     'Narration': 'One sunny morning, Barnaby hopped past Sheldon Shelldon, a tortoise whose shell was a beautiful shade of deep green. Sheldon was slowly, slowly making his way to the big oak tree   ',
     'Save_audio_path': 'arman_output\\New_Generated_voices\\voices_20250403064254\\voice_scene_1.mp3',
     'Audio_duration': 12.55,
     'Save_image_path': 'arman_output\\New_Generated_images\\images_20250403064306\\image_scene_1.png'},
]

output_filename = "typewriter_animatiosssssssssn.mp4"
create_video_with_typewriter_effect(scene_data, output_filename)


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [None]:
############# Static caption ###########3
import cv2
import numpy as np
import os
from datetime import datetime
from moviepy import *
from typing_extensions import TypedDict
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph, START, END
import json
from typing_extensions import TypedDict
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph, START, END
import json
from typing import Sequence
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, HumanMessage
from typing import Annotated
from IPython.display import display, Markdown
from PIL import Image, ImageDraw, ImageFont

# ----------------------- Data Types -----------------------
class ClassObject(TypedDict):
    Object: str
    Description: str

class MainCharacters(TypedDict):
    Name: str
    Appearance: str
    Characteristics: str

class SupportingCharacters(TypedDict):
    Name: str
    Appearance: str
    Characteristics: str

class ScenesList(TypedDict):
    id: str
    Scene: str
    Description: str
    Narration: str
    Img_prompt: str
    Save_audio_path: str
    Save_image_path: str

class GraphState(TypedDict):
    MainCharacters: list[MainCharacters]
    SupportingCharacters: list[SupportingCharacters]
    Scene_list: list[ScenesList]
    Objects: list[ClassObject]
    messages: Annotated[Sequence[BaseMessage], add_messages]
    pre_processing_video_path: str
    Voices_folder: str
    Images_folder: str

class SubState(TypedDict):
    current_scene: ScenesList
    output_folder: str

# ----------------------- Utility Functions -----------------------
def split_text_into_segments(text, font, max_width):
    """Split the text into segments that fit within max_width."""
    words = text.split()
    segments = []
    current_segment = ""
    for word in words:
        test_line = current_segment + (" " if current_segment else "") + word
        w = font.getbbox(test_line)[2]
        if w <= max_width:
            current_segment = test_line
        else:
            if current_segment:
                segments.append(current_segment)
            current_segment = word
    if current_segment:
        segments.append(current_segment)
    print(f"print segments: {segments}")    
    return segments

def zoom_in(image, num_frames=30, zoom_factor=0.5):
    """Generates frames for a zoom-in effect."""
    frames = []
    h, w = image.shape[:2]
    for i in range(num_frames):
        scale = 1.0 + (i / num_frames) * zoom_factor
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, 0, scale)
        frame = cv2.warpAffine(image, M, (w, h))
        frames.append(frame)
    return frames

def zoom_out(image, num_frames=30, zoom_factor=0.5):
    """Generates frames for a zoom-out effect without blinking."""
    frames = []
    h, w = image.shape[:2]
    for i in range(num_frames):
        scale = 1.0 + ((num_frames - i - 1) / num_frames) * zoom_factor
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, 0, scale)
        frame = cv2.warpAffine(image, M, (w, h))
        frames.append(frame)
    return frames

def fade_in(image, num_frames=2):
    """Creates a fade-in effect from black to the image."""
    frames = []
    black = np.zeros_like(image)
    for i in range(num_frames):
        alpha = i / num_frames
        frame = cv2.addWeighted(image, alpha, black, 1 - alpha, 0)
        frames.append(frame)
    return frames

def fade_out(image, num_frames=30):
    """Creates a fade-out effect from image to black."""
    frames = []
    black = np.zeros_like(image)
    for i in range(num_frames):
        alpha = 1 - i / num_frames
        frame = cv2.addWeighted(image, alpha, black, 1 - alpha, 0)
        frames.append(frame)
    return frames

# ----------------------- Pre-processing Video Function -----------------------
def pre_processing_video(state: GraphState):
    fps = 2
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    dynamic_folder = f"pre_video_{timestamp}"
    output_folder = os.path.join("Pre_Generated_videos", dynamic_folder)
    output_folder = os.path.join("output", output_folder)
    os.makedirs(output_folder, exist_ok=True)
    file_name = f'pre_video_{timestamp}.mp4'
    
    pre_processing_video_path = os.path.join(output_folder, file_name)
    scene_list = state['Scene_list']
    if not scene_list:
        raise ValueError("No scenes provided.")

    first_img = cv2.imread(scene_list[0]["Save_image_path"])
    if first_img is None:
        raise ValueError(f"Cannot load image: {scene_list[0]['Save_image_path']}")
    
    h, w, _ = first_img.shape
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(pre_processing_video_path, fourcc, fps, (w, h))
    
    # Load a font for captions (adjust size as needed)
    caption_font = ImageFont.truetype("arial.ttf", 32)
    # Maximum width for caption text (with some margin)
    max_caption_width = w - 40
    
    for idx, scene in enumerate(scene_list):
        img_path = scene["Save_image_path"]
        duration = float(scene["Audio_duration"])
        narration = scene["Narration"]
        
        img = cv2.imread(img_path)
        if img is None:
            print(f"Warning: Cannot load image {img_path}. Skipping.")
            continue
        img = cv2.resize(img, (w, h))
        
        total_frames = int(fps * duration)
        fade_in_frame = int(fps * 1)
        fade_out_frame = int(fps * 1)
        first_image_duration = total_frames - fade_out_frame
        
        # Generate zoom/fade effect frames
        if idx == 0:
            zoom_in_frames = zoom_in(img, num_frames=first_image_duration, zoom_factor=0.5)
            final_zoom_frame = zoom_in_frames[-1]
            fade_out_frames = fade_out(final_zoom_frame, num_frames=fade_out_frame)
            effect_frames = zoom_in_frames + fade_out_frames
        elif idx % 2 == 1:
            zoom_out_frames = zoom_out(img, num_frames=first_image_duration, zoom_factor=0.5)
            final_zoom_frame = zoom_out_frames[-1]
            fade_out_frames = fade_out(final_zoom_frame, num_frames=fade_out_frame)
            effect_frames = zoom_out_frames + fade_out_frames
        else:
            fade_in_frames = fade_in(img, num_frames=fade_in_frame)
            zoom_in_frames = zoom_in(img, num_frames=first_image_duration, zoom_factor=0.5)
            final_zoom_frame = zoom_in_frames[-1]
            fade_out_frames = fade_out(final_zoom_frame, num_frames=fade_out_frame)
            effect_frames = fade_in_frames + zoom_in_frames + fade_out_frames
        
        # Use the split_text_into_segments logic to divide the narration into caption segments
        segments = split_text_into_segments(narration, caption_font, max_caption_width)
        num_segments = len(segments)
        if num_segments == 0:
            current_caption = ""
        else:
            # Determine frames per caption segment (display each segment for a proportionate time)
            frames_per_segment = total_frames // num_segments
        
        # Overlay the caption on each frame
        for frame_idx in range(total_frames):
            frame = effect_frames[frame_idx].copy()
            frame_pil = Image.fromarray(frame)
            draw = ImageDraw.Draw(frame_pil)
            
            if num_segments > 0:
                # Determine current segment index based on frame index
                segment_index = min(frame_idx // frames_per_segment, num_segments - 1)
                current_caption = segments[segment_index]
            else:
                current_caption = ""
            
            # Measure text size and calculate position (bottom center with padding)
            text_width, text_height = caption_font.getbbox(current_caption)[2:4]
            x = (w - text_width) // 2
            y = h - text_height - 50  # 50 pixels margin from bottom
            
            # Draw a black rectangle as a background for the caption text
            padding = 10
            draw.rectangle([(x - padding, y - padding), (x + text_width + padding, y + text_height + padding)], fill=(0, 0, 0))
            # Draw the caption text in white
            draw.text((x, y), current_caption, font=caption_font, fill=(255, 255, 255))
            
            video_writer.write(np.array(frame_pil))
    
    video_writer.release()
    print(f"pre_processing_video saved as {pre_processing_video_path}") 
    state['pre_processing_video_path'] = pre_processing_video_path 
    return state

# ----------------------- Workflow -----------------------
workflow = StateGraph(GraphState)
workflow.add_node('pre_processing_video', pre_processing_video)
workflow.add_edge(START, 'pre_processing_video')
workflow.add_edge('pre_processing_video', END)
app = workflow.compile()

# Invoke the workflow with your scene data (assumed to be available in resp3['Scene_list'])
resp4 = app.invoke({"Scene_list": resp3['Scene_list']})
print(resp4)


In [None]:
import cv2
import numpy as np
import math
import os
import time
import asyncio
from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass
from typing import Optional, List
from PIL import Image, ImageDraw, ImageFont
import random
# Fixed resolution and fps
TARGET_W, TARGET_H = 720, 1280
FPS = 30

# Utilities
import math
 

import random
import math

class Particle:
    def __init__(self, width, height, color=(255, 255, 255), min_size=2, max_size=5):
        self.width = width
        self.height = height
        self.color = color
        self.min_size = min_size
        self.max_size = max_size
        
        self.reset()

    def reset(self):
        speed_factor = 1.75 
        self.x = 0  # Start from left
        self.y = random.uniform(0, self.height)  # Random Y position
        self.radius = random.uniform(self.min_size, self.max_size)
        self.speed_x = random.uniform(3.0, 8.0)*speed_factor  # Strong rightward motion
        self.speed_y = random.uniform(-1.0, 1.0)*speed_factor  # Slight up/down
        self.alpha = random.randint(50,100)
        self.life = random.randint(40, 80)
        self.age = 0

    def move(self):
        self.x += self.speed_x
        self.y += self.speed_y
        self.age += 1

        fade_ratio = 1.0 - (self.age / self.life)
        self.alpha = int(self.alpha * fade_ratio)

        if self.x > self.width or self.age >= self.life:
            self.reset()

    def draw(self, draw_obj):
        rgba_color = (*self.color, max(0, min(255, self.alpha)))
        draw_obj.ellipse(
            (self.x, self.y, self.x + self.radius, self.y + self.radius),
            fill=rgba_color
        )

def split_text_into_segments(text: str, font: ImageFont.FreeTypeFont, max_width: int) -> List[str]:
    words = text.split()
    segments, current = [], ""
    for w in words:
        test = f"{current} {w}".strip()
        width = font.getbbox(test)[2]
        if width <= max_width:
            current = test
        else:
            if current:
                segments.append(current)
            current = w
    if current:
        segments.append(current)
    return segments

# Effects

def zoom_in(img, frames, zoom):
    h, w = img.shape[:2]
    out = []
    for i in range(frames):
        scale = 1 + (i/frames)*zoom
        M = cv2.getRotationMatrix2D((w//2, h//2), 0, scale)
        out.append(cv2.warpAffine(img, M, (w, h)))
    return out

def zoom_out(img, frames, zoom):
    h, w = img.shape[:2]
    out = []
    for i in range(frames):
        scale = 1 + ((frames-i-1)/frames)*zoom
        M = cv2.getRotationMatrix2D((w//2, h//2), 0, scale)
        out.append(cv2.warpAffine(img, M, (w, h)))
    return out

def fade_in(seq, fade):
    black = np.zeros_like(seq[0])
    out = []
    for i in range(fade):
        alpha = (i+1)/fade
        out.append(cv2.addWeighted(seq[0], alpha, black, 1-alpha, 0))
    return out + seq

def fade_out(seq, fade):
    black = np.zeros_like(seq[0])
    out = []
    for i in range(fade):
        alpha = 1 - (i+1)/fade
        out.append(cv2.addWeighted(seq[-1], alpha, black, 1-alpha, 0))
    return seq + out

 
async def single_video_generation(cur_scene, output_folder: str, ctx):
    
    narration = cur_scene.narration
    img_path = cur_scene.image_path
    
    audio_dur = float(cur_scene.audio_duration)
    fade_frames = int(0.5 * FPS)
    audio_frames     = math.ceil(audio_dur * FPS)
    zoom_frame=0.5
    
           
          
    total_video_frames = audio_frames + 2 * fade_frames
    core_frames      = audio_frames 
     
     
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Cannot load image: {img_path}")
    img = cv2.resize(img, (TARGET_W, TARGET_H))

    
    
     
    sid = int(cur_scene.id)
    if  sid % 2 != 0:
        core_seq = zoom_in(img, core_frames,zoom_frame)
    else:
        core_seq = zoom_out(img, core_frames, zoom_frame)

    seq = fade_out(fade_in(core_seq, fade_frames), fade_frames)

    if sid == 1:
        seq = fade_out(core_seq, fade_frames)
    else:
        seq = fade_out(fade_in(core_seq, fade_frames), fade_frames)
        
    os.makedirs(output_folder, exist_ok=True)
    out_path = os.path.join(output_folder, f"video_scene_{sid}.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(out_path, fourcc, FPS, (TARGET_W, TARGET_H))

    
            
    font = ImageFont.truetype("arial.ttf", 40)
    max_w = TARGET_W - 40
    segments = split_text_into_segments(narration, font, max_w)
    if not segments:
        segments = [""]
    total_chars = sum(len(s) for s in segments)
    
    if sid != 1:
        for f in range(fade_frames):
            writer.write(seq[f])
    caption_idx = 0
     
    num_particles = 20
    particle_color = (255, 255, 255)   
    particles = [Particle(TARGET_W, TARGET_H, color=particle_color, min_size=1, max_size=4) for _ in range(num_particles)]
    for seg in segments:
        seg_chars  = len(seg)
        seg_frames = max(1, int((seg_chars / total_chars) * audio_frames))
        for i in range(seg_frames):
            abs_frame = (0 if sid == 1 else fade_frames) + caption_idx
            if abs_frame >= len(seq):
                break

            frame   = seq[abs_frame].copy()
            img_pil = Image.fromarray(frame)
            draw    = ImageDraw.Draw(img_pil)
            for p in particles:
                p.draw(draw)
                p.move()
            # Partial text reveal
            count = int(((i+1) / seg_frames) * seg_chars)
            text  = seg[:count]
            tw, th = font.getbbox(text)[2:4]
            x = (TARGET_W - tw) // 2
            y = (TARGET_H - th) // 2

            # Draw background box + text
            pad = 10
            draw.rectangle([(x-pad,y-pad),(x+tw+pad,y+th+pad)], fill=(0,0,0))
            draw.text((x,y), text, font=font, fill=(255,255,255))

            writer.write(np.array(img_pil))
            caption_idx += 1
    
    start_fade_out = (0 if sid == 1 else fade_frames) + audio_frames
    for f in range(fade_frames):
        if start_fade_out + f < len(seq):
            writer.write(seq[start_fade_out + f])

    writer.release()

    
    for s in ctx.scene_list:
        if s.id == cur_scene.id:
            s.video_path = out_path
            break
    return out_path

async def continue_generate_video(scenes_resp, output_folder,ctx):
    tasks = [single_video_generation(scene, output_folder, ctx) for scene in scenes_resp.scene_list[0:1]]
    return await asyncio.gather(*tasks)

scenes_resp = VideoGen(
    story_theme="A boy in a jungle with his cat and dog",
    scene_list=[
        ScenesList(
            id="1",
            scene="Jungle Discovery",
            description="Leo, Whiskers, and Buddy stumble upon a hidden waterfall in the jungle.",
            narration="Once upon a time, Leo and Luna swung through the jungle, laughing as they went!",
            img_prompt="Create a detailed, photorealistic image of the following scene:\n            Leo, Whiskers, and Buddy stumble upon a hidden waterfall in the jungle.\n\n            **Main Characters**:\n            Leo - A young boy, around 7 years old, with messy brown hair, bright green eyes, and always wears a slightly oversized khaki shirt and shorts. Often has dirt smudges on his face and knees., Adventurous, curious, kind-hearted, a little clumsy, loves animals.\n            **Supporting Characters**:\n            Whiskers - A fluffy, calico cat with striking green eyes and a perpetually curious expression. Has a habit of getting into things. - Independent, playful, loyal, sometimes mischievous, loves to explore., Buddy - A golden retriever with floppy ears, a wagging tail, and a goofy grin. Wears a red collar. - Friendly, loyal, enthusiastic, protective of Leo, loves to play fetch.\n            **Objects**:\n            Lush green jungle foliage, tall trees, a clear waterfall cascading into a small pool, colorful butterflies fluttering around.\n            **Mood & Lighting**: Cinematic, immersive atmosphere with realistic lighting to match the scene's emotions.\n            The illustration should capture the story’s essence and atmosphere.",
            object_description=None,
            audio_path="youtube_shorts/Generated_voices\\voices_20250509233133\\voice_scene_1.mp3",
            image_path="youtube_shorts/Generated_images\\images_20250509233133/image_1.png",
            video_path="youtube_shorts/Generated_videos\\video_20250509233133/video_scene_1.mp4",
            combine_audiovideo_path="youtube_shorts/Generated_audio_video\\audio_with_video_20250509233133\\combine_audio_video_1.mp4",
            audio_duration=6.17,
        ),
        ScenesList(
            id="2",
            scene="Whiskers' Curiosity",
            description="Whiskers gets distracted by a brightly colored butterfly and wanders off.",
            narration="Uh oh! They took a wrong turn. Now Leo and Luna are lost!",
            img_prompt="Create a detailed, photorealistic image of the following scene:\n            Whiskers gets distracted by a brightly colored butterfly and wanders off.\n\n            **Main Characters**:\n            Leo - A young boy, around 7 years old, with messy brown hair, bright green eyes, and always wears a slightly oversized khaki shirt and shorts. Often has dirt smudges on his face and knees., Adventurous, curious, kind-hearted, a little clumsy, loves animals.\n            **Supporting Characters**:\n            Whiskers - A fluffy, calico cat with striking green eyes and a perpetually curious expression. Has a habit of getting into things. - Independent, playful, loyal, sometimes mischievous, loves to explore., Buddy - A golden retriever with floppy ears, a wagging tail, and a goofy grin. Wears a red collar. - Friendly, loyal, enthusiastic, protective of Leo, loves to play fetch.\n            **Objects**:\n            A large fern with broad leaves, a bright blue butterfly, sunlight filtering through the leaves.\n            **Mood & Lighting**: Cinematic, immersive atmosphere with realistic lighting to match the scene's emotions.\n            The illustration should capture the story’s essence and atmosphere.",
            object_description=None,
            audio_path="youtube_shorts/Generated_voices\\voices_20250509233133\\voice_scene_2.mp3",
            image_path="youtube_shorts/Generated_images\\images_20250509233133/image_2.png",
            video_path="youtube_shorts/Generated_videos\\video_20250509233133/video_scene_2.mp4",
            combine_audiovideo_path="youtube_shorts/Generated_audio_video\\audio_with_video_20250509233133\\combine_audio_video_2.mp4",
            audio_duration=6.86,
        ),
        ScenesList(
            id="3",
            scene="Lost in the Jungle",
            description="Leo and Buddy search for Whiskers, calling her name.",
            narration="An Old Monkey appears! He is wise and offers to help them find their way.",
            img_prompt="Create a detailed, photorealistic image of the following scene:\n            Leo and Buddy search for Whiskers, calling her name.\n\n            **Main Characters**:\n            Leo - A young boy, around 7 years old, with messy brown hair, bright green eyes, and always wears a slightly oversized khaki shirt and shorts. Often has dirt smudges on his face and knees., Adventurous, curious, kind-hearted, a little clumsy, loves animals.\n            **Supporting Characters**:\n            Whiskers - A fluffy, calico cat with striking green eyes and a perpetually curious expression. Has a habit of getting into things. - Independent, playful, loyal, sometimes mischievous, loves to explore., Buddy - A golden retriever with floppy ears, a wagging tail, and a goofy grin. Wears a red collar. - Friendly, loyal, enthusiastic, protective of Leo, loves to play fetch.\n            **Objects**:\n            Dense jungle undergrowth, tangled vines, dappled sunlight on the forest floor.\n            **Mood & Lighting**: Cinematic, immersive atmosphere with realistic lighting to match the scene's emotions.\n            The illustration should capture the story’s essence and atmosphere.",
            object_description=None,
            audio_path="youtube_shorts/Generated_voices\\voices_20250509233133\\voice_scene_3.mp3",
            image_path="youtube_shorts/Generated_images\\images_20250509233133/image_3.png",
            video_path="youtube_shorts/Generated_videos\\video_20250509233133/video_scene_3.mp4",
            combine_audiovideo_path="youtube_shorts/Generated_audio_video\\audio_with_video_20250509233133\\combine_audio_video_3.mp4",
            audio_duration=6.53,
        ),
        ScenesList(
            id="4",
            scene="Reunion",
            description="Buddy finds Whiskers stuck in a hollow log, and Leo helps her out.",
            narration="Following Old Monkey, Leo and Luna travelled through winding paths to find home.",
            img_prompt="Create a detailed, photorealistic image of the following scene:\n            Buddy finds Whiskers stuck in a hollow log, and Leo helps her out.\n\n            **Main Characters**:\n            Leo - A young boy, around 7 years old, with messy brown hair, bright green eyes, and always wears a slightly oversized khaki shirt and shorts. Often has dirt smudges on his face and knees., Adventurous, curious, kind-hearted, a little clumsy, loves animals.\n            **Supporting Characters**:\n            Whiskers - A fluffy, calico cat with striking green eyes and a perpetually curious expression. Has a habit of getting into things. - Independent, playful, loyal, sometimes mischievous, loves to explore., Buddy - A golden retriever with floppy ears, a wagging tail, and a goofy grin. Wears a red collar. - Friendly, loyal, enthusiastic, protective of Leo, loves to play fetch.\n            **Objects**:\n            A hollow log covered in moss, Whiskers looking scared, Buddy wagging his tail.\n            **Mood & Lighting**: Cinematic, immersive atmosphere with realistic lighting to match the scene's emotions.\n            The illustration should capture the story’s essence and atmosphere.",
            object_description=None,
            audio_path="youtube_shorts/Generated_voices\\voices_20250509233133\\voice_scene_4.mp3",
            image_path="youtube_shorts/Generated_images\\images_20250509233133/image_4.png",
            video_path="youtube_shorts/Generated_videos\\video_20250509233133/video_scene_4.mp4",
            combine_audiovideo_path="youtube_shorts/Generated_audio_video\\audio_with_video_20250509233133\\combine_audio_video_4.mp4",
            audio_duration=6.17,
        ),
    ],
    supporting_characters=[],
    main_characters=[],
    final_path=None,
)


ctx = scenes_resp
import time
from datetime import datetime
import asyncio

if __name__ == "__main__":
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    audio_with_video = os.path.join(
        "youtube_shorts/Generated_videos", f"video_20250509233133d5"
    )
    os.makedirs(audio_with_video, exist_ok=True)
    print(audio_with_video)
    asyncio.run(continue_generate_video(scenes_resp, audio_with_video, ctx))
    print(f"ctx:{ctx}")
    print("text")