In [1]:
from dotenv import load_dotenv

load_dotenv()
import nest_asyncio
import os

nest_asyncio.apply()
import json
import base64
import moviepy
import numpy as np
from textwrap import wrap


from moviepy.editor import (
    VideoFileClip,
    AudioFileClip,
    ImageClip,
    CompositeVideoClip,
    CompositeAudioClip,
    ColorClip,
    TextClip,
    concatenate_videoclips,
    concatenate_audioclips,
)

In [2]:
import re
from datetime import datetime, timedelta


def parse_srt(srt_file_path):
    """Parse an SRT file into a list of dictionaries with 'start', 'end', and 'text'.

    Args:
        srt_file_path: Path of the SRT file to read.

    Returns:
        A list with one dict per cue, in file order. 'start' and 'end' hold the
        values returned by ``str_to_timedelta`` for the two timestamps found on
        the cue's second line; 'text' holds the remaining lines of the cue
        joined with newlines.

    Blocks with fewer than three lines, or without two timestamps on their
    second line, are skipped.
    """
    # Decode explicitly instead of relying on the locale default; "utf-8-sig"
    # also drops a leading byte-order mark if the file has one.
    with open(srt_file_path, "r", encoding="utf-8-sig") as file:
        srt_content = file.read()

    # Cues are separated by blank lines. A "blank" line may still contain
    # spaces or tabs, and stripping first keeps the file's trailing newline
    # out of the last cue's text.
    segments = re.split(r"\n\s*\n", srt_content.strip())

    subtitles = []
    for segment in segments:
        lines = segment.split("\n")
        if len(lines) < 3:
            continue

        # Line 0 is the cue number, line 1 carries "start --> end"
        times = re.findall(r"(\d{2}:\d{2}:\d{2},\d{3})", lines[1])
        if len(times) < 2:
            # Not a valid timing line: skip the block instead of failing
            # with an IndexError below.
            continue

        subtitles.append(
            {
                "start": str_to_timedelta(times[0]),
                "end": str_to_timedelta(times[1]),
                # The remaining lines are subtitle text
                "text": "\n".join(lines[2:]),
            }
        )

    return subtitles


def str_to_timedelta(time_str):
    """Convert a time string from SRT format to a timedelta object.

    Args:
        time_str: Timestamp written as ``hours:minutes:seconds,fraction``,
            for example ``"00:01:02,500"``. The fraction may have one to six
            digits and is read as a decimal fraction of a second.

    Returns:
        A ``timedelta`` equal to the offset described by ``time_str``.

    Raises:
        ValueError: If ``time_str`` does not have the expected layout, or if
            its minutes or seconds field is greater than 59.
    """
    # Parse the fields directly rather than through datetime.strptime: "%H"
    # only accepts 0-23, but an offset into a recording may exceed 23 hours.
    match = re.fullmatch(r"(\d+):(\d{1,2}):(\d{1,2}),(\d{1,6})", time_str)
    if match is None:
        raise ValueError(
            f"time data {time_str!r} does not match SRT format 'HH:MM:SS,mmm'"
        )

    hours = int(match.group(1))
    minutes = int(match.group(2))
    seconds = int(match.group(3))
    if minutes > 59 or seconds > 59:
        raise ValueError(f"minutes and seconds must be in 0..59: {time_str!r}")

    # Right-pad the fraction to six digits so that "5" means 0.5 s and
    # "500" means 500 ms.
    microseconds = int(match.group(4).ljust(6, "0"))

    return timedelta(
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds,
    )


def wrap_text(text, max_width):
    """Re-flow ``text`` so that no line is longer than ``max_width`` characters.

    The text is wrapped with ``textwrap.wrap`` and the resulting lines are
    joined with newlines. The number of lines is not limited.
    """
    wrapped_lines = wrap(text, width=max_width)
    return "\n".join(wrapped_lines)

In [3]:
# Load the video description. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default encoding.
with open("data/data.json", encoding="utf-8") as f:
    data = json.load(f)
print(data["pictures"])

[{'description': "An image of the word 'TRIE' spelled out in large, colorful letters.", 'start': 0, 'end': 5}, {'description': 'An illustration of a massive library with towering bookshelves filled with books.', 'start': 5, 'end': 12}, {'description': 'An abstract representation of a trie data structure, with nodes and branches representing letters.', 'start': 12, 'end': 17}, {'description': 'An animated sequence showing words being placed on shelves corresponding to their first letters.', 'start': 17, 'end': 24}, {'description': 'A close-up shot of a word being placed on a shelf, with subsequent letters branching off into subshelves.', 'start': 24, 'end': 28}, {'description': 'An illustration of subshelves branching off from each letter of a word.', 'start': 28, 'end': 33}, {'description': 'A visual representation of a search query navigating through the trie structure to find a specific word.', 'start': 35, 'end': 39}, {'description': 'An animation demonstrating the speed of word ret

In [4]:
from moviepy.editor import VideoFileClip, vfx
import numpy as np


def create_circular_mask(clip, radius=None):
    """
    Applies a circular mask to the given clip, making the exterior of the circle transparent.

    Args:
        clip: Clip providing ``size``, ``fl_image``, ``to_mask`` and ``set_mask``.
        radius: Radius of the circle in pixels. Defaults to half of the
            smaller value in ``clip.size``.

    Returns:
        A new clip in which the first three channels are zeroed outside a
        circle centred on the frame, carrying a mask that is opaque inside
        that circle and transparent outside it.
    """
    if radius is None:
        radius = min(clip.size) // 2

    # The circle depends only on the frame size, so compute it once per size
    # instead of once (or twice) per frame.
    circles = {}

    def circle_for(frame):
        shape = frame.shape[:2]
        if shape not in circles:
            h, w = shape
            Y, X = np.ogrid[:h, :w]
            dist_from_center = np.sqrt((X - w // 2) ** 2 + (Y - h // 2) ** 2)
            circles[shape] = dist_from_center <= radius
        return circles[shape]

    def mask_frame(frame):
        # Black out everything outside the circle in the colour channels
        inside = circle_for(frame)
        new_frame = frame.copy()
        new_frame[:, :, :3] = frame[:, :, :3] * inside[:, :, np.newaxis]
        return new_frame

    def alpha_frame(frame):
        # The alpha comes from the geometry alone. Deriving it from the pixel
        # values would also make dark pixels inside the circle transparent.
        inside = circle_for(frame)
        return np.dstack([inside] * 3).astype("uint8") * 255

    masked_clip = clip.fl_image(mask_frame)

    # Create a mask clip
    mask_clip = clip.fl_image(alpha_frame)
    masked_clip = masked_clip.set_mask(mask_clip.to_mask())

    return masked_clip

In [5]:
# Inputs for the final video: picture timeline, subtitles and headshot footage
pictures = data["pictures"]
subtitles = parse_srt("data/transcript.srt")
image_clips = []

# Headshot video, scaled to a height of 500 (adjust as needed); its duration
# is the reference length used by the code below
headshot_clip = VideoFileClip("data/headshot.mp4").resize(height=500)
headshot_duration = headshot_clip.duration

# Audio track of the headshot video, made slightly louder (factor 1.5)
headshot_audio = headshot_clip.audio.volumex(1.5)

# Cut the headshot to a circle, anchor it at the bottom right and add a
# 50-pixel margin with opacity 0 on the right and bottom
headshot_clip = (
    create_circular_mask(headshot_clip)
    .set_position(("right", "bottom"))
    .margin(right=50, bottom=50, opacity=0)
)

# Build one full-canvas clip per picture and place it on the timeline.
#
# Every clip keeps the start time given in `pictures`, and the clips are
# combined with CompositeVideoClip, which honours those start times.
# concatenate_videoclips would instead play the clips back to back (shifted by
# its padding), so each crossfade would move all later pictures earlier
# relative to the narration.
fade_duration = 0.5  # Length of the crossfade between two pictures, in seconds
canvas_size = (1080, 1920)  # Output frame size as (width, height)

image_clips = []
last_index = len(pictures) - 1
for i, picture in enumerate(pictures):
    clip_start = picture["start"]
    if i < last_index:
        # Stay visible until the next picture has finished fading in
        clip_end = pictures[i + 1]["start"] + fade_duration
    else:
        # The last picture is held until the headshot video ends
        clip_end = headshot_duration

    # Never run past the headshot video, and skip pictures that would only
    # start once it is over
    clip_end = min(clip_end, headshot_duration)
    if clip_end <= clip_start:
        continue

    # Resize the image to fit the width of the canvas
    img_clip = ImageClip(f"data/images/image_{i}.png").resize(width=canvas_size[0])

    # Centre the image on a black background that fills the canvas
    black_bg = ColorClip(size=canvas_size, color=(0, 0, 0))
    img_clip = CompositeVideoClip(
        [black_bg, img_clip.set_position("center")], size=canvas_size
    )

    img_clip = img_clip.set_start(clip_start).set_duration(clip_end - clip_start)

    # Fade every picture except the first one in over its predecessor. No
    # fade-out is needed: the predecessor stays fully visible underneath
    # until the fade-in has completed.
    if image_clips:
        img_clip = img_clip.crossfadein(fade_duration)

    image_clips.append(img_clip)

# Layer the clips according to their start times and make the result exactly
# as long as the headshot video
video_clip = CompositeVideoClip(image_clips, size=canvas_size).set_duration(
    headshot_duration
)

# One text overlay per subtitle, shown for the time span given in the SRT file
subtitle_clips = []
for subtitle in subtitles:
    # Subtitle times are timedelta objects; the clip methods take seconds
    start_seconds = subtitle["start"].total_seconds()
    end_seconds = subtitle["end"].total_seconds()

    # Wrap the subtitle text to a width of 40 before rendering it
    wrapped_text = wrap_text(subtitle["text"], 40)

    # Rendered text, timed, positioned at the top centre, with a top margin
    # of 50 at opacity 0
    txt_clip = (
        TextClip(
            wrapped_text, fontsize=48, color="white", font="Arial-Bold", align="West"
        )
        .set_start(start_seconds)
        .set_duration(end_seconds - start_seconds)
        .set_position(("center", "top"))
        .margin(top=50, opacity=0)
    )

    subtitle_clips.append(txt_clip)

# Background music: repeat the track often enough to cover the headshot
# video, then cut it to exactly that length
background_music = AudioFileClip("data/music.mp3")
repeat_count = 1 + int(headshot_duration // background_music.duration)
repeated_clips = [background_music for _ in range(repeat_count)]
looped_background_music = concatenate_audioclips(repeated_clips).subclip(
    0, headshot_duration
)

# Mix the headshot audio with the looped music
final_audio = CompositeAudioClip([headshot_audio, looped_background_music])

# Assemble the layers in list order: pictures, then subtitles, then the
# headshot; pictures and headshot are both set to the headshot's duration
layers = [video_clip.set_duration(headshot_duration)]
layers.extend(subtitle_clips)
layers.append(headshot_clip.set_duration(headshot_duration))

final_clip = CompositeVideoClip(layers, size=(1080, 1920))
final_clip = final_clip.set_audio(final_audio)

# Report the durations and start times that went into the composite
print(f"Video Clip Duration: {video_clip.duration}")
print(f"Headshot Clip Duration: {headshot_clip.duration}")
for i, clip in enumerate(subtitle_clips):
    print(f"Subtitle Clip {i} Duration: {clip.duration} Start: {clip.start}")

# Render the result to disk
final_clip.write_videofile("data/final_video.mp4", threads=8, fps=24)

Video Clip Duration: 59.04
Headshot Clip Duration: 59.04
Subtitle Clip 0 Duration: 5.0 Start: 0.0
Subtitle Clip 1 Duration: 7.0 Start: 5.0
Subtitle Clip 2 Duration: 5.0 Start: 12.0
Subtitle Clip 3 Duration: 7.0 Start: 17.0
Subtitle Clip 4 Duration: 4.0 Start: 24.0
Subtitle Clip 5 Duration: 5.0 Start: 28.0
Subtitle Clip 6 Duration: 4.0 Start: 35.0
Subtitle Clip 7 Duration: 6.0 Start: 39.0
Subtitle Clip 8 Duration: 7.0 Start: 45.0
Subtitle Clip 9 Duration: 5.0 Start: 52.0
Subtitle Clip 10 Duration: 2.0 Start: 57.0
Moviepy - Building video data/final_video.mp4.
MoviePy - Writing audio in final_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video data/final_video.mp4



                                                                

Moviepy - Done !
Moviepy - video ready data/final_video.mp4
