In [1]:
import os
import numpy as np
import librosa
import librosa.display
import moviepy.editor as mp
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
import re


def mmss_to_seconds(time_str):
    """Convert an "mm:ss" time string to a whole number of seconds.

    The whole string must be a minutes field and a seconds field separated
    by a single colon; surrounding whitespace is ignored. Anything else
    (for example "1:30:45" or "2:54abc") is rejected rather than being
    silently truncated to its leading "mm:ss" part.

    Parameters:
    - time_str: Time in "mm:ss" format, e.g. "2:54"

    Returns:
    - Total number of seconds as an int

    Raises:
    - ValueError: if time_str is not in mm:ss format
    """
    # fullmatch anchors both ends, so trailing characters cannot slip through
    # the way they do with re.match (which only anchors the start).
    match = re.fullmatch(r"\s*(\d+):(\d+)\s*", time_str)
    if match is None:
        raise ValueError("Time format must be mm:ss")

    minutes, seconds = map(int, match.groups())
    return minutes * 60 + seconds


def detect_audio_peaks(audio_path, start_time, end_time, sr=22050, hop_length=512):
    """
    Locate onsets in an excerpt of an audio file.

    Only the span from `start_time` to `end_time` is loaded, so the returned
    times are measured from the beginning of that excerpt, not from the
    beginning of the file.

    Parameters:
    - audio_path: Path to the audio file (e.g. an .mp3)
    - start_time: Excerpt start, in seconds
    - end_time: Excerpt end, in seconds
    - sr: Sample rate passed to librosa.load (default 22050)
    - hop_length: Number of samples per analysis frame (default 512)

    Returns:
    - Onset times in seconds, as produced by librosa.frames_to_time
    """
    excerpt_length = end_time - start_time

    # librosa.load hands back the sample rate it actually used; keep that one.
    samples, sr = librosa.load(audio_path, sr=sr, offset=start_time, duration=excerpt_length)

    # Every librosa call below must agree on the frame geometry.
    frame_kwargs = {"sr": sr, "hop_length": hop_length}

    # Onset strength envelope -> onset frame indices -> times in seconds.
    envelope = librosa.onset.onset_strength(y=samples, **frame_kwargs)
    onset_frames = librosa.onset.onset_detect(onset_envelope=envelope, **frame_kwargs)
    return librosa.frames_to_time(onset_frames, **frame_kwargs)


def create_video(image_folder, audio_path, output_video, start_time_str, end_time_str, fps=30):
    """
    Creates a video with images changing at audio peaks.

    The excerpt of the audio between the start and end times is analysed for
    peaks. The first image is shown from the start of the excerpt until the
    first peak, and the picture then changes at every peak, so the cuts stay
    aligned with the audio and the video is exactly as long as the excerpt.
    If there are fewer images than segments, the images are reused in order.

    Parameters:
    - image_folder: Folder containing images (.png, .jpg, .jpeg; any letter case)
    - audio_path: Path to the audio file
    - output_video: Path for the generated video
    - start_time_str: Start time in "mm:ss" format
    - end_time_str: End time in "mm:ss" format
    - fps: Frames per second of the video

    Raises:
    - ValueError: if a time string is malformed, the end time is not after
      the start time, or the folder contains no images
    """
    # Convert time strings to seconds
    start_time = mmss_to_seconds(start_time_str)
    end_time = mmss_to_seconds(end_time_str)
    if end_time <= start_time:
        raise ValueError("End time must be later than start time.")
    total_duration = end_time - start_time

    # Load images first so that a bad folder fails before the audio analysis.
    image_extensions = ('.png', '.jpg', '.jpeg')
    images = sorted(
        os.path.join(image_folder, name)
        for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions)
    )
    if not images:
        raise ValueError("No images found in the specified folder.")

    # Detect peaks in the audio file (times are relative to the excerpt start)
    peak_times = np.asarray(detect_audio_peaks(audio_path, start_time, end_time), dtype=float)

    # Keep only peaks strictly inside the excerpt: a peak at 0 or at the very
    # end would otherwise produce a zero-length (or negative) segment.
    peak_times = peak_times[(peak_times > 0) & (peak_times < total_duration)]

    # Segment boundaries run from the excerpt start, through every peak, to
    # the excerpt end. Starting at 0 (not at the first peak) is what keeps the
    # image changes in sync with the audio; with no peaks at all this yields
    # a single segment covering the whole excerpt.
    boundaries = np.concatenate(([0.0], peak_times, [total_duration]))
    durations = np.diff(boundaries).tolist()

    if len(images) < len(durations):
        print(f"Warning: More segments ({len(durations)}) than images ({len(images)}). Some images may repeat.")

    # One image per segment, looping over the images if there are too few
    selected_images = [images[i % len(images)] for i in range(len(durations))]

    # Create video clip with images changing at peak times
    clip = ImageSequenceClip(selected_images, durations=durations)

    # Load audio; close it afterwards so the underlying reader is released
    # even if encoding fails.
    audio = mp.AudioFileClip(audio_path)
    try:
        # Set the audio to the video
        clip = clip.set_audio(audio.subclip(start_time, end_time))

        # Write the final video file
        clip.write_videofile(output_video, fps=fps, codec="libx264")
    finally:
        audio.close()




In [3]:

# Render the 2:54 to 3:16 excerpt of the track as a video of the images.
create_video(
    image_folder="results/video",  # directory holding the source images
    audio_path="sources/audio/Hania_Rani_F_Major.mp3",  # soundtrack to analyse
    output_video="results/video/output.mp4",  # where the rendered video is written
    start_time_str="2:54",  # excerpt start (mm:ss)
    end_time_str="3:16",  # excerpt end (mm:ss)
    fps=30,  # output frame rate
)

Moviepy - Building video results/video/output.mp4.
MoviePy - Writing audio in outputTEMP_MPY_wvf_snd.mp3


                                                                   

MoviePy - Done.
Moviepy - Writing video results/video/output.mp4



                                                               

Moviepy - Done !
Moviepy - video ready results/video/output.mp4
