In [15]:
from datetime import datetime
import os
import numpy as np
import librosa
import librosa.display
import subprocess
import moviepy.editor as mp
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
import re
import random



In [16]:
def mmss_to_seconds(time_str):
    """Convert a ``mm:ss`` time string to a whole number of seconds.

    Parameters
    ----------
    time_str : str
        Minutes and seconds separated by a colon, e.g. ``"2:54"``.
        Whitespace around the value is ignored.

    Returns
    -------
    int
        ``minutes * 60 + seconds``.

    Raises
    ------
    ValueError
        If the whole string is not of the form ``<digits>:<digits>``.
    """
    # fullmatch (not match): a prefix-only match would silently accept and
    # truncate inputs such as "1:02:03" or "2:54abc" to their first two fields.
    match = re.fullmatch(r"\s*(\d+):(\d+)\s*", time_str)
    if match is None:
        raise ValueError("Time format must be mm:ss")

    minutes, seconds = map(int, match.groups())
    return minutes * 60 + seconds


def detect_audio_peaks(audio_path, start_time, end_time, sr=22050, hop_length=512):
    """Detect onset peaks in the ``[start_time, end_time]`` excerpt of an audio file.

    Parameters
    ----------
    audio_path : str
        Path of the audio file, passed to ``librosa.load``.
    start_time, end_time : number
        Bounds of the excerpt in seconds; ``end_time`` must be greater
        than ``start_time`` and ``start_time`` must not be negative.
    sr : int, optional
        Sample rate requested from ``librosa.load``.
    hop_length : int, optional
        Hop length used for the onset envelope and the frame-to-time conversion.

    Returns
    -------
    numpy.ndarray
        Onset times in seconds, measured from the start of the loaded
        excerpt (i.e. relative to ``start_time``, not to the file start).

    Raises
    ------
    ValueError
        If the requested time window is negative or empty.
    """
    # Fail early: a non-positive duration would otherwise be handed to the
    # audio loader, whose behaviour for such values is not what a caller wants.
    if start_time < 0 or end_time <= start_time:
        raise ValueError(
            f"Invalid time window: start_time={start_time}, end_time={end_time}"
        )

    # Load only the requested excerpt of the file
    y, sr = librosa.load(audio_path, sr=sr, offset=start_time, duration=end_time - start_time)

    # Compute the onset envelope
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Detect peaks (as frame indices into the envelope)
    peak_indices = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, hop_length=hop_length)

    # Convert peak indices to time (in seconds)
    return librosa.frames_to_time(peak_indices, sr=sr, hop_length=hop_length)


def _segment_durations(peak_times, total_duration):
    """Return how long each image stays on screen so that cuts land on the peaks."""
    peaks = np.unique(np.asarray(peak_times, dtype=float))  # sorted, de-duplicated
    # Peaks at (or outside) the clip edges would create zero/negative segments.
    peaks = peaks[(peaks > 0) & (peaks < total_duration)]
    # The first segment starts at t=0, not at the first peak; otherwise every
    # cut is shifted early by peak_times[0] and the video ends before the audio.
    boundaries = np.concatenate(([0.0], peaks, [float(total_duration)]))
    return np.diff(boundaries).tolist()


def follow_the_peaks(image_folder, audio_path, output_video, start_time_str, end_time_str, fps=30, seed=None):
    """Build a slideshow video whose image changes follow the audio's onset peaks.

    Images from ``image_folder`` are shuffled and shown one after another over
    the ``start_time_str``-``end_time_str`` excerpt of ``audio_path``; a new
    image appears at every detected peak, and the excerpt is attached as the
    soundtrack. Images are reused cyclically when there are fewer images than
    segments.

    Parameters
    ----------
    image_folder : str
        Folder containing ``.png`` / ``.jpg`` / ``.jpeg`` files (any letter case).
    audio_path : str
        Path of the audio file.
    output_video : str
        Path of the video file to write; its parent folder is created if missing.
    start_time_str, end_time_str : str
        Excerpt bounds in ``mm:ss`` format.
    fps : int, optional
        Frame rate passed to ``write_videofile``.
    seed : int or None, optional
        When given, the image order is shuffled with a dedicated
        ``random.Random(seed)`` so the result is reproducible. ``None``
        (default) uses the global ``random`` state.

    Raises
    ------
    ValueError
        If the time strings are malformed, the excerpt is empty, or the
        folder holds no supported images.
    """
    start_time = mmss_to_seconds(start_time_str)
    end_time = mmss_to_seconds(end_time_str)
    if end_time <= start_time:
        raise ValueError("end_time_str must be later than start_time_str")

    # Collect images before touching the audio so a bad folder fails fast.
    # Sorting first makes a seeded shuffle independent of directory listing order.
    image_extensions = ('.png', '.jpg', '.jpeg')
    images = sorted(
        os.path.join(image_folder, name)
        for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions)
    )
    if not images:
        raise ValueError("No images found in the specified folder.")

    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    # Detect peaks in the audio file (times are relative to start_time)
    peak_times = detect_audio_peaks(audio_path, start_time, end_time)

    if len(images) < len(peak_times):
        print(f"Warning: More peaks ({len(peak_times)}) detected than images ({len(images)}). Some images may repeat.")

    # One segment before the first peak plus one per peak; always >= 1 segment,
    # so a clip with no detected peaks still yields a single still image.
    durations = _segment_durations(peak_times, end_time - start_time)
    selected_images = [images[i % len(images)] for i in range(len(durations))]  # loop images if needed

    output_dir = os.path.dirname(output_video)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    audio = mp.AudioFileClip(audio_path).subclip(start_time, end_time)
    try:
        # Attach audio to video and render
        clip = ImageSequenceClip(selected_images, durations=durations).set_audio(audio)
        clip.write_videofile(output_video, fps=fps)
    finally:
        # Release the audio reader even if rendering fails
        audio.close()




In [None]:
# Second-resolution timestamp used to give each run its own output file name.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

# Render the 2:54-3:16 excerpt of the track over the images in the folder.
follow_the_peaks(
    image_folder="results/maxxi_piano2",
    audio_path="sources/audio/Hania_Rani_F_Major.mp3",
    output_video=f"results/video/output_{timestamp}.mp4",
    start_time_str="2:54",
    end_time_str="3:16",
    fps=30,
)