# In [None]:
import numpy as np
import librosa
import cv2
import dlib
import moviepy.editor as mpy

def create_particles(num_particles, width, height):
    """Spawn ``num_particles`` particles at random positions.

    Each particle is a mutable list ``[x, y, size, speed]`` whose entries are
    drawn with ``np.random.randint``: ``x`` from ``[0, width)``, ``y`` from
    ``[0, height)``, ``size`` from ``[2, 5)`` and ``speed`` from ``[1, 5)``.

    Returns:
        A list with one ``[x, y, size, speed]`` list per particle.
    """
    rand = np.random.randint
    # A list display is evaluated left to right, so every particle consumes
    # its random numbers in x, y, size, speed order.
    return [
        [rand(0, width), rand(0, height), rand(2, 5), rand(1, 5)]
        for _ in range(num_particles)
    ]

def update_particles(particles, width, height, energy):
    """Advance all particles by one step, mutating them in place.

    A particle ``[x, y, size, speed]`` moves up by ``speed * energy``. When
    its new ``y`` is below zero it is recycled: ``y`` is set to ``height``
    and ``x`` is redrawn from ``[0, width)``.

    Returns:
        The same ``particles`` object that was passed in.
    """
    for particle in particles:
        next_y = particle[1] - particle[3] * energy
        if next_y < 0:
            # Left the top edge: re-enter from the bottom at a new column.
            particle[1] = height
            particle[0] = np.random.randint(0, width)
        else:
            particle[1] = next_y
    return particles

def draw_particles(frame, particles, color):
    """Draw every particle onto ``frame`` (in place) as a filled circle.

    The centre is the particle's ``(x, y)`` truncated to integers and the
    radius is its ``size`` entry; ``color`` is passed to ``cv2.circle``
    unchanged.
    """
    for particle in particles:
        center = (int(particle[0]), int(particle[1]))
        radius = particle[2]
        # Thickness -1 asks OpenCV for a filled shape.
        cv2.circle(frame, center, radius, color, -1)

def create_mosaic_mask(height, width, faces, num_blocks=50, min_block_size=20, max_block_size=150):
    """Build the mask that decides which pixels get pixelated.

    The mask starts fully at 255 ("keep the original pixel"). Up to
    ``num_blocks`` random squares are set to 0 ("use the mosaic"), except for
    squares that would overlap a detected face. Finally every face rectangle,
    grown by a 10-pixel padding, is forced back to 255.

    Args:
        height: Mask height in pixels.
        width: Mask width in pixels.
        faces: Iterable of rectangles exposing ``left()``, ``top()``,
            ``right()`` and ``bottom()`` (the caller passes the result of a
            dlib face detector).
        num_blocks: Number of random squares to try.
        min_block_size: Smallest side length drawn (inclusive).
        max_block_size: Upper bound of the side length (exclusive).

    Returns:
        A ``(height, width)`` ``uint8`` array containing only 0 and 255.
    """
    mask = np.full((height, width), 255, dtype=np.uint8)

    # Keep the side length strictly below both image dimensions; otherwise
    # the position draws below would be handed an empty range and raise.
    # For images of at least max_block_size pixels per side this is exactly
    # the range [min_block_size, max_block_size).
    size_hi = min(max_block_size, height, width)
    size_lo = min(min_block_size, size_hi - 1)

    if size_hi >= 1:
        for _ in range(num_blocks):
            block_size = np.random.randint(size_lo, size_hi)
            x = np.random.randint(0, width - block_size)
            y = np.random.randint(0, height - block_size)

            # Skip the block if it intersects any face rectangle.
            face_collision = any(
                x < face.right() and x + block_size > face.left()
                and y < face.bottom() and y + block_size > face.top()
                for face in faces
            )
            if not face_collision:
                # Both corners are inclusive, matching a filled
                # cv2.rectangle from (x, y) to (x + size, y + size).
                mask[y:y + block_size + 1, x:x + block_size + 1] = 0

    # Protect a padded area around each face from the mosaic.
    padding = 10  # pixels added on every side of the face rectangle
    for face in faces:
        top = max(face.top() - padding, 0)
        bottom = min(face.bottom() + padding, height)
        left = max(face.left() - padding, 0)
        right = min(face.right() + padding, width)
        mask[top:bottom, left:right] = 255

    return mask

def apply_mosaic_effect(frame, mask, block_size=30):
    """Pixelate ``frame`` wherever ``mask`` is not 255.

    The frame is shrunk by a factor of ``block_size`` and blown back up with
    nearest-neighbour interpolation to produce the mosaic; the result takes
    the original pixel where ``mask == 255`` and the mosaic pixel elsewhere.

    Args:
        frame: Image array; ``mask`` is broadcast over a trailing channel
            axis, so a ``(height, width, channels)`` layout is expected.
        mask: ``(height, width)`` array; 255 marks pixels to leave untouched.
        block_size: Down-scaling factor, i.e. the approximate mosaic cell
            size in pixels.

    Returns:
        A new array with the same shape as ``frame``.
    """
    height, width = frame.shape[:2]
    # Integer division yields 0 for frames smaller than one block, and
    # cv2.resize rejects an empty target size, so keep at least one cell.
    small_size = (max(1, width // block_size), max(1, height // block_size))
    small = cv2.resize(frame, small_size)
    mosaic = cv2.resize(small, (width, height), interpolation=cv2.INTER_NEAREST)
    return np.where(mask[:, :, None] == 255, frame, mosaic)

def apply_blur_effect(frame, energy):
    """Gaussian-blur ``frame`` with a kernel that grows with ``energy``.

    The kernel side is ``2 * int(energy * 4) + 1`` in both directions and
    sigma is passed as 0.

    Returns:
        The blurred image produced by ``cv2.GaussianBlur``.
    """
    radius = int(energy * 4)
    kernel_side = 2 * radius + 1  # odd by construction
    return cv2.GaussianBlur(frame, (kernel_side, kernel_side), 0)

def apply_bounce_effect(frame, current_onset, max_onset):
    """Shift the frame upward in proportion to the current onset strength.

    The shift is ``int(20 * current_onset / max_onset)`` rows. Rows pushed
    past the top are discarded and the rows vacated at the bottom are filled
    with zeros (black), so the output has the same shape as the input.

    Args:
        frame: Image array whose first axis is the row (vertical) axis.
        current_onset: Onset strength for the current frame.
        max_onset: Largest onset strength, used for normalisation.

    Returns:
        ``frame`` itself when no shift applies (zero shift, non-positive or
        NaN ``max_onset``, non-finite ratio); otherwise a new shifted array.
    """
    # Guard the division: a silent track has max_onset == 0.
    if not max_onset > 0:
        return frame
    ratio = current_onset / max_onset
    if not np.isfinite(ratio):
        return frame

    bounce_amount = int(20 * ratio)
    if bounce_amount <= 0:
        return frame

    # Padding top and bottom and then cropping the same margin would hand
    # back the untouched frame, so move the rows explicitly instead.
    shifted = np.zeros_like(frame)
    shifted[:-bounce_amount] = frame[bounce_amount:]
    return shifted

def apply_glitch_effect(frame, strength=10):
    """Return a copy of ``frame`` with random horizontal bands shifted sideways.

    Between 1 and ``strength - 1`` bands are chosen. Each band is up to
    ``height // strength - 1`` rows tall and is displaced horizontally by a
    random offset in ``[-strength, strength)``. Pixels a band moves away from
    keep their original content.

    Frames that are too small for those ranges are handled instead of
    raising: the band height falls back to one row, the offset is clamped so
    at least one column survives, and degenerate inputs (fewer than two rows,
    no columns, ``strength < 1``) yield an unmodified copy.

    Args:
        frame: Image array with rows on axis 0 and columns on axis 1.
        strength: Controls band count, band height and maximum offset.

    Returns:
        A new array with the same shape and dtype as ``frame``.
    """
    height, width = frame.shape[:2]
    glitch_frame = frame.copy()
    if strength < 1 or height < 2 or width < 1:
        return glitch_frame

    # randint needs low < high; max(2, ...) keeps the range non-empty while
    # leaving the draw untouched whenever the original range was valid.
    num_slices = np.random.randint(1, max(2, strength))
    max_shift = width - 1
    for _ in range(num_slices):
        slice_height = np.random.randint(1, max(2, height // strength))
        start_y = np.random.randint(0, height - slice_height)
        shift = np.random.randint(-strength, strength)
        # Never shift a band completely out of the frame.
        shift = max(-max_shift, min(max_shift, shift))

        rows = slice(start_y, start_y + slice_height)
        if shift > 0:
            # Move the band to the right.
            glitch_frame[rows, shift:] = frame[rows, :width - shift]
        else:
            # Move the band to the left; a zero shift restores the rows.
            glitch_frame[rows, :width + shift] = frame[rows, -shift:]
    return glitch_frame

def _blend_filled_rect(frame, top_left, bottom_right, color, alpha=0.5):
    """Blend a filled rectangle onto ``frame`` in place with weight ``alpha``."""
    overlay = frame.copy()
    cv2.rectangle(overlay, top_left, bottom_right, color, -1)
    cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)


def create_music_visualizer(image_path, audio_path, output_path):
    """Render a music-reactive video from a still image and an audio file.

    Per video frame (30 fps) the image is pixelated outside detected faces,
    overlaid with rising particles and a mirrored 32-band spectrum, blurred
    according to the audio level, zoomed and bounced with the onset strength
    and occasionally glitched. The silent frames are written to
    ``temp_output.mp4``, then muxed with the audio into ``output_path``; the
    temporary file is removed afterwards.

    Args:
        image_path: Path of the background image.
        audio_path: Path of the audio track (analysed and used as soundtrack).
        output_path: Path of the final video file.

    Raises:
        IOError: If the image cannot be read or the temporary video cannot
            be opened for writing.
        ValueError: If the audio is too short to produce a single frame.
    """
    import os

    y, sr = librosa.load(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)

    fps = 30
    n_frames = int(duration * fps)
    if n_frames < 1:
        raise ValueError("Audio is too short to produce a single video frame")
    hop_length = sr // fps  # one analysis hop per video frame

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=32, fmax=8000, hop_length=hop_length)
    S_dB = librosa.power_to_db(S, ref=np.max)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    # Loop-invariant normalisation range for the spectrum bars.
    db_range = [S_dB.min(), S_dB.max()]

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(image_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width, _ = img.shape

    detector = dlib.get_frontal_face_detector()
    faces = detector(img, 1)

    particles = create_particles(100, width, height)
    prev_bars = np.zeros(32)
    zoom_factor = 1.0
    max_onset = np.max(onset_env)
    if not max_onset > 0:
        # Silent track: avoid dividing by zero; every onset ratio becomes 0.
        max_onset = 1.0

    # Image and mask never change, so the mosaic is computed only once.
    mosaic_mask = create_mosaic_mask(height, width, faces)
    base_frame = apply_mosaic_effect(img, mosaic_mask)

    bar_width = width // 32
    max_bar_height = height // 4
    mid_y = height // 2
    bar_color = (255, 255, 255)  # translucency comes from the 50% blend

    temp_path = 'temp_output.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(temp_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise IOError("Could not open {} for writing".format(temp_path))

        for frame_num in range(n_frames):
            audio_idx = int(frame_num * len(y) / n_frames)
            chunk = y[audio_idx:audio_idx + hop_length]

            # Clamp to the last analysis column if the video outlasts it.
            spec_frame = S_dB[:, frame_num] if frame_num < S_dB.shape[1] else S_dB[:, -1]
            current_onset = onset_env[frame_num] if frame_num < len(onset_env) else onset_env[-1]
            energy = np.mean(np.abs(chunk)) * 10

            frame = base_frame.copy()
            particles = update_particles(particles, width, height, energy)
            draw_particles(frame, particles, (255, 255, 255))

            for j, h in enumerate(spec_frame):
                target_height = int(np.interp(h, db_range, [0, max_bar_height]))
                # Exponential smoothing keeps the bars from flickering.
                prev_bars[j] = prev_bars[j] * 0.7 + target_height * 0.3
                bar_height = int(prev_bars[j])
                left = j * bar_width
                right = (j + 1) * bar_width

                # Upper bar.
                _blend_filled_rect(frame, (left, mid_y - bar_height), (right, mid_y), bar_color)
                # Lower bar (mirror image).
                _blend_filled_rect(frame, (left, mid_y), (right, mid_y + bar_height), bar_color)

            frame = apply_blur_effect(frame, energy)

            # Smoothed zoom of up to 5%, cropped back to the output size.
            target_zoom = 1 + 0.05 * current_onset / max_onset
            zoom_factor = zoom_factor * 0.7 + target_zoom * 0.3
            scaled_frame = cv2.resize(frame, None, fx=zoom_factor, fy=zoom_factor)
            start_y = (scaled_frame.shape[0] - height) // 2
            start_x = (scaled_frame.shape[1] - width) // 2
            frame = scaled_frame[start_y:start_y + height, start_x:start_x + width]

            frame = apply_bounce_effect(frame, current_onset, max_onset)
            if np.random.rand() < 0.1:
                frame = apply_glitch_effect(frame)
            video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        video.release()

    try:
        clip = mpy.VideoFileClip(temp_path)
        try:
            audio = mpy.AudioFileClip(audio_path).set_duration(clip.duration)
            try:
                final_video = clip.set_audio(audio)
                final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
            finally:
                audio.close()
        finally:
            clip.close()
    finally:
        # The intermediate silent video is of no use once muxing is over.
        if os.path.exists(temp_path):
            os.remove(temp_path)

# Demo run: input image, input audio and output video file names.
image_path, audio_path, output_path = 'image.png', 'audio.mp3', 'output_videoDemo.mp4'
create_music_visualizer(
    image_path=image_path,
    audio_path=audio_path,
    output_path=output_path,
)


# In [None]:
import numpy as np
import librosa
import cv2
import moviepy.editor as mpy
from concurrent.futures import ThreadPoolExecutor

def create_particles(num_particles, width, height):
    """Create the initial particle list.

    Returns ``num_particles`` lists of the form ``[x, y, size, speed]`` with
    ``x`` in ``[0, width)``, ``y`` in ``[0, height)``, ``size`` in ``[2, 5)``
    and ``speed`` in ``[1, 5)``, each drawn with ``np.random.randint``.
    """
    def spawn():
        # Draw order (x, y, size, speed) fixes how the global RNG is consumed.
        pos_x = np.random.randint(0, width)
        pos_y = np.random.randint(0, height)
        radius = np.random.randint(2, 5)
        velocity = np.random.randint(1, 5)
        return [pos_x, pos_y, radius, velocity]

    return [spawn() for _ in range(num_particles)]

def update_particles(particles, width, height, energy):
    """Move particles upward in place and respawn the ones that exit the top.

    Each ``[x, y, size, speed]`` entry has ``speed * energy`` subtracted from
    ``y``. If the result is negative, ``y`` becomes ``height`` and ``x`` is
    redrawn from ``[0, width)``.

    Returns:
        The ``particles`` list that was passed in.
    """
    for particle in particles:
        particle[1] = particle[1] - particle[3] * energy
        if not particle[1] < 0:
            continue
        # Off the top edge: restart from the bottom at a random column.
        particle[1] = height
        particle[0] = np.random.randint(0, width)
    return particles

def draw_particles(frame, particles, color):
    """Render the particles onto ``frame`` in place as filled circles.

    Position entries are truncated to ``int`` for the circle centre; the
    particle's ``size`` entry is used as the radius.
    """
    filled = -1  # negative thickness makes cv2.circle fill the disc
    for particle in particles:
        centre_x = int(particle[0])
        centre_y = int(particle[1])
        cv2.circle(frame, (centre_x, centre_y), particle[2], color, filled)

def create_blur_mask(height, width, num_blocks=50, min_block_size=20, max_block_size=150):
    """Pick random square regions to blur and a blur strength for each.

    Blocks are generated sequentially: running them on a thread pool made
    the result depend on thread scheduling and silently discarded any
    exception raised while drawing a block.

    Args:
        height: Mask height in pixels.
        width: Mask width in pixels.
        num_blocks: Number of random squares to draw.
        min_block_size: Smallest side length drawn (inclusive).
        max_block_size: Upper bound of the side length (exclusive).

    Returns:
        A tuple ``(mask, blur_strengths)`` of ``(height, width)`` ``uint8``
        arrays. ``mask`` is 1 inside any block and 0 elsewhere;
        ``blur_strengths`` holds the strength (5..19) of the block drawn last
        at each pixel and 0 outside all blocks.
    """
    mask = np.zeros((height, width), dtype=np.uint8)
    blur_strengths = np.zeros((height, width), dtype=np.uint8)

    # Keep the side length strictly below both image dimensions so the
    # position draws never receive an empty range. For images of at least
    # max_block_size pixels per side the range is unchanged.
    size_hi = min(max_block_size, height, width)
    size_lo = min(min_block_size, size_hi - 1)
    if size_hi < 1:
        return mask, blur_strengths

    for _ in range(num_blocks):
        block_size = np.random.randint(size_lo, size_hi)
        x = np.random.randint(0, width - block_size)
        y = np.random.randint(0, height - block_size)
        blur_strength = np.random.randint(5, 20)

        # Both corners are inclusive, matching a filled cv2.rectangle from
        # (x, y) to (x + size, y + size).
        rows = slice(y, y + block_size + 1)
        cols = slice(x, x + block_size + 1)
        mask[rows, cols] = 1
        blur_strengths[rows, cols] = blur_strength

    return mask, blur_strengths

def apply_blur_effect(frame, mask, blur_strengths):
    """Blur the masked regions of ``frame``, each with its own strength.

    Args:
        frame: Source image; it is not modified.
        mask: Array with 1 at pixels that should be blurred.
        blur_strengths: Array of the same height/width giving the blur
            strength per pixel.

    Returns:
        A copy of ``frame`` in which every pixel with ``mask == 1`` is taken
        from a Gaussian-blurred version of the frame whose kernel side is
        the pixel's strength, bumped to the next odd number when even.
    """
    result = frame.copy()
    active = mask == 1
    # Only blur with strengths that actually occur under the mask.
    for strength in np.unique(blur_strengths[active]):
        strength = int(strength)
        # GaussianBlur only accepts odd kernel sizes; even strengths would
        # make OpenCV raise, so round them up to the next odd value.
        ksize = strength | 1
        region = active & (blur_strengths == strength)
        blurred = cv2.GaussianBlur(frame, (ksize, ksize), 0)
        result[region] = blurred[region]
    return result

def apply_bounce_effect(frame, current_onset, max_onset):
    """Shift the frame upward in proportion to the current onset strength.

    The shift is ``int(20 * current_onset / max_onset)`` rows. Rows pushed
    past the top are discarded and the rows vacated at the bottom are filled
    with zeros (black), so the output has the same shape as the input.

    Args:
        frame: Image array whose first axis is the row (vertical) axis.
        current_onset: Onset strength for the current frame.
        max_onset: Largest onset strength, used for normalisation.

    Returns:
        ``frame`` itself when no shift applies (zero shift, non-positive or
        NaN ``max_onset``, non-finite ratio); otherwise a new shifted array.
    """
    # Guard the division: a silent track has max_onset == 0.
    if not max_onset > 0:
        return frame
    ratio = current_onset / max_onset
    if not np.isfinite(ratio):
        return frame

    bounce_amount = int(20 * ratio)
    if bounce_amount <= 0:
        return frame

    # Padding top and bottom and then cropping the same margin would hand
    # back the untouched frame, so move the rows explicitly instead.
    shifted = np.zeros_like(frame)
    shifted[:-bounce_amount] = frame[bounce_amount:]
    return shifted

def apply_glitch_effect(frame, strength=10):
    """Return a copy of ``frame`` with random horizontal bands shifted sideways.

    Between 1 and ``strength - 1`` bands are chosen. Each band is up to
    ``height // strength - 1`` rows tall and is displaced horizontally by a
    random offset in ``[-strength, strength)``. Pixels a band moves away from
    keep their original content.

    Frames that are too small for those ranges are handled instead of
    raising: the band height falls back to one row, the offset is clamped so
    at least one column survives, and degenerate inputs (fewer than two rows,
    no columns, ``strength < 1``) yield an unmodified copy.

    Args:
        frame: Image array with rows on axis 0 and columns on axis 1.
        strength: Controls band count, band height and maximum offset.

    Returns:
        A new array with the same shape and dtype as ``frame``.
    """
    height, width = frame.shape[:2]
    glitch_frame = frame.copy()
    if strength < 1 or height < 2 or width < 1:
        return glitch_frame

    # randint needs low < high; max(2, ...) keeps the range non-empty while
    # leaving the draw untouched whenever the original range was valid.
    num_slices = np.random.randint(1, max(2, strength))
    max_shift = width - 1
    for _ in range(num_slices):
        slice_height = np.random.randint(1, max(2, height // strength))
        start_y = np.random.randint(0, height - slice_height)
        shift = np.random.randint(-strength, strength)
        # Never shift a band completely out of the frame.
        shift = max(-max_shift, min(max_shift, shift))

        rows = slice(start_y, start_y + slice_height)
        if shift > 0:
            # Move the band to the right.
            glitch_frame[rows, shift:] = frame[rows, :width - shift]
        else:
            # Move the band to the left; a zero shift restores the rows.
            glitch_frame[rows, :width + shift] = frame[rows, -shift:]
    return glitch_frame

def process_frame(frame, particles, spec_frame, current_onset, max_onset, energy, mask, blur_strengths):
    """Apply all per-frame effects and return the finished frame.

    Steps, in order: regional blur, particle update and drawing, mirrored
    spectrum bars around the vertical centre, bounce, and a glitch with 10%
    probability.

    Args:
        frame: RGB image for this video frame; drawn on in place after the
            blur step.
        particles: Particle list, advanced in place.
        spec_frame: One spectrum column (one value per bar).
        current_onset: Onset strength for this frame.
        max_onset: Largest onset strength, used for normalisation.
        energy: Audio level driving the particle speed.
        mask: Blur mask (1 = blur this pixel).
        blur_strengths: Per-pixel blur strength.

    Returns:
        The processed frame.
    """
    frame = apply_blur_effect(frame, mask, blur_strengths)

    height, width = frame.shape[:2]
    particles = update_particles(particles, width, height, energy)
    draw_particles(frame, particles, (255, 255, 255))

    bar_width = width // 32
    max_bar_height = height // 4
    mid_y = height // 2
    bar_color = (255, 255, 255)

    levels = np.asarray(spec_frame)
    if levels.size:
        # The normalisation range is per frame; compute it once, not per bar.
        lowest, highest = levels.min(), levels.max()
        if highest > lowest:
            bar_heights = np.interp(levels, [lowest, highest], [0, max_bar_height]).astype(int)
        else:
            # A flat column (e.g. silence) has no range to interpolate over;
            # np.interp would return the upper bound, so draw empty bars.
            bar_heights = np.zeros(levels.size, dtype=int)

        for j, bar_height in enumerate(bar_heights):
            bar_height = int(bar_height)
            # The upper and lower bars are opaque and share the centre row,
            # so one rectangle spanning both covers the same pixels.
            cv2.rectangle(frame,
                          (j * bar_width, mid_y - bar_height),
                          ((j + 1) * bar_width, mid_y + bar_height),
                          bar_color,
                          -1)

    frame = apply_bounce_effect(frame, current_onset, max_onset)
    if np.random.rand() < 0.1:
        frame = apply_glitch_effect(frame)

    return frame

def create_music_visualizer(image_path, audio_path, output_path):
    """Render a music-reactive video from a still image and an audio file.

    The audio is analysed into a 32-band mel spectrogram and an onset
    envelope with one column per video frame (30 fps). Every frame is
    produced by ``process_frame`` from a copy of the image, written to
    ``temp_output.mp4`` and finally muxed with the audio into
    ``output_path``; the temporary file is removed afterwards.

    Args:
        image_path: Path of the background image.
        audio_path: Path of the audio track (analysed and used as soundtrack).
        output_path: Path of the final video file.

    Raises:
        IOError: If the image cannot be read or the temporary video cannot
            be opened for writing.
        ValueError: If the audio is too short to produce a single frame.
    """
    import os

    y, sr = librosa.load(audio_path)
    duration = librosa.get_duration(y=y, sr=sr)

    fps = 30
    n_frames = int(duration * fps)
    if n_frames < 1:
        raise ValueError("Audio is too short to produce a single video frame")
    hop_length = sr // fps  # one analysis hop per video frame

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=32, fmax=8000, hop_length=hop_length)
    S_dB = librosa.power_to_db(S, ref=np.max)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(image_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width, _ = img.shape

    particles = create_particles(100, width, height)
    mask, blur_strengths = create_blur_mask(height, width)

    max_onset = np.max(onset_env)
    if not max_onset > 0:
        # Silent track: avoid dividing by zero; every onset ratio becomes 0.
        max_onset = 1.0

    temp_path = 'temp_output.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(temp_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise IOError("Could not open {} for writing".format(temp_path))

        for frame_num in range(n_frames):
            audio_idx = int(frame_num * len(y) / n_frames)
            chunk = y[audio_idx:audio_idx + hop_length]

            # Clamp to the last analysis column if the video outlasts it.
            spec_frame = S_dB[:, frame_num] if frame_num < S_dB.shape[1] else S_dB[:, -1]
            current_onset = onset_env[frame_num] if frame_num < len(onset_env) else onset_env[-1]
            energy = np.mean(np.abs(chunk)) * 10

            frame = process_frame(img.copy(), particles, spec_frame, current_onset,
                                  max_onset, energy, mask, blur_strengths)
            video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        video.release()

    try:
        clip = mpy.VideoFileClip(temp_path)
        try:
            audio = mpy.AudioFileClip(audio_path).set_duration(clip.duration)
            try:
                final_video = clip.set_audio(audio)
                final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
            finally:
                audio.close()
        finally:
            clip.close()
    finally:
        # The intermediate silent video is of no use once muxing is over.
        if os.path.exists(temp_path):
            os.remove(temp_path)

if __name__ == "__main__":
    # Demo run with fixed file names.
    image_path, audio_path, output_path = (
        'image.png',             # path of the input image
        'audio.mp3',             # path of the input audio
        'output_videoDemo.mp4',  # path of the output video
    )
    create_music_visualizer(image_path, audio_path, output_path)