<a href="https://colab.research.google.com/github/gtbnhyujmj/-Good-Auto_Shorts_Maker/blob/main/Auto_Loop_mov_maker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ffmpeg-python opencv-python pillow tqdm pydub

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 pydub-0.25.1


In [2]:
import os
import ffmpeg
import shutil
import numpy as np
from PIL import Image
from tqdm import tqdm
from pydub import AudioSegment

# 定義區

In [3]:
def extract_audio(input_mov, output_wav):
    """
    Extract the audio track of a MOV/MP4 file and save it as a WAV file.

    Args:
        input_mov: Path of the source video whose audio is extracted.
        output_wav: Path of the WAV file to write (overwritten if present).

    Raises:
        ffmpeg.Error: Propagated from ``run()`` when the ffmpeg process fails
            (for instance when the source carries no audio stream).
    """
    (
        ffmpeg
        .input(input_mov)
        # vn=None emits the bare `-vn` flag so only audio is written. The PCM
        # settings mirror generate_silence_wav so every layer shares a format.
        .output(output_wav, vn=None, acodec='pcm_s16le', ac=2, ar='44100')
        .run(overwrite_output=True)
    )

    print(f"✅ 分離音軌：{output_wav}")

In [4]:
def generate_silence_wav(duration_sec, output_wav):
    """
    Write a silent WAV file of the requested length.

    Args:
        duration_sec: Length of the silence, passed to ffmpeg as ``-t``.
        output_wav: Destination WAV path (overwritten if present).
    """
    # lavfi's anullsrc generator produces 44.1 kHz stereo silence.
    silence_source = ffmpeg.input('anullsrc=r=44100:cl=stereo', f='lavfi', t=duration_sec)
    wav_sink = silence_source.output(output_wav, acodec='pcm_s16le', ac=2, ar='44100')
    wav_sink.run(overwrite_output=True)
    print(f"✅ 產生靜音：{output_wav}")

In [5]:
def auto_clean_audio(input_wav, output_wav, silence_db_thresh=-45.0, min_chunk_ms=100):
    """
    Simple noise gate: mute every chunk whose loudness is below a threshold.

    The audio is walked in windows of ``min_chunk_ms`` milliseconds. A window
    whose ``dBFS`` is below ``silence_db_thresh`` is replaced by silence of the
    same length; louder windows are copied unchanged.

    Args:
        input_wav: Path of the WAV file to read.
        output_wav: Path of the WAV file to write.
        silence_db_thresh: Loudness (dBFS) under which a window is muted.
        min_chunk_ms: Window size in milliseconds; must be positive.

    Raises:
        ValueError: If ``min_chunk_ms`` is not positive.
    """
    if min_chunk_ms <= 0:
        raise ValueError("min_chunk_ms 必須大於 0")

    audio = AudioSegment.from_wav(input_wav)
    cleaned = AudioSegment.silent(duration=0, frame_rate=audio.frame_rate)

    for start_ms in range(0, len(audio), min_chunk_ms):
        chunk = audio[start_ms:start_ms + min_chunk_ms]
        if chunk.dBFS < silence_db_thresh:
            # Use the chunk's own length: the final window is usually shorter
            # than min_chunk_ms and must not make the output longer than the input.
            cleaned += AudioSegment.silent(duration=len(chunk), frame_rate=audio.frame_rate)
        else:
            cleaned += chunk

    cleaned.export(output_wav, format="wav")
    print(f"✅ 自動消噪完成：{output_wav}")


In [6]:
def align_and_mix_audio(audio_list, total_frames, fps, output_wav):
    """
    Loop every audio layer to the video length and mix them into one WAV.

    Args:
        audio_list: Paths of the audio layers; ``None`` entries and paths that
            do not exist are skipped. ``None`` is treated as an empty list.
        total_frames: Number of video frames the audio must cover.
        fps: Frame rate used to convert ``total_frames`` to a duration.
        output_wav: Path of the mixed WAV file to write.
    """
    total_length_ms = int(total_frames / fps * 1000)
    result = AudioSegment.silent(duration=total_length_ms)

    for audio_path in audio_list or []:
        if audio_path is None or not os.path.exists(audio_path):
            continue
        audio = AudioSegment.from_file(audio_path)
        clip_ms = len(audio)
        if clip_ms == 0:
            # An empty clip has nothing to loop, and dividing by its length
            # below would raise ZeroDivisionError.
            continue
        # Repeat the clip until it covers the whole video, then trim the excess.
        repeats = -(-total_length_ms // clip_ms)  # integer ceiling division
        audio_full = (audio * repeats)[:total_length_ms]
        result = result.overlay(audio_full)

    result.export(output_wav, format="wav")
    print(f"✅ 多層音軌補齊並混音完成：{output_wav}")

In [7]:
def extract_frames_with_alpha(input_path, output_folder, fps=None, ref_img=None, ref_frames=None):
    """
    Convert one input layer into numbered RGBA PNG frames (frame_00001.png, ...).

    Supported inputs, checked in this order:
      * ``.mov`` / ``.mp4`` video: decoded by ffmpeg, resampled to ``fps`` if given.
      * a folder: every PNG inside (any letter case), in sorted name order.
      * a single ``.png`` image: written as one frame.
      * ``.mp3`` / ``.wav`` audio: fully transparent placeholder frames, so an
        audio-only layer adds nothing to the picture when blended.

    Args:
        input_path: Path of the video, folder, image or audio file.
        output_folder: Folder receiving the frames (created if missing).
        fps: Optional frame rate for video inputs; ignored for other inputs.
        ref_img: Object whose ``.size`` gives the placeholder frame size for
            audio inputs. Only used together with ``ref_frames``.
        ref_frames: Number of placeholder frames for audio inputs. When either
            reference is missing, a single 1080x1920 frame is produced.

    Raises:
        ValueError: If the folder holds no PNG or the input type is unsupported.
    """
    os.makedirs(output_folder, exist_ok=True)
    lowered = input_path.lower()

    if lowered.endswith(('.mov', '.mp4')):
        options = {'pix_fmt': 'rgba'}
        if fps:
            options['vf'] = f'fps={fps}'
        (
            ffmpeg
            .input(input_path)
            .output(os.path.join(output_folder, 'frame_%05d.png'), **options)
            .run(overwrite_output=True)
        )
    elif os.path.isdir(input_path):
        # Match the extension case-insensitively, like every other branch does.
        png_files = [f for f in sorted(os.listdir(input_path)) if f.lower().endswith('.png')]
        if not png_files:
            raise ValueError(f"資料夾 {input_path} 裡面沒有 PNG！")
        for frame_no, fname in enumerate(png_files, start=1):
            # The context manager releases the source file handle promptly.
            with Image.open(os.path.join(input_path, fname)) as src:
                src.convert("RGBA").save(os.path.join(output_folder, f"frame_{frame_no:05d}.png"))
    elif lowered.endswith('.png'):
        with Image.open(input_path) as src:
            src.convert("RGBA").save(os.path.join(output_folder, "frame_00001.png"))
    elif lowered.endswith(('.mp3', '.wav')):
        # Transparent placeholder frames keep the layer aligned when blending.
        if ref_img is not None and ref_frames is not None:
            size = ref_img.size
            nframes = ref_frames
        else:
            # No reference available: fall back to a single 1080x1920 frame.
            size = (1080, 1920)
            nframes = 1
        # Every placeholder is identical, so build it once and save it repeatedly.
        transparent = Image.new("RGBA", size, (0, 0, 0, 0))
        for frame_no in range(1, nframes + 1):
            transparent.save(os.path.join(output_folder, f"frame_{frame_no:05d}.png"))
    else:
        raise ValueError("input_path 必須是 MOV、MP4、PNG 資料夾、單張 PNG 或 MP3/WAV")

    print(f"✅ {input_path} 已轉換為 PNG 幀：{output_folder}")

In [8]:
def blend_frames_multi(bg_folder, fg_folder, output_folder):
    """
    Alpha-composite two PNG frame folders into ``output_folder``.

    The output has as many frames as the longer input; the shorter input is
    cycled from its first frame to fill the gap.

    Args:
        bg_folder: Folder holding the background frames.
        fg_folder: Folder holding the foreground frames drawn on top.
        output_folder: Folder receiving the blended frames (created if missing).

    Raises:
        ValueError: If exactly one of the two folders contains no PNG, since
            there would be nothing to cycle against the other layer.
    """
    os.makedirs(output_folder, exist_ok=True)
    bg_frames = sorted(f for f in os.listdir(bg_folder) if f.endswith('.png'))
    fg_frames = sorted(f for f in os.listdir(fg_folder) if f.endswith('.png'))
    bg_len = len(bg_frames)
    fg_len = len(fg_frames)
    n = max(bg_len, fg_len)

    # Without this guard the modulo below would raise ZeroDivisionError.
    if n > 0:
        for folder, count in ((bg_folder, bg_len), (fg_folder, fg_len)):
            if count == 0:
                raise ValueError(f"資料夾 {folder} 裡面沒有 PNG！")

    for i in tqdm(range(n), desc="疊加中"):
        bg_path = os.path.join(bg_folder, bg_frames[i % bg_len])
        fg_path = os.path.join(fg_folder, fg_frames[i % fg_len])
        # Context managers release both source file handles on every iteration.
        with Image.open(bg_path) as bg_src, Image.open(fg_path) as fg_src:
            out_img = Image.alpha_composite(bg_src.convert("RGBA"), fg_src.convert("RGBA"))
        out_img.save(os.path.join(output_folder, f"frame_{i+1:05d}.png"))

    print(f"✅ 疊圖完成：{output_folder}")

In [9]:
def frames_to_mov_prores4444(frames_folder, output_path, fps=30):
    """
    Encode a numbered PNG sequence into a ProRes 4444 MOV with an alpha channel.

    Args:
        frames_folder: Folder holding ``frame_%05d.png`` images.
        output_path: Path of the MOV file to write (overwritten if present).
        fps: Frame rate at which the image sequence is read.
    """
    frame_pattern = os.path.join(frames_folder, 'frame_%05d.png')
    png_sequence = ffmpeg.input(frame_pattern, framerate=fps)
    # prores_ks with profile 4 and a yuva pixel format keeps the transparency.
    encoder = png_sequence.output(
        output_path,
        vcodec='prores_ks',
        profile=4,
        pix_fmt='yuva444p10le',
    )
    encoder.run(overwrite_output=True)
    print(f"✅ MOV (ProRes 4444) 輸出完成（透明+壓縮）：{output_path}")

In [14]:
def super_layered_composite(input_list, fps=30, silence_db_thresh=-45.0, work_dir='/content'):
    """
    Stack any number of layers into one ProRes 4444 MOV with a mixed soundtrack.

    Each entry of ``input_list`` is turned into RGBA frames and blended on top
    of the layers before it; shorter layers are cycled to the longest layer's
    frame count. Audio from video / MP3 / WAV layers is noise-gated, looped to
    the video length, mixed, and muxed into the final file.

    Args:
        input_list: Layer paths, bottom layer first. Each may be a MOV/MP4,
            a PNG folder, a single PNG, or an MP3/WAV file.
        fps: Frame rate used for decoding, encoding and audio alignment.
        silence_db_thresh: Loudness threshold passed to ``auto_clean_audio``.
        work_dir: Folder for intermediate and final files.

    Returns:
        Path of the final MOV that contains both picture and sound.

    Raises:
        ValueError: If ``input_list`` is empty.
    """
    if not input_list:
        raise ValueError("input_list 至少需要一個圖層")

    os.makedirs(work_dir, exist_ok=True)
    prev_frames_dir = None
    audio_tracks = []
    max_frames = 0
    ref_img = None
    ref_frames = None

    # Split every layer into frames and, where present, an audio track.
    for idx, inp in enumerate(input_list):
        lowered = inp.lower()
        is_audio_only = lowered.endswith(('.mp3', '.wav'))
        frames_dir = os.path.join(work_dir, f"layer_{idx+1}_frames")

        # Frames left by an earlier run would inflate this layer's frame count.
        # Never wipe the folder when the caller passed it in as the input itself.
        if os.path.abspath(inp) != os.path.abspath(frames_dir):
            shutil.rmtree(frames_dir, ignore_errors=True)

        # ref_img / ref_frames stay None until the first visual layer is seen;
        # they only affect audio-only layers, which get transparent frames.
        extract_frames_with_alpha(inp, frames_dir, fps=fps, ref_img=ref_img, ref_frames=ref_frames)

        frame_files = sorted(f for f in os.listdir(frames_dir) if f.endswith('.png'))
        n_frames = len(frame_files)
        max_frames = max(max_frames, n_frames)

        # The first real picture layer fixes the size and frame count that
        # later audio-only layers are padded to.
        if ref_img is None and not is_audio_only:
            with Image.open(os.path.join(frames_dir, frame_files[0])) as sample:
                # copy() detaches the image from the file, which is deleted later.
                ref_img = sample.copy()
            ref_frames = n_frames

        # Audio handling
        audio_wav = None
        if lowered.endswith(('.mov', '.mp4', '.mp3', '.wav')):
            audio_wav = os.path.join(work_dir, f"layer_{idx+1}_audio.wav")
            if lowered.endswith(('.mov', '.mp4')):
                try:
                    extract_audio(inp, audio_wav)
                except ffmpeg.Error:
                    # Frames were decoded from this file just above, so the most
                    # likely cause is a video without an audio stream: keep
                    # going and treat the layer as silent.
                    print(f"⚠️ {inp} 沒有可用的音軌，略過此層音訊")
                    audio_wav = None
                else:
                    auto_clean_audio(audio_wav, audio_wav, silence_db_thresh)
            elif lowered.endswith('.mp3'):
                # Convert to WAV first, then clean in place.
                AudioSegment.from_mp3(inp).export(audio_wav, format="wav")
                auto_clean_audio(audio_wav, audio_wav, silence_db_thresh)
            else:
                auto_clean_audio(inp, audio_wav, silence_db_thresh)
        audio_tracks.append(audio_wav)

        # Blend this layer on top of everything accumulated so far.
        if prev_frames_dir is None:
            prev_frames_dir = frames_dir
        else:
            out_dir = os.path.join(work_dir, f"merge_{idx}_frames")
            # Same stale-frame hazard as above for the merge folder.
            shutil.rmtree(out_dir, ignore_errors=True)
            blend_frames_multi(prev_frames_dir, frames_dir, out_dir)
            shutil.rmtree(prev_frames_dir)
            shutil.rmtree(frames_dir)
            prev_frames_dir = out_dir

    # Encode the blended frames.
    output_mov = os.path.join(work_dir, 'final_output.mov')
    frames_to_mov_prores4444(prev_frames_dir, output_mov, fps=fps)

    # Mix the audio layers to the video length.
    mix_audio_wav = os.path.join(work_dir, "final_output_audio.wav")
    align_and_mix_audio([a for a in audio_tracks if a], max_frames, fps, mix_audio_wav)

    # Mux picture and sound; shortest=None emits the bare `-shortest` flag.
    output_mov_with_audio = os.path.join(work_dir, 'final_output_with_audio.mov')
    video_stream = ffmpeg.input(output_mov)
    audio_stream = ffmpeg.input(mix_audio_wav)
    (
        ffmpeg
        .output(video_stream, audio_stream, output_mov_with_audio, vcodec='copy', acodec='aac', shortest=None)
        .run(overwrite_output=True)
    )

    print(f"🎉 最終影片（含聲音）輸出：{output_mov_with_audio}")

    return output_mov_with_audio

In [15]:
# Replace the entries of input_list with your own file / folder paths.
input_list = [
    '/content/drive/MyDrive/shorts/OK_Stamps/OK_001.png',  # layer 1: video / image
    '/content/drive/MyDrive/shorts/BGM_MP3/vine_boom.mp3',     # layer 2
    # '/content/貼紙夾',        # a folder can be used as layer 3
    # ... add as many layers as needed
]

final_mov = super_layered_composite(input_list, fps=30)

✅ /content/drive/MyDrive/shorts/OK_Stamps/OK_001.png 已轉換為 PNG 幀：/content/layer_1_frames
✅ /content/drive/MyDrive/shorts/BGM_MP3/vine_boom.mp3 已轉換為 PNG 幀：/content/layer_2_frames
✅ 自動消噪完成：/content/layer_2_audio.wav


疊加中: 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]


✅ 疊圖完成：/content/merge_1_frames
✅ MOV (ProRes 4444) 輸出完成（透明+壓縮）：/content/final_output.mov
✅ 多層音軌補齊並混音完成：/content/final_output_audio.wav
🎉 最終影片（含聲音）輸出：/content/final_output_with_audio.mov


# 測試

In [16]:
from pydub import AudioSegment
import ffmpeg

# Desired clip length and frame rate
DURATION = 5  # seconds
FPS = 30

# Image, audio and video paths
IMAGE = '/content/drive/MyDrive/shorts/OK_Stamps/OK_001.png'
AUDIO = '/content/drive/MyDrive/shorts/BGM_MP3/vine_boom.mp3'
OUTPUT = '/content/final_output_with_audio.mov'
LOOPED = '/content/looped.mov'

# Step 1: loop the still image into a transparent clip of DURATION seconds.
# `framerate` is an input option of the image reader; the output frame rate
# is set with `r`.
(
    ffmpeg
    .input(IMAGE, loop=1, t=DURATION, framerate=FPS)
    .output(LOOPED, vcodec='prores_ks', profile=4, pix_fmt='yuva444p10le', r=FPS)
    .run(overwrite_output=True)
)

# Step 2: add the soundtrack. `-shortest` would cut the result short whenever
# the audio is shorter than the clip, so the audio is padded with trailing
# silence (apad) and the output is capped at exactly DURATION seconds with -t.
video_stream = ffmpeg.input(LOOPED)
audio_stream = ffmpeg.input(AUDIO).audio.filter('apad')
(
    ffmpeg
    .output(video_stream, audio_stream, OUTPUT, vcodec='copy', acodec='aac', t=DURATION)
    .run(overwrite_output=True)
)

print(f"✅ 成功產生{DURATION}秒有聲音的影片：{OUTPUT}")

✅ 成功產生5秒有聲音的影片：/content/final_output_with_audio.mov
