<a href="https://colab.research.google.com/github/globalenglish01/Billion/blob/main/CutMP4%26Combine2Learn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yt-dlp  # 安装 yt-dlp（如果之前未安装）
!apt install ffmpeg  # 安装 ffmpeg

# 下载 B 站播放列表到 Google Drive 目标文件夹

Collecting yt-dlp
  Downloading yt_dlp-2025.9.5-py3-none-any.whl.metadata (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.1/177.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.9.5-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.9.5
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:

from google.colab import drive
drive.mount('/content/drive')

import os
import re
import subprocess

# Input/output locations on the mounted Drive.
input_folder = "/content/drive/My Drive/Bilibili_Videos/input_videos"
output_folder = "/content/drive/My Drive/mp4/input"
os.makedirs(output_folder, exist_ok=True)

# Scene-detection tuning.
scene_threshold = 0.3       # cut-off for ffmpeg's `scene` score; larger values produce fewer cuts
min_scene_duration = 5      # minimum clip length in seconds, so that not every scene change starts a new clip

# Captures the token that follows "pts_time:" in a showinfo log line.
_PTS_TIME_RE = re.compile(r"pts_time:\s*(\S+)")


def _probe_duration(path):
    """Return the duration of `path` in seconds, or None when ffprobe prints no usable number."""
    result = subprocess.run(
        ["ffprobe", "-i", path, "-show_entries", "format=duration",
         "-v", "quiet", "-of", "csv=p=0"],
        capture_output=True, text=True
    )
    try:
        return float(result.stdout.strip())
    except ValueError:
        # Empty output (ffprobe failed) or a non-numeric value such as "N/A".
        return None


def _detect_scene_timestamps(path, threshold, cache_file):
    """Return the scene-change timestamps (seconds) of `path`, or None when detection fails.

    The showinfo log lines are cached in `cache_file`, so a re-run does not have to
    decode the whole video again. The cache is only written after ffmpeg exits
    successfully, which keeps a failed run from leaving an empty cache behind.
    """
    if os.path.exists(cache_file):
        with open(cache_file, "r") as fh:
            log_lines = fh.readlines()
    else:
        # Argument list instead of a shell pipeline: file names need no quoting and
        # the "pts_time" filtering happens in Python rather than through grep.
        result = subprocess.run(
            ["ffmpeg", "-hide_banner", "-nostats", "-i", path,
             "-vf", f"select='gt(scene,{threshold})',showinfo",
             "-vsync", "2", "-f", "null", "-"],
            stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
            text=True, errors="replace"
        )
        if result.returncode != 0:
            return None
        log_lines = [line for line in result.stderr.splitlines() if "pts_time" in line]
        with open(cache_file, "w") as fh:
            fh.writelines(line + "\n" for line in log_lines)

    timestamps = []
    for line in log_lines:
        match = _PTS_TIME_RE.search(line)
        if match is None:
            continue
        try:
            timestamps.append(float(match.group(1)))
        except ValueError:
            # Ignore a malformed log line instead of aborting the whole batch.
            continue
    return timestamps


def _plan_segments(timestamps, total_duration, min_duration):
    """Turn scene timestamps into a list of (start, duration) clips.

    A cut is only accepted when the clip it closes is at least `min_duration`
    seconds long. The final entry has duration None, meaning "until the end of file".
    """
    segments = []
    start = 0
    for timestamp in timestamps:
        length = timestamp - start
        if length >= min_duration:
            segments.append((start, length))
            start = timestamp
    if start < total_duration:
        segments.append((start, None))
    return segments


def _cut_segment(src, dst, start, duration):
    """Re-encode one clip of `src` into `dst`; return True when ffmpeg succeeded."""
    # `-ss` is placed before `-i` so ffmpeg seeks in the input instead of decoding and
    # discarding everything before `start`; the clip is re-encoded either way.
    command = ["ffmpeg", "-ss", str(start), "-i", src]
    if duration is not None:
        command += ["-t", str(duration)]
    command += ["-c:v", "libx264", "-preset", "fast", "-crf", "23", dst]
    if subprocess.run(command).returncode == 0:
        return True
    # Remove a partial clip so the "already exists" check does not skip it next time.
    if os.path.exists(dst):
        os.remove(dst)
    return False


# Collect the videos to process (extension match is case-insensitive, order is stable).
video_files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith('.mp4'))

for video_file in video_files:
    input_video_path = os.path.join(input_folder, video_file)
    print(f"\n正在处理视频文件: {video_file}")

    # Total duration of the video.
    total_duration = _probe_duration(input_video_path)
    if total_duration is None:
        print(f"无法读取视频时长，跳过: {video_file}")
        continue

    # Scene-change timestamps (cached per video in the working directory).
    timestamp_file = f"timestamps_{video_file}.txt"
    timestamps = _detect_scene_timestamps(input_video_path, scene_threshold, timestamp_file)
    if timestamps is None:
        print(f"场景检测失败，跳过: {video_file}")
        continue

    # Plan all clips up front so the progress counter reflects what is actually produced.
    segments = _plan_segments(timestamps, total_duration, min_scene_duration)
    total_splits = len(segments)
    name_stem = os.path.splitext(video_file)[0]

    for index, (start_time, duration) in enumerate(segments, start=1):
        output_file_name = f"{name_stem}_{index:03d}.mp4"
        output_file_path = os.path.join(output_folder, output_file_name)

        if os.path.exists(output_file_path):
            print(f"已存在，跳过: {output_file_name}")
        elif _cut_segment(input_video_path, output_file_path, start_time, duration):
            print(f"处理完成: {output_file_name} ({index}/{total_splits})")
        else:
            print(f"处理失败: {output_file_name} ({index}/{total_splits})")

    print(f"视频文件 {video_file} 处理完成！")

Mounted at /content/drive

正在处理视频文件: 2022-7.mp4
处理完成: 2022-7_001.mp4 (1/1)
视频文件 2022-7.mp4 处理完成！


In [None]:
# 安装 ffmpeg 和 tqdm
!apt-get update
!apt-get install -y ffmpeg
!pip install tqdm

from google.colab import drive
drive.mount('/content/drive')

import shutil
import os
import shlex
import subprocess
from tqdm import tqdm
from pydub import AudioSegment
from pydub.silence import split_on_silence


# ========== 工具函数 ==========
def run_ffmpeg(cmd):
    """Echo a command line, then execute it through the shell.

    :param cmd: complete command line; it is handed to the shell unchanged, so callers quote paths themselves
    :raises subprocess.CalledProcessError: when the command exits with a non-zero status
    """
    banner = "执行命令: " + format(cmd)
    print(banner)
    subprocess.run(cmd, check=True, shell=True)

def extract_audio_from_mp4(mp4_file, out_mp3):
    """Extract the audio of an MP4 into an MP3 file via ffmpeg.

    Nothing is run when `out_mp3` already exists; a notice is printed instead.

    :param mp4_file: path of the source video
    :param out_mp3: path of the MP3 to create
    """
    if not os.path.exists(out_mp3):
        source, target = shlex.quote(mp4_file), shlex.quote(out_mp3)
        run_ffmpeg(f'ffmpeg -y -i {source} -q:a 2 -map a {target}')
        return
    print(f"已存在音频文件: {out_mp3}")

def generate_silence(duration_sec, out_file):
    """Write a silent MP3 of `duration_sec` seconds to `out_file` unless it already exists.

    :param duration_sec: length of the silence in seconds (truncated to whole milliseconds)
    :param out_file: path of the MP3 to create

    NOTE(review): the silence is created with pydub's default audio parameters, which
    may differ from the source audio it is later concatenated with — confirm that the
    stream-copy concat handles this cleanly.
    """
    if not os.path.exists(out_file):
        duration_ms = int(duration_sec * 1000)
        AudioSegment.silent(duration=duration_ms).export(out_file, format="mp3")

def speed_audio(input_file, speed, output_file):
    """Create a tempo-changed copy of an audio file with ffmpeg's `atempo` filter.

    Nothing is run when `output_file` already exists.

    :param input_file: path of the source audio
    :param speed: tempo factor passed to `atempo`
    :param output_file: path of the audio file to create
    """
    if not os.path.exists(output_file):
        source, target = shlex.quote(input_file), shlex.quote(output_file)
        run_ffmpeg(f'ffmpeg -y -i {source} -filter:a "atempo={speed}" {target}')

def concat_audios(file_list, out_file):
    """Concatenate MP3 files into `out_file` with ffmpeg's concat demuxer (stream copy).

    A temporary list file named `out_file + "_list.txt"` is written for ffmpeg and is
    always removed afterwards, whether or not ffmpeg succeeds.

    :param file_list: paths of the files to join, in playback order
    :param out_file: path of the merged MP3
    :raises subprocess.CalledProcessError: when ffmpeg fails; a partially written
        `out_file` is deleted first so a later "already exists" check does not treat
        it as a finished result
    """
    print(f"🔗 开始合并 {len(file_list)} 个文件 → {out_file}")
    list_file = out_file + "_list.txt"
    try:
        with open(list_file, "w", encoding="utf-8") as f:
            for fpath in file_list:
                # Inside a single-quoted concat entry a literal quote is written as '\''.
                escaped = fpath.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        # The list path is quoted as well, so paths containing spaces survive the shell.
        cmd = f'ffmpeg -y -f concat -safe 0 -i {shlex.quote(list_file)} -c copy {shlex.quote(out_file)}'
        try:
            run_ffmpeg(cmd)
        except subprocess.CalledProcessError:
            if os.path.exists(out_file):
                os.remove(out_file)
            raise
    finally:
        if os.path.exists(list_file):
            os.remove(list_file)


def _media_duration_seconds(media_path):
    """Return the duration of `media_path` in seconds as reported by ffprobe.

    :raises ValueError: when ffprobe prints nothing that parses as a number
    """
    cmd = f'ffprobe -v error -show_entries format=duration -of csv=p=0 {shlex.quote(media_path)}'
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    try:
        return float(result.stdout.strip())
    except ValueError:
        # Same exception type as before, but with the file name and ffprobe's own message.
        raise ValueError(f"无法获取媒体时长: {media_path} ({result.stderr.strip()})") from None


def merge_mp3_with_middle_frame_ffmpeg(mp4_path, mp3_path, output_path):
    """
    Build an MP4 that shows the middle frame of a video as a still image over an MP3.

    The frame is stored as "temp_frame.png" next to `output_path` and is removed again
    even when one of the ffmpeg steps fails.

    :param mp4_path: input MP4 file the still frame is taken from
    :param mp3_path: input MP3 file used as the audio track
    :param output_path: output MP4 file
    :raises ValueError: when the duration of either input cannot be determined
    :raises subprocess.CalledProcessError: when an ffmpeg step exits with an error
    """
    print(f"🔗 开始生成 {output_path}")

    # 1. Duration of the video; the frame is grabbed at its midpoint.
    print("⏳ 获取视频时长...")
    duration = _media_duration_seconds(mp4_path)
    middle_time = duration / 2
    print(f"视频总时长: {duration:.2f}s，中间帧时间: {middle_time:.2f}s")

    # 2. Grab the middle frame.
    print("⏳ 截取中间帧...")
    # A bare file name has an empty dirname; fall back to the current directory so
    # os.makedirs does not fail on "".
    target_dir = os.path.dirname(output_path) or "."
    os.makedirs(target_dir, exist_ok=True)
    frame_path = os.path.join(target_dir, "temp_frame.png")
    try:
        cmd_frame = f'ffmpeg -y -ss {middle_time} -i {shlex.quote(mp4_path)} -frames:v 1 {shlex.quote(frame_path)}'
        run_ffmpeg(cmd_frame)
        print(f"中间帧已保存: {frame_path}")

        # 3. Duration of the MP3; it bounds the looped still image.
        print("⏳ 获取音频时长...")
        audio_duration = _media_duration_seconds(mp3_path)
        print(f"音频总时长: {audio_duration:.2f}s")

        # 4. Combine the looped image with the audio.
        print("⏳ 合成视频...")
        cmd_merge = (
            f'ffmpeg -y -loop 1 -i {shlex.quote(frame_path)} -i {shlex.quote(mp3_path)} '
            f'-c:v libx264 -t {audio_duration} -pix_fmt yuv420p -c:a aac {shlex.quote(output_path)}'
        )
        run_ffmpeg(cmd_merge)
    finally:
        # 5. Remove the temporary image on success and on failure alike.
        if os.path.exists(frame_path):
            os.remove(frame_path)
            print(f"已删除临时图片: {frame_path}")

    print(f"✅ 已生成文件: {output_path}")

# ========== 核心处理函数 ==========
def _silence_multipliers(start, end, step):
    """Return the multipliers start, start - step, ... down to and including `end`.

    Each value is derived from an integer step index. Repeatedly subtracting a float
    step drifts (2.0 minus 0.1 fifteen times lands just below 0.5), which made the
    last multiplier disappear.

    :raises ValueError: when `step` is not positive (the countdown would never end)
    """
    if step <= 0:
        raise ValueError("silence_step 必须大于 0")
    if start < end:
        return []
    # The small epsilon absorbs representation error in the division.
    steps = int((start - end) / step + 1e-9)
    return [round(start - i * step, 2) for i in range(steps + 1)]


def process_mp4_to_mp3_with_progress(
    file_path, temp_path, output_dir,
    silence_start=2.0, silence_end=0.5, silence_step=0.1,
    repeat_static=10, repeat_speed=10, speed_list=None
):
    """
    Build one practice MP3 from the audio track of an MP4.

    Pipeline:
    1. extract the MP3 from the MP4
    2. split the audio on silence into chunks
    3. for every silence multiplier, append each chunk followed by a pause whose
       length is the chunk length times the multiplier
    4. append the unmodified audio `repeat_static` times
    5. append every sped-up version `repeat_speed` times
    6. concatenate everything into `<output_dir>/<basename>_final.mp3`

    :param file_path: source MP4
    :param temp_path: working directory for intermediate files (created if missing)
    :param output_dir: directory of the final MP3 (created if missing)
    :param silence_start: first (largest) pause multiplier
    :param silence_end: last (smallest) pause multiplier, inclusive
    :param silence_step: decrement between multipliers; must be positive
    :param repeat_static: number of repetitions of the original audio
    :param repeat_speed: number of repetitions of each sped-up version
    :param speed_list: tempo factors to generate; defaults to 1.1 .. 2.0 in 0.1 steps
    :raises ValueError: when `silence_step` is not positive
    """

    if speed_list is None:
        speed_list = [round(1.0 + i * 0.1, 1) for i in range(1, 11)]  # 1.1 ~ 2.0

    # Validate the pause settings before any expensive work starts.
    silence_multipliers = _silence_multipliers(silence_start, silence_end, silence_step)

    basename = os.path.splitext(os.path.basename(file_path))[0]
    mp3_file = os.path.join(temp_path, f"{basename}.mp3")
    os.makedirs(temp_path, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    # Step 1: extract the audio track.
    print("🎵 提取音频中...")
    extract_audio_from_mp4(file_path, mp3_file)

    # Step 2: split on silence; the threshold is relative to the clip's average loudness.
    print("✂️ 正在切割音频...")
    audio = AudioSegment.from_mp3(mp3_file)
    chunks = split_on_silence(
        audio,
        min_silence_len=200,
        silence_thresh=audio.dBFS - 14
    )
    chunk_files = []
    for i, chunk in enumerate(chunks, start=1):
        out_chunk = os.path.join(temp_path, f"{basename}_chunk_{i}.mp3")
        chunk.export(out_chunk, format="mp3")
        chunk_files.append(out_chunk)
        print(f"  ✅ 切割片段导出: {out_chunk}")

    # Step 3: chunk + pause, once per multiplier.
    final_chunks = []
    chunk_seconds = {}  # decoded length per chunk file, measured once instead of once per multiplier

    print("⏸️ 开始拼接切片 + 静音...")
    for mult in silence_multipliers:
        print(f"  ➡️ 正在处理静音倍数 {mult}x")
        for idx, chunk in enumerate(chunk_files, start=1):
            final_chunks.append(chunk)
            if chunk not in chunk_seconds:
                chunk_seconds[chunk] = len(AudioSegment.from_mp3(chunk)) / 1000
            silence_duration = chunk_seconds[chunk] * mult
            silence_file = os.path.join(temp_path, f"{basename}_silence_{idx}_{mult}.mp3")
            generate_silence(silence_duration, silence_file)
            final_chunks.append(silence_file)
            print(f"     📌 添加: 切片 {idx} + 静音({mult}x)")

    # Step 4: the original audio, repeated.
    print(f"🔁 添加原始文件 {repeat_static} 次...")
    final_chunks.extend([mp3_file] * repeat_static)

    # Step 5: every sped-up version, repeated.
    print("⚡ 生成并添加加速版本...")
    for speed in speed_list:
        speed_file = os.path.join(temp_path, f"{basename}_speed_{speed}.mp3")
        speed_audio(mp3_file, speed, speed_file)
        final_chunks.extend([speed_file] * repeat_speed)
        print(f"  ✅ 已添加 {speed}x 版本 * {repeat_speed}")

    # Step 6: merge into the final MP3.
    #out_mp3 = os.path.join(temp_path, f"{basename}_final.mp3")
    out_mp3 = os.path.join(output_dir, f"{basename}_final.mp3")
    concat_audios(final_chunks, out_mp3)
    print(f"🎉 最终文件完成: {out_mp3}")

    # Optional (disabled): wrap the MP3 into an MP4 that shows the video's middle frame.
    #out_mp4 = os.path.join(output_dir, f"{basename}_final_video.mp4")
    #merge_mp3_with_middle_frame_ffmpeg(
    #    mp4_path=file_path,
    #    mp3_path=out_mp3,
    #    output_path=out_mp4
    #)

from google.colab import files
import os

def save_and_download_mp3(mp3_path):
    """
    Hand an MP3 to the browser for download, or report that it is missing.

    :param mp3_path: path of the MP3 to download
    """
    if not os.path.exists(mp3_path):
        print(f"❌ 文件不存在: {mp3_path}")
        return
    print(f"✅ 文件已生成: {mp3_path}")
    files.download(mp3_path)  # starts the browser download


# Drive folders: source clips, finished MP3s, and per-clip scratch space.
input_dir = "/content/drive/MyDrive/mp4/input"
output_dir = "/content/drive/MyDrive/mp4/output"
temp_dir = "/content/drive/MyDrive/mp4/temp"

for filename in tqdm(sorted(os.listdir(input_dir)), desc="处理文件"):
    # Only MP4 files are processed; the extension match is case-insensitive.
    if not filename.lower().endswith(".mp4"):
        continue

    base_name = os.path.splitext(filename)[0]
    file_path = os.path.join(input_dir, filename)
    temp_path = os.path.join(temp_dir, base_name)  # each clip gets its own scratch sub-directory
    out_mp3 = os.path.join(output_dir, f"{base_name}_final.mp3")

    try:
        if not os.path.exists(out_mp3):
            process_mp4_to_mp3_with_progress(file_path, temp_path, output_dir)

            # Offer the finished file for download.
            save_and_download_mp3(out_mp3)
        else:
            # A finished MP3 from an earlier run means this clip is skipped.
            print(f"已存在，跳过: {out_mp3}")
    finally:
        # Remove the scratch directory whether or not processing succeeded.
        if os.path.exists(temp_path):
            shutil.rmtree(temp_path)
            print(f"🧹 已清理临时目录：{temp_path}")

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.or                                                                               Hit:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
                                                                               Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
                                                                               Hit:4 https://cli.github.com/packages stable InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (3.171.85.81)] [Conn                                                                               Hit:5 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Waiting for headers] [Connected to r2u.stat.illinois.edu (192.17.190.167)]                                                                                Hit:6 http://ar

处理文件:   0%|          | 0/195 [00:00<?, ?it/s]

已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_001_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_002_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_003_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_004_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_005_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_006_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_007_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_008_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_009_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_010_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_011_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_012_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_013_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_014_final.mp3
已存在，跳过: /content/drive/MyDrive/mp4/output/N1_202207_015_final.mp3
已存在，跳过: /c

处理文件:  31%|███       | 60/195 [00:00<00:01, 70.15it/s]

🧹 已清理临时目录：/content/drive/MyDrive/mp4/temp/video_04_005
🎵 提取音频中...
执行命令: ffmpeg -y -i /content/drive/MyDrive/mp4/input/video_04_006.mp4 -q:a 2 -map a /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006.mp3
✂️ 正在切割音频...
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_1.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_2.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_3.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_4.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_5.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_6.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_7.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_8.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_9.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4

处理文件:  31%|███       | 60/195 [00:10<00:01, 70.15it/s]

  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_13.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_14.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_15.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_16.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_17.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_18.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_19.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_20.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_21.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_22.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_23.mp3
  ✅ 切割片段导出: /content/drive/MyDrive/mp4/temp/video_04_006/video_04_006_chunk_24.mp3
  ✅ 