擷取 numbers.txt 裡面各編號所對應台詞的每一幀

In [None]:
import os
import re
from datetime import timedelta
from PIL import Image
from moviepy.editor import VideoFileClip
from google.colab import drive
# Mount Google Drive so the /content/drive/... paths used further down are reachable.
drive.mount('/content/drive')

def sanitize_filename(text):
    """Return *text* with filename-unsafe ASCII characters made full-width.

    The characters / \\ ? * " < > | : are each swapped for their full-width
    look-alike; every other character is left untouched.
    """
    # One translation table, applied in a single pass over the string.
    fullwidth_table = str.maketrans({
        "/": "／",
        "\\": "＼",
        "?": "？",
        "*": "＊",
        "\"": "＂",
        "<": "＜",
        ">": "＞",
        "|": "｜",
        ":": "：",
    })
    return text.translate(fullwidth_table)

def parse_srt_time(time_str):
    """Convert an SRT timestamp such as "00:01:02,345" into a timedelta.

    Args:
        time_str: Timestamp in "HH:MM:SS,mmm" form. A "." is accepted in
            place of the "," and surrounding whitespace is ignored.

    Returns:
        The timestamp as a datetime.timedelta.

    Raises:
        ValueError: If the string does not have three ":"-separated fields
            or a field is not numeric.
    """
    hours, minutes, seconds = time_str.strip().replace(',', '.').split(':')
    whole, _, fraction = seconds.partition('.')
    fraction = fraction.strip()
    # Read the fraction digit-by-digit instead of going through float:
    # float("1.001") - 1 is 0.000999999..., which truncated to 999 us.
    microseconds = int(fraction.ljust(6, '0')[:6]) if fraction else 0
    return timedelta(
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(whole or 0),
        microseconds=microseconds,
    )

def parse_srt(srt_file):
    """Parse an SRT subtitle file into a list of cue dictionaries.

    Args:
        srt_file: Path to the .srt file. It is read as UTF-8 and a leading
            byte-order mark is tolerated.

    Returns:
        A list of dicts, in file order, with the keys 'index' (int),
        'start_time' and 'end_time' (values returned by parse_srt_time) and
        'text' (the cue lines joined by a newline). Blocks with fewer than
        three lines are ignored; blocks whose index or time line cannot be
        parsed are skipped with a printed warning.
    """
    subs = []
    with open(srt_file, 'r', encoding='utf-8-sig') as f:  # utf-8-sig drops a BOM
        content = f.read()

    # Cues are separated by blank lines; a separator line holding only
    # whitespace is accepted as well.
    for entry in re.split(r'\n\s*\n', content):
        parts = entry.strip().split('\n')
        if len(parts) < 3:
            continue

        try:
            # Strip any stray BOM character before reading the cue number.
            index = int(parts[0].strip().replace('\ufeff', ''))
            start_str, end_str = parts[1].split('-->')
            start_time = parse_srt_time(start_str.strip())
            # Anything after the end timestamp (cue settings) is ignored.
            end_time = parse_srt_time(end_str.split()[0])
        except (ValueError, IndexError) as e:
            # One broken cue should not abort the whole file.
            print(f"[警告] 略過無法解析的字幕區塊: '{parts[0].strip()}' ({str(e)})")
            continue

        subs.append({
            'index': index,
            'start_time': start_time,
            'end_time': end_time,
            'text': '\n'.join(parts[2:])
        })
    return subs

def read_numbers(numbers_file):  # 0 index to 1 index
    """Read 0-based subtitle numbers from a text file as 1-based SRT indices.

    Args:
        numbers_file: Path to a text file with one non-negative integer per
            line. Blank lines are skipped. The file is read as UTF-8 and a
            leading byte-order mark is tolerated.

    Returns:
        A list of ints in file order, each being the number from the file
        plus one. Lines that are not integers, or are negative, are reported
        with a printed warning and left out.
    """
    numbers = []
    # utf-8-sig (as in parse_srt): without it a BOM sticks to the first line,
    # int() rejects it, and the first number is dropped with only a warning.
    with open(numbers_file, 'r', encoding='utf-8-sig') as f:
        for line_num, line in enumerate(f, 1):
            cleaned_line = line.strip()
            if not cleaned_line:
                continue  # skip blank lines

            try:
                original_num = int(cleaned_line)
                if original_num < 0:
                    raise ValueError("負數無效")
            except ValueError as e:
                print(f"[警告] 第{line_num}行無效數字: '{cleaned_line}' ({str(e)})")
                continue

            # The file is 0-based, SRT cue numbers are 1-based.
            numbers.append(original_num + 1)

    return numbers

def capture_frames(video_path, srt_path, numbers_path, output_path, episode,
                   image_format='jpg', frame_rate=23.976):
    """Save every video frame shown during the selected subtitle cues.

    The cues to export are the entries of the SRT file whose index appears in
    the list returned by read_numbers(numbers_path). For each of them a
    folder "subNNN" is created under output_path and one image per frame
    step is written into it, named
    "mygo_<episode>_<index-1>_<text>_<frame counter>.<image_format>".

    Args:
        video_path: Path of the video file opened with VideoFileClip.
        srt_path: Path of the SRT subtitle file.
        numbers_path: Path of the text file listing the cues to export.
        output_path: Directory under which the per-cue folders are created.
        episode: Episode label inserted into every file name (e.g. "ep13").
        image_format: File extension handed to PIL when saving.
        frame_rate: Frames per second used to step through each cue.
            Defaults to 23.976, the value that used to be hard-coded.

    Raises:
        ValueError: If frame_rate is not positive.
    """
    if frame_rate <= 0:
        raise ValueError("frame_rate must be positive")
    frame_interval = 1.0 / frame_rate

    # A set keeps the membership test constant-time per cue.
    wanted = set(read_numbers(numbers_path))
    selected_subs = [sub for sub in parse_srt(srt_path) if sub['index'] in wanted]

    video = VideoFileClip(video_path)
    try:
        video_duration = video.duration

        for sub in selected_subs:
            # Clamp the cue to the playable range of the video.
            start = max(0, min(sub['start_time'].total_seconds(), video_duration))
            end = max(0, min(sub['end_time'].total_seconds(), video_duration))

            # Spaces are removed as before; line breaks of multi-line cues are
            # removed too so that the file name stays on a single line.
            flat_text = sub['text'].replace(" ", "").replace("\r", "").replace("\n", "")
            sanitized_text = sanitize_filename(flat_text)

            output_folder = os.path.join(output_path, f"sub{sub['index']:03d}")
            os.makedirs(output_folder, exist_ok=True)

            print(f"\n編號: {sub['index']}")
            print(f"台詞: {sub['text']}")
            print(f"開始時間: {str(sub['start_time']).split('.')[0]}")
            print(f"結束時間: {str(sub['end_time']).split('.')[0]}")
            print(f"輸出目錄: {output_folder}\n")

            frame_count = 0
            current_time = start
            while current_time <= end:
                try:
                    frame = video.get_frame(current_time)
                    filename = f"mygo_{episode}_{(sub['index']-1):03d}_{sanitized_text}_{frame_count:04d}.{image_format}"
                    Image.fromarray(frame).save(os.path.join(output_folder, filename))
                except Exception as e:
                    # Best effort: report the failure and move on to the next cue.
                    print(f"錯誤發生在時間 {current_time:.3f}s: {str(e)}")
                    break

                if frame_count == 0:
                    # Only the first file of each cue is announced.
                    print(f"已保存: {filename}")
                frame_count += 1
                # Derive the time from the counter so rounding errors do not
                # pile up over long cues.
                current_time = start + frame_count * frame_interval
    finally:
        # Release the reader even when something above raised.
        video.close()


# Inputs for this run: the episode label, the source video, its subtitle file,
# the list of subtitle numbers to export, and the destination folder.
episode = "ep13"
output_path = "/content/drive/MyDrive/SAS/frame15output"
numbers_file = "/content/drive/MyDrive/SAS/mygo_subtitle/numbers.txt"
srt_file = "/content/drive/MyDrive/SAS/mygo_subtitle/BanG Dream! It_s MyGO!!!!! - 13.srt"
video_file = "/content/drive/MyDrive/SAS/mygo_video/[ANi] BanG Dream! It's MyGO!!!!! - 13 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4"

capture_frames(
    video_path=video_file,
    srt_path=srt_file,
    numbers_path=numbers_file,
    output_path=output_path,
    episode=episode,
)

擷取 第幾句台詞的每一幀 (單句)

In [None]:
import os
import re
from datetime import timedelta
from PIL import Image
from moviepy.editor import VideoFileClip
from google.colab import drive

#drive.mount('/content/drive')

def sanitize_filename(text):
    """Return *text* with filename-unsafe ASCII characters made full-width.

    The characters / \\ ? * " < > | : are each swapped for their full-width
    look-alike; every other character is left untouched.
    """
    # One translation table, applied in a single pass over the string.
    fullwidth_table = str.maketrans({
        "/": "／",
        "\\": "＼",
        "?": "？",
        "*": "＊",
        "\"": "＂",
        "<": "＜",
        ">": "＞",
        "|": "｜",
        ":": "：",
    })
    return text.translate(fullwidth_table)

def parse_srt_time(time_str):
    """Convert an SRT timestamp such as "00:01:02,345" into a timedelta.

    Args:
        time_str: Timestamp in "HH:MM:SS,mmm" form. A "." is accepted in
            place of the "," and surrounding whitespace is ignored.

    Returns:
        The timestamp as a datetime.timedelta.

    Raises:
        ValueError: If the string does not have three ":"-separated fields
            or a field is not numeric.
    """
    hours, minutes, seconds = time_str.strip().replace(',', '.').split(':')
    whole, _, fraction = seconds.partition('.')
    fraction = fraction.strip()
    # Read the fraction digit-by-digit instead of going through float:
    # float("1.001") - 1 is 0.000999999..., which truncated to 999 us.
    microseconds = int(fraction.ljust(6, '0')[:6]) if fraction else 0
    return timedelta(
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(whole or 0),
        microseconds=microseconds,
    )

def parse_srt(srt_file):
    """Parse an SRT subtitle file into a list of cue dictionaries.

    Args:
        srt_file: Path to the .srt file. It is read as UTF-8 and a leading
            byte-order mark is tolerated.

    Returns:
        A list of dicts, in file order, with the keys 'index' (int),
        'start_time' and 'end_time' (values returned by parse_srt_time) and
        'text' (the cue lines joined by a newline). Blocks with fewer than
        three lines are ignored; blocks whose index or time line cannot be
        parsed are skipped with a printed warning.
    """
    subs = []
    with open(srt_file, 'r', encoding='utf-8-sig') as f:  # utf-8-sig drops a BOM
        content = f.read()

    # Cues are separated by blank lines; a separator line holding only
    # whitespace is accepted as well.
    for entry in re.split(r'\n\s*\n', content):
        parts = entry.strip().split('\n')
        if len(parts) < 3:
            continue

        try:
            # Strip any stray BOM character before reading the cue number.
            index = int(parts[0].strip().replace('\ufeff', ''))
            start_str, end_str = parts[1].split('-->')
            start_time = parse_srt_time(start_str.strip())
            # Anything after the end timestamp (cue settings) is ignored.
            end_time = parse_srt_time(end_str.split()[0])
        except (ValueError, IndexError) as e:
            # One broken cue should not abort the whole file.
            print(f"[警告] 略過無法解析的字幕區塊: '{parts[0].strip()}' ({str(e)})")
            continue

        subs.append({
            'index': index,
            'start_time': start_time,
            'end_time': end_time,
            'text': '\n'.join(parts[2:])
        })
    return subs

def capture_frames(video_path, srt_path, output_path, episode,
                   image_format='jpg', frame_rate=23.976):
    """Ask for one subtitle number and save every frame shown during that cue.

    The number is read from standard input and compared directly with the
    SRT cue index; entering -1 returns without doing anything. For the
    matching cue a folder "subNNN" is created under output_path and one image
    per frame step is written into it, named
    "mygo_<episode>_<index-1>_<text>_<frame counter>.<image_format>".

    Args:
        video_path: Path of the video file opened with VideoFileClip.
        srt_path: Path of the SRT subtitle file.
        output_path: Directory under which the cue folder is created.
        episode: Episode label inserted into every file name (e.g. "ep03").
        image_format: File extension handed to PIL when saving.
        frame_rate: Frames per second used to step through the cue.
            Defaults to 23.976, the value that used to be hard-coded.

    Raises:
        ValueError: If frame_rate is not positive.
    """
    if frame_rate <= 0:
        raise ValueError("frame_rate must be positive")
    frame_interval = 1.0 / frame_rate

    answer = input("請輸入台詞編號 (輸入 -1 結束): mygo_ep01_120.jpg 輸入要+1 ")
    try:
        number = int(answer)
    except ValueError:
        # A typo should not end the cell with a traceback.
        print(f"[警告] 無效的台詞編號: '{answer.strip()}'")
        return
    if number == -1:
        return

    selected_subs = [sub for sub in parse_srt(srt_path) if sub['index'] == number]
    if not selected_subs:
        # Nothing to export, so do not bother opening the video.
        print(f"[警告] 找不到編號 {number} 的台詞")
        return

    video = VideoFileClip(video_path)
    try:
        video_duration = video.duration

        for sub in selected_subs:
            # Clamp the cue to the playable range of the video.
            start = max(0, min(sub['start_time'].total_seconds(), video_duration))
            end = max(0, min(sub['end_time'].total_seconds(), video_duration))

            # Spaces are removed as before; line breaks of multi-line cues are
            # removed too so that the file name stays on a single line.
            flat_text = sub['text'].replace(" ", "").replace("\r", "").replace("\n", "")
            sanitized_text = sanitize_filename(flat_text)

            output_folder = os.path.join(output_path, f"sub{sub['index']:03d}")
            os.makedirs(output_folder, exist_ok=True)

            print(f"\n編號: {sub['index']}")
            print(f"台詞: {sub['text']}")
            print(f"開始時間: {str(sub['start_time']).split('.')[0]}")
            print(f"結束時間: {str(sub['end_time']).split('.')[0]}")
            print(f"輸出目錄: {output_folder}\n")

            frame_count = 0
            current_time = start
            while current_time <= end:
                try:
                    frame = video.get_frame(current_time)
                    filename = f"mygo_{episode}_{(sub['index']-1):03d}_{sanitized_text}_{frame_count:04d}.{image_format}"
                    Image.fromarray(frame).save(os.path.join(output_folder, filename))
                except Exception as e:
                    # Best effort: report the failure and stop this cue.
                    print(f"錯誤發生在時間 {current_time:.3f}s: {str(e)}")
                    break

                print(f"已保存: {filename}")
                frame_count += 1
                # Derive the time from the counter so rounding errors do not
                # pile up over long cues.
                current_time = start + frame_count * frame_interval
    finally:
        # Release the reader even when something above raised.
        video.close()

# Example usage: export the frames of one interactively chosen line of episode 3.
episode = "ep03"
output_path = "/content/drive/MyDrive/SAS/frame0output"

srt_file = "/content/drive/MyDrive/SAS/mygo_subtitle/BanG Dream! It_s MyGO!!!!! - 03.srt"
video_file = "/content/drive/MyDrive/SAS/mygo_video/[ANi] BanG Dream! It's MyGO!!!!! - 03 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4"

capture_frames(
    video_path=video_file,
    srt_path=srt_file,
    output_path=output_path,
    episode=episode,
)

清除frame截圖留下的編號

In [None]:
import os
import re

# Folder whose files are renamed (change this to your actual path).
folder_path = '/content/drive/MyDrive/SAS/clear'

# Walk over every entry of the folder.
for filename in os.listdir(folder_path):
    # Separate the extension so the pattern only looks at the base name.
    base_name, extension = os.path.splitext(filename)

    # Drop a trailing underscore followed by exactly four digits.
    new_base = re.sub(r'_(\d{4})$', '', base_name)

    # Names without that suffix are left alone.
    if new_base == base_name:
        continue

    new_filename = f"{new_base}{extension}"
    old_path = os.path.join(folder_path, filename)
    new_path = os.path.join(folder_path, new_filename)

    # os.rename silently replaces an existing destination on POSIX, so two
    # files that differ only in their four-digit suffix would overwrite each
    # other. Keep the file that is already there and report the clash.
    if os.path.exists(new_path):
        print(f'Skipped: "{filename}" -> "{new_filename}" already exists')
        continue

    os.rename(old_path, new_path)
    print(f'Renamed: "{filename}" -> "{new_filename}"')