In [38]:
# 尝试检测视频中代表场景转换的关键帧
# 目前只在幻灯片视频中测试过，因此可能不适用于其他类型视频

# 1. 基于图像信息提取
# 2. 基于运动分析（光流分析）

import cv2
import argparse
import json
import os
import numpy as np
import errno

# 获取视频的基本信息
def getInfo(sourcePath):
    cap = cv2.VideoCapture(sourcePath)
    info = {
        "framecount": cap.get(cv2.CAP_PROP_FRAME_COUNT),  # 总帧数
        "fps": cap.get(cv2.CAP_PROP_FPS),                # 帧率
        "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), # 宽度
        "heigth": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), # 高度
        "codec": int(cap.get(cv2.CAP_PROP_FOURCC))       # 编码格式
    }
    cap.release()
    return info

# 缩放图像比例
def scale(img, xScale, yScale):
    res = cv2.resize(img, None, fx=xScale, fy=yScale, interpolation=cv2.INTER_AREA)
    return res

# 调整图像大小
def resize(img, width, heigth):
    res = cv2.resize(img, (width, heigth), interpolation=cv2.INTER_AREA)
    return res

# 提取图像的主色调
# 使用 KMeans 聚类像素点，返回颜色中心
def extract_cols(image, numCols):
    # 转换为 np.float32 类型的矩阵以用于聚类
    Z = image.reshape((-1, 3))
    Z = np.float32(Z)

    # 设置聚类参数
    max_iter = 20
    epsilon = 1.0
    K = numCols
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, max_iter, epsilon)
    labels = np.array([])

    # 进行聚类
    compactness, labels, centers = cv2.kmeans(Z, K, labels, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # 计算每个聚类的像素数量
    clusterCounts = []
    for idx in range(K):
        # 使用 labels 作为布尔索引，获取属于该类的像素数目
        count = np.sum(labels == idx)
        clusterCounts.append(count)

    # 将颜色从 BGR 转为 RGB
    rgbCenters = []
    for center in centers:
        bgr = center.tolist()
        bgr.reverse()  # BGR 转为 RGB
        rgbCenters.append(bgr)

    # 构建颜色信息
    cols = []
    for i in range(K):
        iCol = {
            "count": clusterCounts[i],
            "col": rgbCenters[i]
        }
        cols.append(iCol)

    return cols


# 计算相邻帧的差异数据
def calculateFrameStats(sourcePath, verbose=False, after_frame=0):
    cap = cv2.VideoCapture(sourcePath)  # 打开视频文件

    data = {
        "frame_info": []  # 用于存储帧差异信息
    }

    lastFrame = None  # 存储上一帧
    while cap.isOpened():
        ret, frame = cap.read()
        if frame is None:
            break

        frame_number = cap.get(cv2.CAP_PROP_POS_FRAMES) - 1

        # 转为灰度图像，缩小尺寸并进行模糊处理，降低噪声影响
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # 转灰度图
        gray = scale(gray, 0.25, 0.25)  # 缩放到原图 1/4
        gray = cv2.GaussianBlur(gray, (9, 9), 0.0)  # 高斯模糊

        if frame_number < after_frame:
            lastFrame = gray
            continue

        if lastFrame is not None:
            # 当前帧减去上一帧
            diff = cv2.subtract(gray, lastFrame)

            # 计算非零像素点数量（变化的像素点数）
            diffMag = cv2.countNonZero(diff)

            # 记录帧差异信息
            frame_info = {
                "frame_number": int(frame_number),
                "diff_count": int(diffMag)
            }
            data["frame_info"].append(frame_info)

            if verbose:
                cv2.imshow('diff', diff)  # 显示帧差异
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        # 保存当前帧作为下一次比较的上一帧
        lastFrame = gray

    cap.release()
    cv2.destroyAllWindows()

    # 计算帧差异的统计数据
    diff_counts = [fi["diff_count"] for fi in data["frame_info"]]
    data["stats"] = {
        "num": len(diff_counts),
        "min": np.min(diff_counts),
        "max": np.max(diff_counts),
        "mean": np.mean(diff_counts),
        "median": np.median(diff_counts),
        "sd": np.std(diff_counts)  # 标准差
    }

    # 统计高于均值、标准差等的帧数量
    greater_than_mean = [fi for fi in data["frame_info"] if fi["diff_count"] > data["stats"]["mean"]]
    greater_than_median = [fi for fi in data["frame_info"] if fi["diff_count"] > data["stats"]["median"]]
    greater_than_one_sd = [fi for fi in data["frame_info"] if fi["diff_count"] > data["stats"]["sd"] + data["stats"]["mean"]]
    greater_than_two_sd = [fi for fi in data["frame_info"] if fi["diff_count"] > (data["stats"]["sd"] * 2) + data["stats"]["mean"]]
    greater_than_three_sd = [fi for fi in data["frame_info"] if fi["diff_count"] > (data["stats"]["sd"] * 3) + data["stats"]["mean"]]

    data["stats"]["greater_than_mean"] = len(greater_than_mean)
    data["stats"]["greater_than_median"] = len(greater_than_median)
    data["stats"]["greater_than_one_sd"] = len(greater_than_one_sd)
    data["stats"]["greater_than_three_sd"] = len(greater_than_three_sd)
    data["stats"]["greater_than_two_sd"] = len(greater_than_two_sd)
 # 将所有 int64 转换为 Python 的 int 类型
    for frame_info in data["frame_info"]:
        frame_info["frame_number"] = int(frame_info["frame_number"])
        frame_info["diff_count"] = int(frame_info["diff_count"])
    
    return data
  

# 保存图像的多种分辨率版本
def writeImagePyramid(destPath, name, seqNumber, image):
    fullPath = os.path.join(destPath, "full", name + "-" + str(seqNumber) + ".png")
    halfPath = os.path.join(destPath, "half", name + "-" + str(seqNumber) + ".png")
    quarterPath = os.path.join(destPath, "quarter", name + "-" + str(seqNumber) + ".png")
    eigthPath = os.path.join(destPath, "eigth", name + "-" + str(seqNumber) + ".png")
    sixteenthPath = os.path.join(destPath, "sixteenth", name + "-" + str(seqNumber) + ".png")

    hImage = scale(image, 0.5, 0.5)
    qImage = scale(image, 0.25, 0.25)
    eImage = scale(image, 0.125, 0.125)
    sImage = scale(image, 0.0625, 0.0625)

    cv2.imwrite(fullPath, image)
    cv2.imwrite(halfPath, hImage)
    cv2.imwrite(quarterPath, qImage)
    cv2.imwrite(eigthPath, eImage)
    cv2.imwrite(sixteenthPath, sImage)

# 检测关键帧（场景变化帧）
def detectScenes(sourcePath, destPath, data, name, verbose=False):
    destDir = os.path.join(destPath, "images")
    makeOutputDirs(destPath)  # 确保目录结构存在

    diff_threshold = data["stats"]["mean"] + (data["stats"]["sd"] * 1.5)  # 动态阈值

    cap = cv2.VideoCapture(sourcePath)
    for index, fi in enumerate(data["frame_info"]):
        if fi["diff_count"] < diff_threshold:
            continue

        cap.set(cv2.CAP_PROP_POS_FRAMES, fi["frame_number"])
        ret, frame = cap.read()
        if not ret or frame is None:
            print(f"无法读取帧: {fi['frame_number']}")
            continue

        small = resize(frame, 100, 100)
        cols = extract_cols(small, 5)
        data["frame_info"][index]["dominant_cols"] = cols

        if frame is not None:
            writeImagePyramid(destDir, name, fi["frame_number"], frame)
            print(f"关键帧保存: {fi['frame_number']} -> {destDir}")

            if verbose:
                cv2.imshow('extract', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    cap.release()
    cv2.destroyAllWindows()
    return data

# 创建保存结果的输出目录
def makeOutputDirs(path):
    try:
        os.makedirs(os.path.join(path, "metadata"))
        os.makedirs(os.path.join(path, "images", "full"))
        os.makedirs(os.path.join(path, "images", "half"))
        os.makedirs(os.path.join(path, "images", "quarter"))
        os.makedirs(os.path.join(path, "images", "eigth"))
        os.makedirs(os.path.join(path, "images", "sixteenth"))
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

# 定义一个转换函数，递归地将数据中的 int64 转换为普通 int
def convert_int64(obj):
    if isinstance(obj, np.int64):
        return int(obj)  # 将 np.int64 转为普通 int
    elif isinstance(obj, dict):
        return {k: convert_int64(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_int64(i) for i in obj]
    return obj

def process_video(video_path, output_dir):
    """
    处理视频，提取关键帧并输出到指定目录。

    :param video_path: 输入视频路径
    :param output_dir: 输出文件夹路径
    """
    # 检查输入路径是否存在
    if not os.path.exists(video_path):
        print(f"输入视频路径无效: {video_path}")
        return

    # 创建输出目录
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print(f"开始处理视频: {video_path}")
    print(f"输出结果将保存到: {output_dir}")

    # 读取视频信息
    print("读取视频信息...")
    video_info = getInfo(video_path)
    with open(os.path.join(output_dir, "video_info.json"), "w") as f:
        json.dump(video_info, f, indent=4)

    # 计算帧间差异
    print("计算帧差异...")
    frame_stats = calculateFrameStats(video_path)
    with open(os.path.join(output_dir, "frame_stats.json"), "w") as f:
        # 先转换所有 int64 为 int
        frame_stats = convert_int64(frame_stats)
        json.dump(frame_stats, f, indent=4)

    # 检测场景并提取关键帧
    print("检测场景并提取关键帧...")
    scene_data = detectScenes(video_path, output_dir, frame_stats, name="keyframes")
    with open(os.path.join(output_dir, "scene_info.json"), "w") as f:
        # 同样转换 scene_data 中的 int64 为 int
        scene_data = convert_int64(scene_data)
        json.dump(scene_data, f, indent=4)

    print("处理完成！结果保存在输出目录中。")


In [17]:
video_path = "C:/data/video/0-两手托天理三焦（八段锦）/standard_0.mp4"
output_dir = "C:/data/result/scene_detector"


process_video(video_path, output_dir)


开始处理视频: C:/data/video/0-两手托天理三焦（八段锦）/standard_0.mp4
输出结果将保存到: C:/data/result/scene_detector
读取视频信息...
计算帧差异...
检测场景并提取关键帧...
关键帧保存: 59 -> C:/data/result/scene_detector\images
关键帧保存: 74 -> C:/data/result/scene_detector\images
关键帧保存: 78 -> C:/data/result/scene_detector\images
关键帧保存: 83 -> C:/data/result/scene_detector\images
关键帧保存: 88 -> C:/data/result/scene_detector\images
关键帧保存: 120 -> C:/data/result/scene_detector\images
关键帧保存: 125 -> C:/data/result/scene_detector\images
关键帧保存: 130 -> C:/data/result/scene_detector\images
关键帧保存: 131 -> C:/data/result/scene_detector\images
关键帧保存: 135 -> C:/data/result/scene_detector\images
关键帧保存: 137 -> C:/data/result/scene_detector\images
关键帧保存: 140 -> C:/data/result/scene_detector\images
关键帧保存: 144 -> C:/data/result/scene_detector\images
关键帧保存: 149 -> C:/data/result/scene_detector\images
关键帧保存: 150 -> C:/data/result/scene_detector\images
关键帧保存: 154 -> C:/data/result/scene_detector\images
关键帧保存: 159 -> C:/data/result/scene_detector\images
关键帧保存: 168 ->

In [59]:
import cv2
import os
import numpy as np
from sklearn.cluster import KMeans
import argparse
from pathlib import Path
from PIL import Image


# 获取视频的基本信息
def get_video_info(video_path):
    cap = cv2.VideoCapture(video_path)
    info = {
        "frame_count": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
        "fps": cap.get(cv2.CAP_PROP_FPS),
        "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    }
    cap.release()
    return info

# 计算帧间光流
def calculate_optical_flow(video_path):
    cap = cv2.VideoCapture(video_path)
    last_frame = None
    frame_info = []
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_number = int(cap.get(cv2.CAP_PROP_POS_FRAMES) - 1)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (320, 180))  # 减小分辨率加速处理

        if last_frame is not None:
            flow = cv2.calcOpticalFlowFarneback(
                last_frame, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
            )
            magnitude = np.sqrt(flow[..., 0]**2 + flow[..., 1]**2)
            avg_magnitude = np.mean(magnitude)
            frame_info.append({
                "frame_number": frame_number,
                "optical_flow_mag": avg_magnitude
            })
        last_frame = gray

    cap.release()
    return frame_info

# K-Means 聚类帧
def cluster_frames_kmeans(frame_info, n_clusters=10):
    frame_numbers = np.array([f["frame_number"] for f in frame_info])
    optical_flows = np.array([f["optical_flow_mag"] for f in frame_info])

    # 特征矩阵：时间（帧号）和光流强度
    features = np.column_stack((frame_numbers, optical_flows))

    # 进行 K-Means 聚类
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(features)

    # 将帧按聚类标签分组
    clusters = {i: [] for i in range(n_clusters)}
    for idx, label in enumerate(kmeans.labels_):
        clusters[label].append(frame_info[idx])

    # 为每类选择中心点帧
    cluster_centers = kmeans.cluster_centers_
    representative_frames = []
    for cluster_id, center in enumerate(cluster_centers):
        closest_frame = min(
            clusters[cluster_id],
            key=lambda x: np.linalg.norm([x["frame_number"], x["optical_flow_mag"]] - center)
        )
        representative_frames.append(closest_frame)

    # 按时间顺序排序
    representative_frames.sort(key=lambda x: x["frame_number"])
    return representative_frames

def save_frames(video_path, output_dir, frames):
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)

    for frame in frames:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame["frame_number"])
        ret, img = cap.read()
        if ret:
            filename = f"frame_{frame['frame_number']:06d}.jpg"
            # 确保路径使用正确的分隔符并且能够处理中文
            frame_path = os.path.join(output_dir, filename).replace("\\", "/")
            
            # 将 OpenCV 的图像格式转换为 PIL 图像格式
            pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            
            try:
                pil_img.save(frame_path)  # 使用Pillow保存图片
                print(f"保存帧: {frame_path}")  # 输出保存路径
            except Exception as e:
                print(f"保存帧时出错: {e}, 路径: {frame_path}")

    cap.release()
# 主处理流程
# 主处理流程
def process_video(video_path, output_dir=None, n_clusters=10, return_frames=False):
    print("获取视频信息...")
    video_info = get_video_info(video_path)
    print(f"视频信息: {video_info}")

    print("计算帧间光流...")
    frame_info = calculate_optical_flow(video_path)
    print(f"共计算 {len(frame_info)} 帧的光流信息。")

    print(f"对帧进行 {n_clusters} 聚类...")
    representative_frames = cluster_frames_kmeans(frame_info, n_clusters)
    print(f"共选出 {len(representative_frames)} 个关键帧。")

    if return_frames:
        # 如果选择直接返回帧信息，则不保存图片
        print("处理完成，返回关键帧信息。")
        return representative_frames
    else:
        if output_dir is None:
            raise ValueError("保存模式下，output_dir 不能为空！")
        print("保存关键帧...")
        save_frames(video_path, output_dir, representative_frames)
        print("处理完成！")



In [51]:
video_path = "C:/data/video/0-两手托天理三焦（八段锦）/standard_0.mp4"


In [56]:
output_dir = "C:/data/result/scene_detector/测试"
process_video(video_path, output_dir, n_clusters=15)


获取视频信息...
视频信息: {'frame_count': 1050, 'fps': 30.0, 'width': 1280, 'height': 720}
计算帧间光流...
共计算 1049 帧的光流信息。
对帧进行 15 聚类...
共选出 15 个关键帧。
保存关键帧...
保存帧: C:/data/result/scene_detector/测试/frame_000039.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000114.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000187.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000258.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000329.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000400.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000471.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000540.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000609.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000675.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000740.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000804.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000870.jpg
保存帧: C:/data/result/scene_detector/测试/frame_000939.jpg
保存帧: C:/data/result/scene_detector/测试/frame_001012.jpg
处理完成！


In [60]:
import os

# 主处理流程
def process_all_videos(video_dir, output_base_dir, n_clusters=15):
    """
    遍历主文件夹，对每个视频提取关键帧并保存。
    
    Args:
        video_dir: 视频的主文件夹路径（包含子文件夹，子文件夹名是标签名）。
        output_base_dir: 输出目录的根路径。
        n_clusters: 每个视频提取的关键帧数量。
    """
    # 遍历主文件夹下的每个子文件夹
    for label in os.listdir(video_dir):
        label_dir = f"{video_dir}/{label}".replace("\\", "/")  # 使用 /
        if not os.path.isdir(label_dir):  # 跳过非文件夹
            print(f"跳过非文件夹项: {label_dir}")
            continue
        
        print(f"开始处理标签: {label}")
        
        # 遍历该标签下的所有视频文件
        for video_file in os.listdir(label_dir):
            video_path = f"{label_dir}/{video_file}".replace("\\", "/")  # 使用 /
            
            if not video_file.endswith((".mp4", ".avi", ".mkv")):  # 仅处理视频文件
                print(f"跳过非视频文件: {video_file}")
                continue
            
            # 输出目录为 标签名/视频名（不含扩展名）
            video_name = os.path.splitext(video_file)[0]  # 去掉扩展名
            output_dir = f"{output_base_dir}/{label}/{video_name}".replace("\\", "/")  # 使用 /
            os.makedirs(output_dir, exist_ok=True)
            
            print(f"处理视频: {video_path}")
            try:
                # 处理当前视频
                process_video(video_path, output_dir=output_dir, n_clusters=n_clusters)
            except Exception as e:
                print(f"处理视频时发生错误: {video_path}，错误信息: {e}")
    
    print("所有视频处理完成！")


In [61]:
# 配置路径
video_dir = "C:/data/video"  # 视频主文件夹路径
output_dir = "C:/data/result/scene_detector10"  # 关键帧输出目录
n_clusters = 10  # 每个视频提取的关键帧数量

# 执行批量处理
process_all_videos(video_dir, output_dir, n_clusters=n_clusters)


开始处理标签: 0-两手托天理三焦（八段锦）
处理视频: C:/data/video/0-两手托天理三焦（八段锦）/reference_0.mp4
获取视频信息...
视频信息: {'frame_count': 1050, 'fps': 30.0, 'width': 720, 'height': 1280}
计算帧间光流...
共计算 1049 帧的光流信息。
对帧进行 10 聚类...
共选出 10 个关键帧。
保存关键帧...
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000046.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000138.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000234.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000335.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000441.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000551.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000662.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000773.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/reference_0/frame_000885.jpg
保存帧: C:/data/result/scene_detector10/0-两手托天理三焦（八段锦）/r