In [1]:
!pip install scenedetect
!pip install transnetv2-pytorch
!pip install decord

Collecting scenedetect
  Downloading scenedetect-0.6.7-py3-none-any.whl.metadata (3.9 kB)
Downloading scenedetect-0.6.7-py3-none-any.whl (130 kB)
Installing collected packages: scenedetect
Successfully installed scenedetect-0.6.7
Collecting transnetv2-pytorch
  Downloading transnetv2_pytorch-1.0.5-py3-none-any.whl.metadata (10 kB)
Collecting ffmpeg-python (from transnetv2-pytorch)
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading transnetv2_pytorch-1.0.5-py3-none-any.whl (32.7 MB)
   ---------------------------------------- 0.0/32.7 MB ? eta -:--:--
   - -------------------------------------- 1.0/32.7 MB 10.1 MB/s eta 0:00:04
   - -------------------------------------- 1.0/32.7 MB 10.1 MB/s eta 0:00:04
   - -------------------------------------- 1.0/32.7 MB 10.1 MB/s eta 0:00:04
   - -------------------------------------- 1.0/32.7 MB 10.1 MB/s eta 0:00:04
   - -------------------------------------- 1.0/32.7 MB 10.1 MB/s eta 0:00:04
   - ------------------

In [1]:
# ==== Build SceneJSON (merge TransNetV2 + cutframe; GPU-first; save Vxxx.json only) ====
from pathlib import Path
import json, re, cv2, math

# ---------------- Paths ----------------
# Absolute, machine-specific locations: edit these before running elsewhere.
DATASET_DIR = Path(r'D:\VN_Multi_User_Video_Search')
VIDEOS_ROOT = Path(r'D:\VN_Multi_User_Video_Search\AIC_Video')
SCENE_ROOT  = DATASET_DIR / 'dict' / 'SceneJSON'
# Output root is created eagerly so later writes cannot fail on a missing folder.
SCENE_ROOT.mkdir(parents=True, exist_ok=True)

print("DATASET_DIR:", DATASET_DIR.resolve())
print("VIDEOS_ROOT:", VIDEOS_ROOT.resolve())
print("SCENE_ROOT :", SCENE_ROOT.resolve())

# ---------------- Options ----------------
USE_TRANSNET      = True      # prefer TransNetV2 when it is available
USE_GPU_DIFF      = True      # if TransNetV2 is unavailable, try GPU frame differencing (decord + torch)
CPU_THRESHOLD     = 27.0      # PySceneDetect ContentDetector threshold used by the CPU fallback
GPU_STEP          = 2          # frame sampling stride (2 or 3); a stride of 2 halves the frames read
GPU_SMOOTH_WIN    = 25         # moving-average window applied to the frame-difference signal
GPU_BASE_THRESH   = 0.12       # lower bound for the adaptive cut threshold
GPU_ADAPT_K       = 2.0        # multiplier k in the adaptive threshold: median + k * MAD
GPU_MIN_SCENE_LEN = 30         # minimum distance (in frames) between cuts, to avoid fragmented scenes
GPU_BATCH_FRAMES  = 16         # number of sampled frames decoded per get_batch call
SKIP_EXISTING     = True       # skip videos whose output JSON already exists
PRINT_PROGRESS    = True       # print one [SKIP]/[OK] line per video

# ---------------- Helpers ----------------
def split_ids(video_id: str):
    """Split a video id into ``(part, bare_id)``.

    ``"L21_V001"`` becomes ``("L21", "V001")``; only the first underscore is
    used as the separator. An id without an underscore yields its first three
    characters as the part and the whole id as the bare id.

    Always returns a 2-tuple (the original returned a list on the underscore
    branch and a tuple otherwise), so callers get one consistent type.
    """
    if "_" in video_id:
        part, bare = video_id.split("_", 1)
        return part, bare
    return video_id[:3], video_id

def save_scenejson(part: str, video_id: str, scenes):
    """Write the scene list of one video to ``SceneJSON/<part>/<Vxxx>.json``.

    Only the bare id (the text after the first underscore of ``video_id``) is
    used as the file name. Each scene is stored as ``[start, end]`` with both
    values cast to int; scenes whose end is not greater than their start are
    dropped.

    The JSON is written to a sibling ``.tmp`` file and then renamed over the
    target. An interrupted run therefore never leaves a truncated
    ``Vxxx.json`` behind, which the main loop would otherwise treat as
    finished and skip on the next run.

    Returns the path of the written file.
    """
    _, bare = split_ids(video_id)
    out_dir = SCENE_ROOT / part
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"{bare}.json"
    to_dump = [[int(s), int(e)] for s, e in scenes if int(e) > int(s)]
    tmp_path = out_path.with_name(out_path.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(to_dump, f)
        # Rename only after the file is complete and closed.
        tmp_path.replace(out_path)
    except BaseException:
        # Also covers a kernel interrupt: remove the partial temp file.
        tmp_path.unlink(missing_ok=True)
        raise
    return out_path

def find_part_from_foldername(name: str) -> str:
    """Extract the part code (e.g. ``"L21"``, ``"K01"``) from a folder name.

    The original ``L\\d{2}`` pattern is tried first, so every name it already
    matched keeps its result. If it does not match, a stand-alone
    ``<uppercase letter><two digits>`` token is accepted. Without this, a
    folder such as ``Videos_K01`` returned the whole folder name as its part.

    Returns ``name`` unchanged when no code is found.
    """
    m = re.search(r"L\d{2}", name) or re.search(
        # Not preceded by a letter/digit and not followed by a digit, so that
        # e.g. "AIC2024" does not produce the bogus code "C20".
        r"(?<![A-Za-z0-9])[A-Z]\d{2}(?!\d)", name
    )
    return m.group(0) if m else name

def iter_videos():
    """Yield ``(part, video_id, path)`` for every ``*.mp4`` under VIDEOS_ROOT.

    Two layouts are walked, in this order:

    1. ``VIDEOS_ROOT/Videos_*/video/*.mp4`` (or ``Videos_*/*.mp4`` when the
       ``video`` sub-folder is absent); the part comes from the folder name.
    2. ``VIDEOS_ROOT/*.mp4``; the part is the text before the first underscore
       of the file stem, or its first three characters if there is none.

    Folders and files are visited in sorted order.
    """
    # Layout 1: one "Videos_*" folder per part.
    for folder in sorted(VIDEOS_ROOT.iterdir()):
        if not (folder.is_dir() and folder.name.startswith("Videos_")):
            continue
        part = find_part_from_foldername(folder.name)
        nested = folder / "video"
        source_dir = nested if nested.is_dir() else folder
        for clip in sorted(source_dir.glob("*.mp4")):
            yield part, clip.stem, clip

    # Layout 2: videos stored directly in the root folder.
    for clip in sorted(VIDEOS_ROOT.glob("*.mp4")):
        stem = clip.stem
        part = stem.partition("_")[0] if "_" in stem else stem[:3]
        yield part, stem, clip

# ---------------- Backends ----------------
def detect_with_transnet(video_path: Path):
    """Detect scenes with TransNetV2.

    Returns ``(scenes, None)`` on success, where ``scenes`` is a list of
    ``[start_frame, end_frame]`` int pairs, or ``(None, error_message)`` when
    the library cannot be imported or prediction fails.

    Changes compared with the original:
    * The model is created once and cached on the function, instead of being
      rebuilt for every video.
    * ``predict_video`` of the reference implementation returns a tuple
      ``(frames, single_frame_predictions, all_frame_predictions)``. Only the
      single-frame predictions are passed to ``predictions_to_scenes``; the
      original passed the whole tuple, which cannot be thresholded.

    NOTE(review): the notebook installs ``transnetv2-pytorch``; confirm that
    it provides the ``transnetv2`` module imported here, otherwise this
    backend always returns an import error and a fallback is used.
    """
    try:
        from transnetv2 import TransNetV2
    except Exception as e:
        return None, f"import transnetv2 failed: {e}"
    try:
        model = getattr(detect_with_transnet, "_model", None)
        if model is None:
            model = TransNetV2()
            detect_with_transnet._model = model  # reused by later calls

        preds = model.predict_video(str(video_path))
        if isinstance(preds, tuple):
            # (frames, single, all) -> single ; (single, all) -> single
            if len(preds) == 3:
                preds = preds[1]
            elif len(preds) == 2:
                preds = preds[0]

        # Some versions expose the converter on the model; otherwise try the
        # module-level function.
        try:
            scenes = model.predictions_to_scenes(preds)
        except Exception:
            from transnetv2 import predictions_to_scenes
            scenes = predictions_to_scenes(preds)
        scenes = [[int(s), int(e)] for s, e in scenes]
        return scenes, None
    except Exception as e:
        return None, f"transnetv2 error: {e}"

def _to_cuda_tensor(frames):
    import torch, numpy as np
    try:
        t = torch.utils.dlpack.from_dlpack(frames.to_dlpack())
    except Exception:
        if hasattr(frames, "asnumpy"):
            frames = frames.asnumpy()
        if not isinstance(frames, np.ndarray):
            frames = np.asarray(frames)
        t = torch.from_numpy(frames)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return t.to(device=device, dtype=torch.float32) / 255.0

def _smooth_1d(x, win):
    import torch, torch.nn.functional as F
    if win <= 1:
        return x
    x = x.view(1,1,-1)
    pad = win // 2
    x = F.pad(x, (pad, pad), mode="reflect")
    y = F.avg_pool1d(x, kernel_size=win, stride=1)
    return y.view(-1)

def _adaptive_threshold(diffs, base_thr=0.10, k=2.0):
    import torch
    med = torch.median(diffs)
    mad = torch.median(torch.abs(diffs - med)) + 1e-6
    thr = torch.clamp(med + k * mad, min=base_thr, max=0.95)
    return float(thr)

def detect_with_gpu_diff(video_path: Path):
    """Frame-difference scene detection with decord (decode) + torch (compute).

    Every ``GPU_STEP``-th frame (plus the last one) is decoded in batches of
    ``GPU_BATCH_FRAMES``. The mean absolute luminance difference between
    consecutive sampled frames is min-max normalised, smoothed and compared
    with an adaptive threshold. Cuts closer than ``GPU_MIN_SCENE_LEN`` frames
    to the previous accepted cut are dropped.

    Returns ``(scenes, None)`` on success, where ``scenes`` is a list of
    ``[start_frame, end_frame]`` int pairs, or ``(None, error_message)`` on
    failure.

    Fixes compared with the original:
    * The difference between the last frame of one batch and the first frame
      of the next was never computed. The diff vector was therefore shorter
      than ``len(idxs) - 1`` and the ``idxs[i + 1]`` mapping drifted one
      sample earlier per batch; cuts on a batch seam were missed. The last
      luminance frame is now carried over to the next batch.
    * Errors while decoding/computing are returned as ``(None, error)``, like
      the other backends, so the caller can fall back instead of crashing.
    """
    try:
        import decord, torch
    except Exception as e:
        return None, f"gpu-diff deps missing: {e}"

    # Open with decord; decoding may end up CPU-only while the math still runs
    # on the GPU.
    try:
        ctx = decord.gpu(0) if torch.cuda.is_available() else decord.cpu()
        vr = decord.VideoReader(str(video_path), ctx=ctx)
    except Exception:
        # Retry with a CPU decoding context.
        try:
            vr = decord.VideoReader(str(video_path), ctx=decord.cpu())
        except Exception as e2:
            return None, f"decord open failed: {e2}"

    try:
        n = len(vr)
        if n <= 1:
            return [[0, max(1, n-1)]], None

        idxs = list(range(0, n, GPU_STEP))
        if idxs[-1] != n-1:
            idxs.append(n-1)

        diffs = []
        prev_last = None  # luminance of the last frame of the previous batch
        for bi in range(0, len(idxs), GPU_BATCH_FRAMES):
            sub = idxs[bi:bi+GPU_BATCH_FRAMES]
            frames = vr.get_batch(sub)
            t = _to_cuda_tensor(frames)           # indexed below as (B,H,W,3)
            y = 0.299*t[...,0] + 0.587*t[...,1] + 0.114*t[...,2]
            if prev_last is not None:
                # Bridge the batch seam so diff i always compares idxs[i] and
                # idxs[i+1].
                y = torch.cat([prev_last, y], dim=0)
            if y.shape[0] >= 2:
                d = (y[1:] - y[:-1]).abs().mean(dim=(1,2))
                diffs.append(d.detach())
            # clone() so the full batch tensor can be released
            prev_last = y[-1:].clone()
            del frames, t, y

        if not diffs:
            return [[0, max(1, n-1)]], None

        diffs = torch.cat(diffs, dim=0)           # len == len(idxs) - 1
        dmin, dmax = float(diffs.min()), float(diffs.max())
        if dmax > dmin:
            diffs = (diffs - dmin) / (dmax - dmin)
        diffs = _smooth_1d(diffs, GPU_SMOOTH_WIN)
        thr = _adaptive_threshold(diffs, base_thr=GPU_BASE_THRESH, k=GPU_ADAPT_K)
        cand = (diffs > thr).nonzero(as_tuple=True)[0].tolist()
        # diff i sits between idxs[i] and idxs[i+1]; the cut is placed on the
        # later frame.
        boundaries = [idxs[i+1] for i in cand]

        # Enforce the minimum scene length.
        filtered, last = [], 0
        for b in boundaries:
            if b - last >= GPU_MIN_SCENE_LEN:
                filtered.append(b)
                last = b

        scenes, start = [], 0
        for b in filtered:
            if b > start:
                scenes.append([start, b])
                start = b
        if start < n-1:
            scenes.append([start, n-1])
        if not scenes:
            scenes = [[0, max(1, n-1)]]
        return [[int(s), int(e)] for s, e in scenes], None
    except Exception as e:
        return None, f"gpu-diff error: {e}"

def detect_with_pyscenedetect(video_path: Path):
    """CPU fallback: scene detection with PySceneDetect's ContentDetector.

    Returns ``(scenes, None)`` on success, where ``scenes`` is a list of
    ``[start_frame, end_frame]`` pairs, or ``(None, error_message)`` when the
    library is missing or detection fails. If no scene is reported, one scene
    covering the whole video is returned, with the frame count read through
    OpenCV.
    """
    try:
        from scenedetect import open_video, SceneManager
        from scenedetect.detectors import ContentDetector
    except Exception as e:
        return None, f"scenedetect missing: {e}"

    try:
        stream = open_video(str(video_path))
        manager = SceneManager()
        manager.add_detector(ContentDetector(threshold=CPU_THRESHOLD))
        manager.detect_scenes(stream, show_progress=False)

        result = []
        for begin, finish in manager.get_scene_list():
            result.append([begin.get_frames(), finish.get_frames()])
        if result:
            return result, None

        # Nothing detected: treat the whole video as one scene.
        capture = cv2.VideoCapture(str(video_path))
        total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        capture.release()
        return [[0, max(1, total - 1)]], None
    except Exception as e:
        return None, f"scenedetect error: {e}"

# ---------------- Main ----------------
videos = list(iter_videos())
print(f"Found videos: {len(videos)}")

# Detection backends in priority order; PySceneDetect is always the last resort.
backends = []
if USE_TRANSNET:
    backends.append(("transnetv2", detect_with_transnet))
if USE_GPU_DIFF:
    backends.append(("gpu-diff", detect_with_gpu_diff))
backends.append(("pyscenedetect", detect_with_pyscenedetect))

done = 0
backend_counts = {}      # backend name -> number of videos it produced
reported_errors = set()  # kinds of fallback error already printed once
for part, video_id, vpath in videos:
    _, bare = split_ids(video_id)
    out_dir = SCENE_ROOT / part
    out_path = out_dir / f"{bare}.json"
    if SKIP_EXISTING and out_path.exists():
        if PRINT_PROGRESS:
            print(f"[SKIP] {part}/{video_id} -> {out_path.name}")
        continue

    scenes, used, errlog = None, None, []
    for backend_name, detect in backends:
        try:
            s, err = detect(vpath)
        except Exception as exc:
            # One crashing backend must not abort the remaining videos.
            s, err = None, f"{backend_name} crashed: {exc}"
        if err:
            errlog.append(err)
        if s is not None:
            scenes, used = s, backend_name
            break

    if scenes is None:
        print(f"[FAIL] {part}/{video_id}: {errlog}")
        continue

    # Previously these errors were dropped whenever a later backend succeeded,
    # which hid e.g. a broken TransNetV2 install. Print each kind of error once
    # (keyed by the text before the first colon) to avoid flooding the output.
    for err in errlog:
        kind = err.split(":", 1)[0]
        if kind not in reported_errors:
            reported_errors.add(kind)
            print(f"[WARN] {part}/{video_id}: fell back to {used}: {err}")

    path_saved = save_scenejson(part, video_id, scenes)
    done += 1
    backend_counts[used] = backend_counts.get(used, 0) + 1
    if PRINT_PROGRESS:
        print(f"[OK] {part}/{video_id} -> {path_saved.name} | scenes={len(scenes)} | backend={used}")

print(f"Done. Wrote {done} file(s) into {SCENE_ROOT}.")
if backend_counts:
    print("Backends used:", backend_counts)

DATASET_DIR: D:\VN_Multi_User_Video_Search
VIDEOS_ROOT: D:\VN_Multi_User_Video_Search\AIC_Video
SCENE_ROOT : D:\VN_Multi_User_Video_Search\dict\SceneJSON
Found videos: 1478
[OK] K01/K01_V001 -> V001.json | scenes=95
[OK] K01/K01_V002 -> V002.json | scenes=85
[OK] K01/K01_V003 -> V003.json | scenes=102
[OK] K01/K01_V004 -> V004.json | scenes=68
[OK] K01/K01_V005 -> V005.json | scenes=167
[OK] K01/K01_V006 -> V006.json | scenes=128
[OK] K01/K01_V007 -> V007.json | scenes=167
[OK] K01/K01_V008 -> V008.json | scenes=157
[OK] K01/K01_V009 -> V009.json | scenes=52
[OK] K01/K01_V010 -> V010.json | scenes=100
[OK] K01/K01_V011 -> V011.json | scenes=139
[OK] K01/K01_V012 -> V012.json | scenes=137
[OK] K01/K01_V013 -> V013.json | scenes=157
[OK] K01/K01_V014 -> V014.json | scenes=64
[OK] K01/K01_V015 -> V015.json | scenes=61
[OK] K01/K01_V016 -> V016.json | scenes=122
[OK] K01/K01_V017 -> V017.json | scenes=91
[OK] K01/K01_V018 -> V018.json | scenes=152
[OK] K01/K01_V019 -> V019.json | scenes=99