In [None]:
import cv2
import os

In [None]:
def get_keyframe_from_video(vidPath, output_dir, vidname, skip_seconds=0, second_stop=None, THRESHOLD=20):
  """Save frames that differ strongly from the previous frame as JPEG keyframes.

  Consecutive frames are converted to grayscale and compared. When the mean
  absolute pixel difference is greater than ``THRESHOLD`` the current frame is
  written to ``output_dir`` as ``{vidname}_{MM}m{SS}s.jpg``, where the time is
  the capture position OpenCV reports right after the frame was read.

  Args:
    vidPath: Path of the video file to scan.
    output_dir: Directory receiving the JPEG files; created if missing.
    vidname: Prefix used for every output file name.
    skip_seconds: Seconds to skip at the start of the video. Converted to a
      frame index with the FPS reported by the capture.
    second_stop: Stop as soon as the capture position reaches this many
      seconds. ``None`` scans until the video ends.
    THRESHOLD: A frame is saved only when its mean absolute gray-level
      difference to the previous frame is strictly greater than this value.

  Returns:
    None. Nothing is written when the video cannot be opened or no frame can
    be read at the start position.

  Note:
    File names have one-second resolution, so several keyframes detected
    within the same second map to the same file name and a later one
    replaces an earlier one.
  """
  os.makedirs(output_dir, exist_ok=True)

  cap = cv2.VideoCapture(vidPath)
  try:
    if not cap.isOpened():
      return

    fps = cap.get(cv2.CAP_PROP_FPS)
    # A capture may report 0 or NaN FPS; int(NaN) raises, so fall back to
    # "no skip" (both `0 > 0` and `nan > 0` are False).
    skip_frames = int(fps * skip_seconds) if fps > 0 else 0
    if skip_frames > 0:
      cap.set(cv2.CAP_PROP_POS_FRAMES, skip_frames)

    ret, prev_frame = cap.read()
    if not ret:
      return

    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    while True:
      ret, frame = cap.read()
      if not ret:
        break

      # Check the stop condition first so no work is spent on a frame that
      # is going to be discarded anyway.
      timestamp_sec = int(cap.get(cv2.CAP_PROP_POS_MSEC)) // 1000
      if second_stop is not None and timestamp_sec >= second_stop:
        break

      gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
      # Mean absolute difference per pixel between the two gray frames.
      score = cv2.absdiff(prev_gray, gray_frame).mean()

      if score > THRESHOLD:
        mins, secs = divmod(timestamp_sec, 60)
        filename = f"{vidname}_{mins:02d}m{secs:02d}s.jpg"
        cv2.imwrite(os.path.join(output_dir, filename), frame)

      prev_gray = gray_frame
  finally:
    # Always free the decoder, including when a cv2 call above raises.
    cap.release()

In [None]:
input_dir = "/content/drive/MyDrive/Dataset"
output_dir = "/content/drive/MyDrive/DatasetKeyFrame"
THRESHOLD = 20     # mean absolute gray-level difference above which a frame is saved
SKIP_SECONDS = 20  # seconds skipped at the start of each video in the batch run


def _resolve_video_paths(vidname, input_dir, output_dir):
  """Return (video path, group output folder, file stem) for one video file.

  The group folder is named after the second "_"-separated token of the file
  stem (e.g. "L27_V004" -> "V004"); stems without an underscore go to
  "Others". The folder is created if it does not exist yet.
  """
  stem = os.path.splitext(vidname)[0]
  tokens = stem.split("_")
  group_folder = tokens[1] if len(tokens) >= 2 else "Others"
  group_path = os.path.join(output_dir, group_folder)
  os.makedirs(group_path, exist_ok=True)
  return os.path.join(input_dir, vidname), group_path, stem


# os.listdir returns entries in arbitrary order and also lists sub-directories:
# keep regular files only and sort them so every run is reproducible.
video_lst = sorted(
  name for name in os.listdir(input_dir)
  if os.path.isfile(os.path.join(input_dir, name))
)

# ---------------------------------------------------------------------------
# Single-video run: scan only the first SKIP_SECONDS seconds of the first
# video (no skip, stop at SKIP_SECONDS).
# NOTE(review): this covers exactly the part of the first video that the batch
# loop below skips, so only this video also gets keyframes from its opening
# seconds - confirm that this is still wanted.
# ---------------------------------------------------------------------------
if video_lst:  # an empty input folder would make video_lst[0] raise IndexError
  vidname = video_lst[0]
  vidPath, group_path, vidname_no_ext = _resolve_video_paths(vidname, input_dir, output_dir)
  get_keyframe_from_video(vidPath, group_path, vidname_no_ext, 0, SKIP_SECONDS, THRESHOLD)

# ---------------------------------------------------------------------------
# Batch run: every video, skipping the first SKIP_SECONDS seconds and scanning
# to the end.
# ---------------------------------------------------------------------------
for vidname in video_lst:
  vidPath, group_path, vidname_no_ext = _resolve_video_paths(vidname, input_dir, output_dir)

  print(f"Đang xử lý {vidname_no_ext}")
  get_keyframe_from_video(vidPath, group_path, vidname_no_ext, SKIP_SECONDS, None, THRESHOLD)

print("Đã xử lý xong")

Đang xử lý L27_V004
Đang xử lý L27_V002
Đang xử lý L27_V005
Đang xử lý L27_V006
Đang xử lý L27_V001
Đang xử lý L27_V009
Đang xử lý L27_V003
Đang xử lý L27_V008
Đang xử lý L27_V010
Đang xử lý L27_V007
Đang xử lý L27_V013
Đang xử lý L27_V015
Đang xử lý L27_V016
Đang xử lý L27_V012
Đang xử lý L27_V011
Đang xử lý L27_V014
