In [44]:
pip install opencv-python-headless numpy scikit-image


Note: you may need to restart the kernel to use updated packages.


In [45]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
import os
from sklearn.cluster import KMeans
import json

In [46]:
def extract_color_histogram(frame, bins=64):
    # Không làm phẳng các histogram mà lưu giữ cấu trúc 3 chiều
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
    hsv_hist = cv2.calcHist([hsv], [0, 1, 2], None, [bins]*3, [0, 180, 0, 256, 0, 256])
    lab_hist = cv2.calcHist([lab], [0, 1, 2], None, [bins]*3, [0, 256, 0, 256, 0, 256])
    cv2.normalize(hsv_hist, hsv_hist)
    cv2.normalize(lab_hist, lab_hist)
    return {'hsv': hsv_hist, 'lab': lab_hist}

In [47]:
def extract_brightness_contrast_features(frame):
    # Trả về giá trị trung bình và độ lệch chuẩn dưới dạng tuple
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return np.mean(gray), np.std(gray)

In [48]:
def extract_lbp_features(frame, radii=[1, 3, 8], n_points=[8, 16, 24], method='uniform'):
    # Giữ nguyên cấu trúc không gian của histogram LBP
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    lbp_features = {}
    for radius, points in zip(radii, n_points):
        lbp = local_binary_pattern(gray, points, radius, method)
        hist = np.histogram(lbp.ravel(), bins=points+2, range=(0, points+2))[0]
        hist = hist.astype("float") / (hist.sum() + 1e-6)
        lbp_features[f'radius_{radius}_points_{points}'] = hist
    return lbp_features


In [49]:
def extract_sift_features(frame, n_clusters=10):
    # Trả về các cluster centers như một danh sách các mảng
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    if descriptors is not None and len(descriptors):
        kmeans = KMeans(n_clusters=min(n_clusters, len(descriptors)), n_init=10, random_state=0)
        kmeans.fit(descriptors)
        return kmeans.cluster_centers_
    return np.zeros((n_clusters, 128))  # Tr

In [50]:
def process_video(video_path, sample_rate=30, bins=32, radii=[1, 3, 5], n_points=[8, 16, 24], sift_dim=128):
    cap = cv2.VideoCapture(video_path)
    color_features, texture_features, sift_features, brightness_features, contrast_features = [], [], [], [], []
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_index % sample_rate == 0:
            color_features.append(extract_color_histogram(frame, bins))
            texture_features.append(extract_lbp_features(frame, radii, n_points))  # Corrected to match expected list format
            sift_features.append(extract_sift_features(frame))
            brightness, contrast = extract_brightness_contrast_features(frame)
            brightness_features.append(brightness)
            contrast_features.append(contrast)
        frame_index += 1
    cap.release()
    return color_features, texture_features, sift_features, brightness_features, contrast_features



In [51]:
def process_all_videos(directory_path, sample_rate=30, bins=32, radii=[1, 3, 5], n_points=[8, 16, 24], sift_clusters=10):
    data_directory = os.path.join('/kaggle/working/', 'data')
    os.makedirs(data_directory, exist_ok=True)

    features_database = {}

    for file_name in os.listdir(directory_path):
        if file_name.endswith('.mp4'):
            video_path = os.path.join(directory_path, file_name)
            color_features, texture_features, sift_features, average_brightness, average_contrast = process_video(video_path, sample_rate, bins, radii, n_points, sift_clusters)

            # Save the features as JSON
            save_features_as_json(color_features, os.path.join(data_directory, f'{file_name}_color_features.json'))
            save_features_as_json(texture_features, os.path.join(data_directory, f'{file_name}_texture_features.json'))
            save_features_as_json(sift_features, os.path.join(data_directory, f'{file_name}_sift_features.json'))
            save_features_as_json(average_brightness, os.path.join(data_directory, f'{file_name}_brightness.json'))
            save_features_as_json(average_contrast, os.path.join(data_directory, f'{file_name}_contrast.json'))

            # Store features in the database
            features_database[file_name] = (color_features, texture_features, sift_features, average_brightness, average_contrast)

            print(f"Processed and saved features for video {file_name} in JSON format")

    return features_database

In [52]:
def save_features_as_json(features, file_name):
    def convert_to_serializable(item):
        if isinstance(item, np.ndarray):
            return item.tolist()  # Chuyển ndarray sang list
        elif isinstance(item, dict):
            return {key: convert_to_serializable(value) for key, value in item.items()}
        elif isinstance(item, list):
            return [convert_to_serializable(elem) for elem in item]
        else:
            return item

    # Đảm bảo tất cả dữ liệu đều có thể serialize được
    serializable_data = convert_to_serializable(features)

    # Lưu dữ liệu vào file JSON
    with open(file_name, 'w') as f:
        json.dump(serializable_data, f, indent=4)

In [53]:
directory_path = '/kaggle/input/data-2/data-2'
process_all_videos(directory_path, sample_rate=30)

Processed and saved features for video 019.mp4 in JSON format
Processed and saved features for video 017.mp4 in JSON format
Processed and saved features for video 026.mp4 in JSON format
Processed and saved features for video 030.mp4 in JSON format
Processed and saved features for video 009.mp4 in JSON format
Processed and saved features for video 028.mp4 in JSON format
Processed and saved features for video 015.mp4 in JSON format
Processed and saved features for video 013.mp4 in JSON format
Processed and saved features for video 012.mp4 in JSON format
Processed and saved features for video 004.mp4 in JSON format
Processed and saved features for video 003.mp4 in JSON format
Processed and saved features for video 020.mp4 in JSON format
Processed and saved features for video 025.mp4 in JSON format
Processed and saved features for video 002.mp4 in JSON format
Processed and saved features for video 018.mp4 in JSON format
Processed and saved features for video 008.mp4 in JSON format
Processe

{'019.mp4': ([{'hsv': array([[[0.00011315, 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            ...,
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.00022629, 0.00011315, 0.        , ..., 0.        ,
             0.        , 0.        ]],
    
           [[0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            [0.        , 0.        , 0.        , ..., 0.        ,
             0.        , 0.        ],
            ...,
      