In [11]:
pip install opencv-python-headless numpy scikit-image


Note: you may need to restart the kernel to use updated packages.


In [12]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern, hog
import os
from sklearn.cluster import KMeans
import json

In [13]:
def extract_color_histogram(frame, bins=64):
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    hsv_hist = cv2.calcHist([hsv], [0, 1, 2], None, [bins]*3, [0, 180, 0, 256, 0, 256])
    cv2.normalize(hsv_hist, hsv_hist)
    return hsv_hist.flatten()

In [14]:
def extract_brightness_contrast_features(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return np.mean(gray), np.std(gray)

In [15]:
def extract_lbp_features(frame, radius=3, n_points=24, method='uniform'):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, n_points, radius, method)
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

In [16]:
def extract_hog_features(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    hog_features = hog(gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
    return hog_features

In [17]:
def extract_sift_features(frame, n_clusters=50):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    if descriptors is not None and len(descriptors) >= n_clusters:
        kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
        kmeans.fit(descriptors)
        return kmeans.cluster_centers_.flatten()
    return np.zeros(n_clusters * 128)

In [18]:
def process_video(video_path, sample_rate=30):
    cap = cv2.VideoCapture(video_path)
    color_features, brightness_features, contrast_features, lbp_features, hog_features, sift_features = [], [], [], [], [], []
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_index % sample_rate == 0:
            color_features.append(extract_color_histogram(frame))
            brightness, contrast = extract_brightness_contrast_features(frame)
            brightness_features.append(brightness)
            contrast_features.append(contrast)
            lbp_features.append(extract_lbp_features(frame))
            hog_features.append(extract_hog_features(frame))
            sift_features.append(extract_sift_features(frame))
        frame_index += 1
    cap.release()
    
    avg_color = np.mean(color_features, axis=0)
    avg_brightness = np.mean(brightness_features)
    avg_contrast = np.mean(contrast_features)
    avg_lbp = np.mean(lbp_features, axis=0)
    avg_hog = np.mean(hog_features, axis=0)
    avg_sift = np.mean(sift_features, axis=0)
    
    return avg_color, avg_brightness, avg_contrast, avg_lbp, avg_hog, avg_sift

In [19]:
def process_all_videos(directory_path, sample_rate=30):
    data_directory = 'data'
    os.makedirs(data_directory, exist_ok=True)
    features_database = {}

    for file_name in os.listdir(directory_path):
        if file_name.endswith('.mp4'):
            video_path = os.path.join(directory_path, file_name)
            avg_color, avg_brightness, avg_contrast, avg_lbp, avg_hog, avg_sift = process_video(video_path, sample_rate)

            features_database[file_name] = {
                'avg_color': avg_color.tolist(),
                'avg_brightness': avg_brightness,
                'avg_contrast': avg_contrast,
                'avg_lbp': avg_lbp.tolist(),
                'avg_hog': avg_hog.tolist(),
                'avg_sift': avg_sift.tolist()
            }

            with open(os.path.join(data_directory, f'{file_name}_features.json'), 'w') as f:
                json.dump(features_database[file_name], f, indent=4)

            print(f"Processed and saved features for video {file_name} in JSON format")

    return features_database

In [20]:
directory_path = '/kaggle/input/data-2/data-2'
process_all_videos(directory_path, sample_rate=30)

Processed and saved features for video 019.mp4 in JSON format
Processed and saved features for video 017.mp4 in JSON format
Processed and saved features for video 026.mp4 in JSON format
Processed and saved features for video 030.mp4 in JSON format
Processed and saved features for video 009.mp4 in JSON format
Processed and saved features for video 028.mp4 in JSON format
Processed and saved features for video 015.mp4 in JSON format
Processed and saved features for video 013.mp4 in JSON format
Processed and saved features for video 012.mp4 in JSON format
Processed and saved features for video 004.mp4 in JSON format
Processed and saved features for video 003.mp4 in JSON format
Processed and saved features for video 020.mp4 in JSON format
Processed and saved features for video 025.mp4 in JSON format
Processed and saved features for video 002.mp4 in JSON format
Processed and saved features for video 018.mp4 in JSON format
Processed and saved features for video 008.mp4 in JSON format
Processe

{'019.mp4': {'avg_color': [2.266207229695283e-05,
   2.3499051167163998e-05,
   0.00032196191023103893,
   0.00029686768539249897,
   0.0003721496323123574,
   0.0005776733160018921,
   0.00244476948864758,
   0.002656254218891263,
   0.0021853118669241667,
   0.0031157624907791615,
   0.003051420906558633,
   0.00399350468069315,
   0.003712825011461973,
   0.010632764548063278,
   0.019247397780418396,
   0.02778465859591961,
   0.05230128765106201,
   0.05417543277144432,
   0.08145259320735931,
   0.054154131561517715,
   0.0677829310297966,
   0.03069828264415264,
   0.01950744539499283,
   0.011436285451054573,
   0.006922081578522921,
   0.010462778620421886,
   0.01311606913805008,
   0.02107873372733593,
   0.010186796076595783,
   0.007587021216750145,
   0.0030710941646248102,
   0.0024889057967811823,
   0.0033396268263459206,
   0.0026932128239423037,
   0.003131140023469925,
   0.001979695400223136,
   0.0029197772964835167,
   0.00215741666033864,
   0.001774782547727227