In [None]:
import cv2
import os
import numpy as np
import tensorflow as tf
import pickle
import concurrent.futures
from tqdm import tqdm

In [None]:
# Mount Google Drive
# The dataset and output paths configured in this notebook live under
# '/content/drive', so this mount has to run before any cell that reads or
# writes them. `google.colab` is imported here, so this cell needs an
# environment that provides that package.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project root inside the mounted Google Drive (note the trailing slash).
base_path = '/content/drive/My Drive/vit_dnn/'

# Locations derived from the project root:
#   video_folder    - directory holding the validation videos
#   annotation_path - pickle file with the validation labels
#   save_folder     - directory that receives the preprocessed .npy files
video_folder = os.path.join(base_path, 'Base_Dataset/Validation_Data/Videos')
annotation_path = os.path.join(
    base_path,
    'Base_Dataset/Validation_Data/Annotations/annotation_validation.pkl',
)
save_folder = os.path.join(base_path, 'Preprocess2/Validation_Preprocessed2')

In [None]:
def extract_frames(video_path, num_frames, frames_per_second=1):
    """Sample frames from a video at a fixed rate, starting at the first frame.

    Sample ``i`` (for ``i`` in ``0 .. num_frames - 1``) is read from frame
    index ``int(fps * i / frames_per_second)``, where ``fps`` is the frame
    rate reported by the capture. With the default ``frames_per_second=1``
    this is one frame per second of video.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to sample.
        frames_per_second: Number of samples to take per second of video.

    Returns:
        A list of the frames that were read successfully, in sampling order.
        The list is shorter than ``num_frames`` (possibly empty) when the
        video cannot be opened or a requested position cannot be read.

    Raises:
        ValueError: If ``frames_per_second`` is not positive.
    """
    if frames_per_second <= 0:
        raise ValueError("frames_per_second must be positive")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Nothing can be read from a capture that failed to open.
        if not cap.isOpened():
            return frames

        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        for i in range(num_frames):
            idx = int(frame_rate * i / frames_per_second)
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            # A failed read (e.g. position past the end) is skipped, not fatal.
            if ret:
                frames.append(frame)
    finally:
        # Release the capture even if seeking or reading raised.
        cap.release()
    return frames

In [None]:
def preprocess_frame(frame):
    """Resize a frame to 224x224 and apply Keras ResNet input preprocessing.

    Args:
        frame: Image array accepted by ``cv2.resize``.

    Returns:
        The result of ``tf.keras.applications.resnet.preprocess_input`` on
        the resized frame.
    """
    # NOTE(review): frames decoded by OpenCV are normally BGR, while the Keras
    # ResNet preprocess_input documentation describes RGB input - confirm
    # whether a colour conversion is needed before changing anything here.
    resized = cv2.resize(frame, (224, 224))
    return tf.keras.applications.resnet.preprocess_input(resized)

In [None]:
def process_video(video_file, video_folder, all_annotations, num_frames):
    """Extract and preprocess frames for one video and look up its labels.

    Args:
        video_file: File name of the video, relative to ``video_folder``.
        video_folder: Directory containing the video.
        all_annotations: Mapping of trait name -> mapping of video base name
            -> label value.
        num_frames: Number of frames to request from ``extract_frames``.

    Returns:
        ``(processed_frames, annotation)`` where ``processed_frames`` is the
        list of preprocessed frames and ``annotation`` holds the labels of
        the first five traits (in the mapping's iteration order), or
        ``(None, None)`` when the video has no label for some trait or fewer
        than ``num_frames`` frames could be read.
    """
    base_name = os.path.basename(video_file)

    # Check the labels first: decoding and preprocessing frames is the
    # expensive part and is pointless for a video that will be discarded.
    if not all(base_name in all_annotations[trait] for trait in all_annotations):
        return None, None

    video_path = os.path.join(video_folder, video_file)
    frames = extract_frames(video_path, num_frames)

    # A short or unreadable video would yield a frame list of a different
    # length than the others, which cannot be stacked into one array later.
    if len(frames) < num_frames:
        return None, None

    processed_frames = [preprocess_frame(frame) for frame in frames]

    # Retrieve only the first 5 labels from the annotations.
    # NOTE(review): which traits these are depends on the key order of
    # all_annotations - confirm it matches the intended label set.
    annotation = [all_annotations[trait][base_name] for trait in all_annotations][:5]
    return processed_frames, annotation

In [None]:
# Load annotations
# The result is indexed elsewhere in this notebook as
# all_annotations[trait][video_base_name], i.e. a mapping of trait name to a
# per-video label mapping.
# NOTE(review): unpickling can execute arbitrary code - only load annotation
# files from a trusted source.
# NOTE(review): encoding='latin1' is presumably needed because the file was
# pickled under Python 2 - confirm against the dataset's origin.
with open(annotation_path, 'rb') as file:
    all_annotations = pickle.load(file, encoding='latin1')

In [None]:
# Create the save folder (and any missing parents) if it doesn't exist.
# exist_ok=True makes this a single idempotent call instead of a separate
# existence check followed by the creation.
os.makedirs(save_folder, exist_ok=True)

In [None]:
# Adjust the number of frames here to reduce the size
# Number of frames requested per video; the main loop passes this value to
# process_video, which forwards it to extract_frames.
num_frames = 5

In [None]:
# Main processing loop: state
# all_video_data / all_video_labels collect one entry per successfully
# processed video and stay index-aligned with each other.
all_video_data, all_video_labels = [], []
# os.listdir returns names in arbitrary order; sort them so the row order of
# the saved arrays is the same on every run.
all_video_files = sorted(os.listdir(video_folder))
# (video_file, error_message) pairs, printed by the error-reporting cell.
errors = []

In [None]:
# Process every video; a failure on one video is recorded and must not abort
# the whole run.
for video_file in tqdm(all_video_files):
    try:
        X_batch, y_batch = process_video(video_file, video_folder, all_annotations, num_frames)
    except Exception as exc:
        # Keep the (video_file, error_message) shape that the error-reporting
        # cell unpacks.
        errors.append((video_file, str(exc)))
        continue
    # (None, None) means the video was skipped by process_video.
    if X_batch is not None and y_batch is not None:
        all_video_data.append(X_batch)
        all_video_labels.append(y_batch)

In [None]:
# Stack the collected per-video frames and labels into numpy arrays, then
# write each array to its .npy file inside save_folder.
all_video_data = np.array(all_video_data)
all_video_labels = np.array(all_video_labels)

for out_name, out_array in (
    ('preprocessed_validation_batch_final5.npy', all_video_data),
    ('labels_validation_batch_final5.npy', all_video_labels),
):
    np.save(os.path.join(save_folder, out_name), out_array)

In [None]:
# Error reporting: print a header followed by one line per recorded
# (video_file, error_message) pair; print nothing when no errors were recorded.
if errors:
    report_lines = ["Errors during processing:"]
    report_lines.extend(
        f"Video: {failed_video}, Error: {reason}"
        for failed_video, reason in errors
    )
    print("\n".join(report_lines))