In [3]:
import cv2
import os
import numpy as np
import tensorflow as tf
import pickle
import concurrent.futures
from tqdm import tqdm


# Dataset root on the local machine (absolute path; adjust per environment)
base_path = 'C:/Users/myldr/Desktop/CS559_Proj/Dataset/First_Impressions_V2/'

# Training split: input videos, annotation pickle, and output folder
video_folder = f'{base_path}Training_Data/Videos'
annotation_path = f'{base_path}Training_Data/Annotations/annotation_training.pkl'
save_folder = f'{base_path}Processed_Training_Data_2'

def extract_frames(video_path, num_frames, frames_per_second=1):
    """Sample frames from a video at a fixed time interval.

    Sample ``i`` (for ``i`` in ``range(num_frames)``) is read from frame index
    ``int(fps * i / frames_per_second)``, where ``fps`` is the rate reported by
    the capture. With the default ``frames_per_second=1`` this is one frame per
    second of video, starting at the first frame.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frames to try to sample.
        frames_per_second: How many samples to take per second of video.

    Returns:
        List of the frames that were read successfully, in sampling order.
        It may hold fewer than ``num_frames`` items (or be empty) when the
        video cannot be opened or is too short.

    Raises:
        ValueError: If ``frames_per_second`` is not positive.
    """
    if frames_per_second <= 0:
        raise ValueError("frames_per_second must be positive")

    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if cap.isOpened():
            # If the backend reports 0 fps, every index below collapses to 0.
            frame_rate = cap.get(cv2.CAP_PROP_FPS)
            for i in range(num_frames):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_rate * i / frames_per_second))
                ret, frame = cap.read()
                if ret:
                    frames.append(frame)
    finally:
        # Release the capture even if seeking or decoding raises.
        cap.release()
    return frames


def preprocess_frame(frame):
    """Resize a decoded video frame and apply ResNet input preprocessing.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read()``, i.e. in
            OpenCV's BGR channel order.

    Returns:
        The 224x224 frame after ``tf.keras.applications.resnet.preprocess_input``.
    """
    frame = cv2.resize(frame, (224, 224))
    # OpenCV decodes to BGR, while Keras' ResNet preprocess_input expects RGB
    # (it performs its own RGB->BGR flip and per-channel mean subtraction).
    # Without this conversion the channels reach the network swapped.
    # Every data split must be regenerated with this same preprocessing.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return tf.keras.applications.resnet.preprocess_input(frame)

def process_video(video_file, video_folder, all_annotations, num_frames):
    """Turn one video file into preprocessed frames plus its label vector.

    Args:
        video_file: File name of the video inside ``video_folder``.
        video_folder: Directory containing the video files.
        all_annotations: Mapping ``trait -> {video file name -> value}``.
        num_frames: Number of frames to sample from the video.

    Returns:
        ``(processed_frames, annotation)`` where ``processed_frames`` is a list
        of preprocessed frames and ``annotation`` holds the values of the first
        five traits for this video; ``(None, None)`` when the video is missing
        from any trait's annotations or fewer than ``num_frames`` frames could
        be read.
    """
    base_name = os.path.basename(video_file)

    # Cheap dictionary lookups first, so unannotated files are never decoded.
    if not all(base_name in all_annotations[trait] for trait in all_annotations):
        return None, None

    video_path = os.path.join(video_folder, video_file)
    frames = extract_frames(video_path, num_frames)
    # An unreadable or too-short video would make the stacked dataset ragged.
    if len(frames) < num_frames:
        return None, None

    processed_frames = [preprocess_frame(frame) for frame in frames]
    # NOTE(review): this keeps the first five traits in dict iteration order.
    # If the pickle holds more than five keys, that order decides which ones
    # are dropped -- confirm these are the intended labels.
    annotation = [all_annotations[trait][base_name] for trait in all_annotations][:5]
    return processed_frames, annotation


# Load annotations
# NOTE: pickle.load can execute arbitrary code while unpickling; only open
# annotation files that come from a trusted source.
with open(annotation_path, 'rb') as file:
    all_annotations = pickle.load(file, encoding='latin1')

# Create save folder if it doesn't exist (no separate existence check needed)
os.makedirs(save_folder, exist_ok=True)

# Adjust the number of frames here to reduce the size
num_frames = 5

# Main processing loop
all_video_data, all_video_labels = [], []
all_video_files = os.listdir(video_folder)
errors = []  # (video_file, message) for every file that produced no sample

for video_file in tqdm(all_video_files):
    try:
        X_batch, y_batch = process_video(video_file, video_folder, all_annotations, num_frames)
    except Exception as exc:
        # Record the failure and keep going so one bad file does not discard
        # the whole run; it is listed in the report at the end.
        errors.append((video_file, repr(exc)))
        continue

    if X_batch is None or y_batch is None:
        errors.append((video_file, "skipped: process_video returned no sample"))
        continue

    all_video_data.append(X_batch)
    all_video_labels.append(y_batch)

# Convert to numpy arrays and save
# NOTE(review): the whole dataset is materialised in memory here; at shape
# (5995, 5, 224, 224, 3) that is roughly 18 GB if the frames are float32.
all_video_data = np.array(all_video_data)
all_video_labels = np.array(all_video_labels)
np.save(os.path.join(save_folder, 'preprocessed_tarining_batch_final5.npy'), all_video_data)
np.save(os.path.join(save_folder, 'labels_tarining_batch_final5.npy'), all_video_labels)

# Error reporting
if errors:
    print("Errors during processing:")
    for video_file, error_message in errors:
        print(f"Video: {video_file}, Error: {error_message}")


100%|██████████| 5995/5995 [21:01<00:00,  4.75it/s]


In [4]:
import cv2
import os
import numpy as np
import tensorflow as tf
import pickle
import concurrent.futures
from tqdm import tqdm


# Dataset root on the local machine (absolute path; adjust per environment)
base_path = 'C:/Users/myldr/Desktop/CS559_Proj/Dataset/First_Impressions_V2/'

# Validation split: input videos, annotation pickle, and output folder
video_folder = f'{base_path}Validation_Data/Videos'
annotation_path = f'{base_path}Validation_Data/Annotations/annotation_validation.pkl'
save_folder = f'{base_path}Processed_Validation_Data_2'

def extract_frames(video_path, num_frames, frames_per_second=1):
    """Sample frames from a video at a fixed time interval.

    Sample ``i`` (for ``i`` in ``range(num_frames)``) is read from frame index
    ``int(fps * i / frames_per_second)``, where ``fps`` is the rate reported by
    the capture. With the default ``frames_per_second=1`` this is one frame per
    second of video, starting at the first frame.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frames to try to sample.
        frames_per_second: How many samples to take per second of video.

    Returns:
        List of the frames that were read successfully, in sampling order.
        It may hold fewer than ``num_frames`` items (or be empty) when the
        video cannot be opened or is too short.

    Raises:
        ValueError: If ``frames_per_second`` is not positive.
    """
    if frames_per_second <= 0:
        raise ValueError("frames_per_second must be positive")

    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if cap.isOpened():
            # If the backend reports 0 fps, every index below collapses to 0.
            frame_rate = cap.get(cv2.CAP_PROP_FPS)
            for i in range(num_frames):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_rate * i / frames_per_second))
                ret, frame = cap.read()
                if ret:
                    frames.append(frame)
    finally:
        # Release the capture even if seeking or decoding raises.
        cap.release()
    return frames


def preprocess_frame(frame):
    """Resize a decoded video frame and apply ResNet input preprocessing.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read()``, i.e. in
            OpenCV's BGR channel order.

    Returns:
        The 224x224 frame after ``tf.keras.applications.resnet.preprocess_input``.
    """
    frame = cv2.resize(frame, (224, 224))
    # OpenCV decodes to BGR, while Keras' ResNet preprocess_input expects RGB
    # (it performs its own RGB->BGR flip and per-channel mean subtraction).
    # Without this conversion the channels reach the network swapped.
    # Every data split must be regenerated with this same preprocessing.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return tf.keras.applications.resnet.preprocess_input(frame)

def process_video(video_file, video_folder, all_annotations, num_frames):
    """Turn one video file into preprocessed frames plus its label vector.

    Args:
        video_file: File name of the video inside ``video_folder``.
        video_folder: Directory containing the video files.
        all_annotations: Mapping ``trait -> {video file name -> value}``.
        num_frames: Number of frames to sample from the video.

    Returns:
        ``(processed_frames, annotation)`` where ``processed_frames`` is a list
        of preprocessed frames and ``annotation`` holds the values of the first
        five traits for this video; ``(None, None)`` when the video is missing
        from any trait's annotations or fewer than ``num_frames`` frames could
        be read.
    """
    base_name = os.path.basename(video_file)

    # Cheap dictionary lookups first, so unannotated files are never decoded.
    if not all(base_name in all_annotations[trait] for trait in all_annotations):
        return None, None

    video_path = os.path.join(video_folder, video_file)
    frames = extract_frames(video_path, num_frames)
    # An unreadable or too-short video would make the stacked dataset ragged.
    if len(frames) < num_frames:
        return None, None

    processed_frames = [preprocess_frame(frame) for frame in frames]
    # NOTE(review): this keeps the first five traits in dict iteration order.
    # If the pickle holds more than five keys, that order decides which ones
    # are dropped -- confirm these are the intended labels.
    annotation = [all_annotations[trait][base_name] for trait in all_annotations][:5]
    return processed_frames, annotation


# Load annotations
# NOTE: pickle.load can execute arbitrary code while unpickling; only open
# annotation files that come from a trusted source.
with open(annotation_path, 'rb') as file:
    all_annotations = pickle.load(file, encoding='latin1')

# Create save folder if it doesn't exist (no separate existence check needed)
os.makedirs(save_folder, exist_ok=True)

# Adjust the number of frames here to reduce the size
num_frames = 5

# Main processing loop
all_video_data, all_video_labels = [], []
all_video_files = os.listdir(video_folder)
errors = []  # (video_file, message) for every file that produced no sample

for video_file in tqdm(all_video_files):
    try:
        X_batch, y_batch = process_video(video_file, video_folder, all_annotations, num_frames)
    except Exception as exc:
        # Record the failure and keep going so one bad file does not discard
        # the whole run; it is listed in the report at the end.
        errors.append((video_file, repr(exc)))
        continue

    if X_batch is None or y_batch is None:
        errors.append((video_file, "skipped: process_video returned no sample"))
        continue

    all_video_data.append(X_batch)
    all_video_labels.append(y_batch)

# Convert to numpy arrays and save
# NOTE(review): the whole dataset is materialised in memory before saving;
# at shape (1999, 5, 224, 224, 3) that is roughly 6 GB if the frames are float32.
all_video_data = np.array(all_video_data)
all_video_labels = np.array(all_video_labels)
np.save(os.path.join(save_folder, 'validaiton_videos_final5.npy'), all_video_data)
np.save(os.path.join(save_folder, 'validaiton_labels_final5.npy'), all_video_labels)

# Error reporting
if errors:
    print("Errors during processing:")
    for video_file, error_message in errors:
        print(f"Video: {video_file}, Error: {error_message}")


100%|██████████| 1999/1999 [06:52<00:00,  4.84it/s]


In [5]:
# Dataset root on the local machine (absolute path; adjust per environment)
base_path = 'C:/Users/myldr/Desktop/CS559_Proj/Dataset/First_Impressions_V2/'
save_folder = f'{base_path}Processed_Validation_Data_2'

# Read back the saved validation arrays.
# NOTE(review): allow_pickle=True is only needed for object arrays and lets
# np.load unpickle arbitrary objects -- confirm it is actually required.
videos_path = os.path.join(save_folder, 'validaiton_videos_final5.npy')
labels_path = os.path.join(save_folder, 'validaiton_labels_final5.npy')
loaded_data = np.load(videos_path, allow_pickle=True)
loaded_labels = np.load(labels_path, allow_pickle=True)

# Sanity check: report the array dimensions
print(f"Video Data Shape: {loaded_data.shape}")
print(f"Labels Shape: {loaded_labels.shape}")

Video Data Shape: (1999, 5, 224, 224, 3)
Labels Shape: (1999, 5)


In [6]:
# Dataset root on the local machine (absolute path; adjust per environment)
base_path = 'C:/Users/myldr/Desktop/CS559_Proj/Dataset/First_Impressions_V2/'
save_folder = f'{base_path}Processed_Training_Data_2'

# Read back the saved training arrays.
# NOTE(review): allow_pickle=True is only needed for object arrays and lets
# np.load unpickle arbitrary objects -- confirm it is actually required.
videos_path = os.path.join(save_folder, 'preprocessed_tarining_batch_final5.npy')
labels_path = os.path.join(save_folder, 'labels_tarining_batch_final5.npy')
loaded_data = np.load(videos_path, allow_pickle=True)
loaded_labels = np.load(labels_path, allow_pickle=True)

# Sanity check: report the array dimensions
print(f"Video Data Shape: {loaded_data.shape}")
print(f"Labels Shape: {loaded_labels.shape}")

Video Data Shape: (5995, 5, 224, 224, 3)
Labels Shape: (5995, 5)
