In [113]:
import pandas as pd
from pathlib import Path
import shutil
from tqdm import tqdm
import cv2
import numpy as np
import os

In [35]:
# Root of the EmoReact dataset; the per-label output folders are created directly under it.
BASE_PATH = Path('/project/volume/data/in/EmoReact')
# Directory holding the per-split label files (derived from BASE_PATH so the root is stated once).
LABELS_PATH = BASE_PATH / 'EmoReact_V_1.0' / 'Labels'
# Names of the original split folders found under BASE_PATH / 'EmoReact_V_1.0/Data'.
ORIG_FOLDERS = ['Test', 'Train', 'Validation']
# One label file per split. These are file names, so none of them may end with a path separator.
LABEL_FILES = ['test_labels.text', 'train_labels.text', 'val_labels.text']

# Column names given to the label tables; also used as the names of the output folders.
labels = ['Curiosity', 'Uncertainty', 'Excitement', 'Happiness', 'Surprise', 'Disgust', 'Fear', 'Frustration']

In [27]:
def create_folders(label_names=None, base_path=None):
    """Create one output directory per label.

    Args:
        label_names: Iterable of folder names to create. When omitted, the
            module-level ``labels`` list is used.
        base_path: Directory under which the folders are created. When
            omitted, the module-level ``BASE_PATH`` is used.

    Existing folders are left untouched, so the function can be re-run safely.
    """
    # Defaults are resolved at call time so the zero-argument call keeps
    # reading the notebook's configuration cell.
    label_names = labels if label_names is None else label_names
    base_path = BASE_PATH if base_path is None else Path(base_path)

    for label in label_names:
        # exist_ok=True already makes this a no-op for folders that exist.
        (base_path / Path(label)).mkdir(parents=True, exist_ok=True)

def read_labels(labels_path=None, label_files=None, label_names=None):
    """Load the label tables for the train, test and validation splits.

    Every label file is read as a header-less CSV. Its last column is dropped
    and the remaining columns are named after ``label_names``.

    Args:
        labels_path: Directory containing the label files. Defaults to the
            module-level ``LABELS_PATH``.
        label_files: File names to read. Defaults to the module-level
            ``LABEL_FILES``. A file is assigned to a split by the substring
            ``train_labels.text`` / ``test_labels.text`` / ``val_labels.text``
            in its name; files matching none of them are ignored.
        label_names: Column names for the kept columns. Defaults to the
            module-level ``labels``.

    Returns:
        Tuple ``(df_train, df_test, df_val)``. A split with no matching file
        is returned as an empty DataFrame.
    """
    labels_path = LABELS_PATH if labels_path is None else Path(labels_path)
    label_files = LABEL_FILES if label_files is None else label_files
    label_names = labels if label_names is None else list(label_names)

    # File-name marker -> split. The train marker must feed the train table
    # and the test marker the test table (these were previously swapped).
    split_markers = (
        ('train_labels.text', 'train'),
        ('test_labels.text', 'test'),
        ('val_labels.text', 'val'),
    )
    frames = {'train': [], 'test': [], 'val': []}

    for label_file in label_files:
        path = labels_path / Path(label_file)
        df = pd.read_csv(str(path), header=None)
        # The trailing column is not one of the label columns.
        # NOTE(review): presumably the valence score - confirm against the dataset README.
        df = df.drop(columns=[df.columns[-1]])
        df.columns = label_names
        for marker, split in split_markers:
            if marker in str(label_file):
                frames[split].append(df)
                break

    def _combine(parts):
        # Concatenate once per split instead of growing a frame inside the loop.
        return pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()

    return _combine(frames['train']), _combine(frames['test']), _combine(frames['val'])

In [23]:
# The zero-argument call builds the folders from the module-level `labels` and `BASE_PATH`.
create_folders()

In [31]:
# Load the three label tables once; move_files() reads them from the notebook namespace.
(df_train, df_test, df_val) = read_labels()

In [64]:
def move_files():
    """Move every ``.mp4`` from the original split folders into per-label folders.

    For each folder in ``ORIG_FOLDERS`` the videos found under
    ``BASE_PATH / 'EmoReact_V_1.0/Data' / folder`` are paired, by position,
    with the rows of the matching label table (``df_test``, ``df_train`` or
    ``df_val``). Each video is moved to ``BASE_PATH / <label>``, where
    ``<label>`` is the column holding the row's maximum value. The emptied
    split folder is then deleted.

    Raises:
        ValueError: If a split folder contains videos but their number differs
            from the number of label rows. Nothing is moved or deleted for
            that folder, because a positional pairing would be unreliable.
    """
    for folder in tqdm(ORIG_FOLDERS):
        folder_path = BASE_PATH / Path('EmoReact_V_1.0/Data') / folder
        if 'Test' in folder:
            df = df_test
        elif 'Train' in folder:
            df = df_train
        elif 'Validation' in folder:
            df = df_val
        else:
            continue

        # rglob() yields files in filesystem order, which is not stable across
        # machines; sorting makes the file -> label-row pairing reproducible.
        # NOTE(review): this assumes the label rows follow the sorted file-name
        # order - confirm against the dataset's name lists before trusting the labels.
        videos = sorted(folder_path.rglob('*.mp4'))

        # Validate before touching anything: a mismatch used to surface as a
        # KeyError half-way through the move, or not at all.
        if videos and len(videos) != len(df):
            raise ValueError(
                f'{folder_path}: found {len(videos)} videos but {len(df)} label rows'
            )

        for file_idx, file in enumerate(videos):
            # iloc: the pairing is positional, independent of the index labels.
            file_label = df.iloc[file_idx].idxmax()
            destination_folder = BASE_PATH / Path(file_label)
            destination_folder.mkdir(exist_ok=True, parents=True)
            shutil.move(str(file), str(destination_folder))

        if folder_path.exists() and folder_path.is_dir():
            shutil.rmtree(folder_path)


In [65]:
# Destructive step: moves the videos into the per-label folders and removes the original split folders.
move_files()

100%|████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1026.25it/s]


In [None]:
# Move the image files
# Splits the files of each folder by position: the first ~60% go to "train/",
# the next ~25% to "val/" and the remainder to "test/".
# NOTE(review): `orig_folders`, `BASE_DIR` and `names` are not defined in any cell
# shown here (the configuration cell defines ORIG_FOLDERS and BASE_PATH instead),
# and this cell has never been executed - confirm whether it is still needed.
for folder_idx, folder in enumerate(orig_folders):
    files = os.listdir(BASE_DIR + folder)
    number_of_images = len([name for name in files])
    # int(x + 0.5) rounds the non-negative share to the nearest integer.
    n_train = int((number_of_images * 0.6) + 0.5)
    n_valid = int((number_of_images*0.25) + 0.5)
    # Whatever is left after train and validation goes to test.
    n_test = number_of_images - n_train - n_valid
    print(number_of_images, n_train, n_valid, n_test)
    for idx, file in enumerate(files):
        # Plain string concatenation: BASE_DIR and folder must each end with a
        # path separator for this to form a valid path.
        file_name = BASE_DIR + folder + file
        if idx < n_train:
            shutil.move(file_name, BASE_DIR + "train/" + names[folder_idx])
        elif idx < n_train + n_valid:
            shutil.move(file_name, BASE_DIR + "val/" + names[folder_idx])
        else:
            shutil.move(file_name, BASE_DIR + "test/" + names[folder_idx])


In [68]:
# Count every .mp4 below BASE_PATH (recursively) without materialising the list.
number_of_videos = sum(1 for _ in BASE_PATH.rglob('*.mp4'))


1102

In [72]:
# Duration in seconds (frame count / frames per second) of every video below BASE_PATH.
video_lengths = []
for file in list(BASE_PATH.rglob('*.mp4')):
    cap = cv2.VideoCapture(str(file))
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if reading the properties fails.
        cap.release()

    # A file that cannot be opened or decoded reports 0 fps; dividing by it
    # would raise ZeroDivisionError, so leave such files out of the statistics.
    if fps <= 0:
        print('skipping unreadable video: ', file)
        continue

    duration = frame_count / fps
    video_lengths.append(duration)

In [76]:
# Mean and standard deviation (NumPy's default, ddof=0) of the collected video durations.
average_length, std_dev = np.mean(video_lengths), np.std(video_lengths)

print(average_length, std_dev)

4.860247103750756 2.064505095633643


In [118]:
def extract_frames():
    """Dump every frame of every ``.mp4`` below ``BASE_PATH`` as a JPEG.

    Frames are written next to their video as ``<video stem>_<frame index>.jpg``;
    frames that already exist on disk are not rewritten. A video is deleted
    only after at least one frame was decoded and every decoded frame is on
    disk, so an unreadable video or a failed write never costs the source file.
    """
    for p in list(BASE_PATH.rglob('*.mp4')):
        print(p)
        cap = cv2.VideoCapture(str(p))
        try:
            if not cap.isOpened():
                print('could not open video, skipping: ', p)
                continue

            fps = cap.get(cv2.CAP_PROP_FPS)
            print('frames per second: ', fps)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # NOTE(review): nbr_frames is only reported; the loop below still
            # extracts every frame. Confirm whether 50% sampling was intended.
            nbr_frames = int(total_frames * 0.5)
            print('nbr_frames: ', nbr_frames)

            frames_read = 0
            all_saved = True
            # Frames are consumed in order, so read sequentially instead of
            # seeking to each index (a seek per frame is far slower).
            for frame_idx in tqdm(range(total_frames)):
                ret, frame = cap.read()
                if not ret:
                    # The reported frame count can exceed what is decodable.
                    break
                frames_read += 1
                # Join with the parent directory; plain string concatenation
                # dropped the separator and wrote into the wrong folder.
                frame_path = p.parent / f'{p.stem}_{frame_idx}.jpg'
                if not frame_path.exists():
                    # imwrite reports failure through its return value.
                    if not cv2.imwrite(str(frame_path), frame):
                        all_saved = False
        finally:
            cap.release()

        if frames_read > 0 and all_saved:
            if p.exists():
                p.unlink()
        else:
            print('keeping video, frames were not fully extracted: ', p)

In [119]:
# Long-running and destructive: writes one JPEG per frame and deletes source videos after processing them.
extract_frames()

/project/volume/data/in/EmoReact/Excitement/ESCARGOT2_2.mp4
frames per second:  23.976023976023978
nbr_frames:  57


100%|██████████████████████████████████████████████████████████████████████████| 115/115 [00:08<00:00, 13.25it/s]


/project/volume/data/in/EmoReact/Excitement/DUBSTEP140_2.mp4
frames per second:  23.976023976023978
nbr_frames:  35


100%|████████████████████████████████████████████████████████████████████████████| 70/70 [00:03<00:00, 17.78it/s]


/project/volume/data/in/EmoReact/Excitement/HARLEMSHAKE43_2.mp4
frames per second:  23.976023976023978
nbr_frames:  70


 82%|████████████████████████████████████████████████████████████▉             | 116/141 [00:12<00:02,  9.33it/s]


KeyboardInterrupt: 

In [89]:
# Scratch estimate of how many frames to keep per video.
# NOTE(review): `fps` and `cap` are not assigned in this cell; it relies on whatever
# an earlier cell left in the kernel namespace (presumably the last video handled by
# the duration loop - confirm). On a fresh kernel this cell depends on that cell
# having run first.
# Frames spanned by a clip of average duration at that frame rate.
total_frames_needed = int(average_length * fps)
total_frames_needed
#sampling_rate = max(1, total_frames_needed // )  # Sample approximately 1000 frames

# NOTE(review): the duration loop releases `cap` after use, so this may be querying
# a released capture - confirm the value it returns.
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

percentage = 0.5
# Overwrites the estimate above with a fixed fraction of the frame count.
total_frames_needed = int(total_frames * percentage)
