Take unorganized folders of labelled video data, convert each clip into a sequence of frames, and organize the frames into folders according to the emotion depicted in the clip.

In [1]:
import cv2
import glob
import os
import pandas as pd

CREMA-D

In [30]:
# Load the CREMA-D actor metadata table from the CSV shipped with the dataset.
demographics = pd.read_csv("VideoDemographics.csv") # provides info about each actor in the CREMA-D dataset by actor ID

In [31]:
# Re-key the demographics table by actor ID: each actor ID maps to a list holding
# that actor's remaining column values, in the column order of the CSV.
# This makes age & gender lookups by actor ID a plain dictionary access.
demographics_as_dict = (
    demographics
    .set_index("ActorID")
    .transpose()
    .to_dict(orient="list")
)

In [32]:
# Get the path of every video to be turned into frames (searched recursively
# below crema-d/). glob returns matches in arbitrary order, so sort them: the
# order of this list determines the video_N numbering of the output folders,
# and sorting keeps that numbering reproducible between runs and machines.
files = sorted(glob.glob("crema-d/**/*.flv", recursive=True))

In [33]:
# Emotion code found in a CREMA-D file name -> name of the emotion category
crema_d_to_expected = {
    "ANG": "Angry",
    "DIS": "Disgust",
    "FEA": "Fear",
    "HAP": "Happy",
    "SAD": "Sad",
    "NEU": "Neutral",
}

# Numeric class index -> name of the emotion category
expressions = dict(enumerate(["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]))

In [39]:
# Map each emotion category to the number of videos already processed from that
# category. The running count is embedded in every output folder name so that
# each clip gets a folder of its own.
num_videos = {"Angry":0, "Disgust":0, "Fear":0, "Happy":0, "Sad":0, "Surprise":0, "Neutral":0}

for filepath in files:
    # The clip name has the form "<actor id>_<...>_<emotion code>_<intensity>.flv".
    # Parse the base name (instead of splitting the path on "\\") so this works
    # on every OS and no matter how deep below crema-d/ the clip was found.
    clip_name, _extension = os.path.splitext(os.path.basename(filepath))
    split_path = clip_name.split("_")

    emotion = crema_d_to_expected[split_path[2]] # emotion depicted in the video
    intensity = split_path[3] # intensity of the emotion
    actor_info = demographics_as_dict[int(split_path[0])] # values are looked up by position
    age = str(actor_info[0]) # age of the actor
    gender = "M" if actor_info[1] == "Male" else "F" # gender of the actor

    # The counter for this emotion only changes after the clip is finished, so
    # the output folder can be computed once per clip.
    output_dir = f'Emotions/{emotion}/video_{num_videos[emotion]}_{gender}_{age}_{intensity}/'

    cap = cv2.VideoCapture(filepath)
    frame_num = 0
    try:
        has_frames_left, frame = cap.read()

        if has_frames_left:
            # cv2 will not create the directory if it does not exist, it will just
            # silently fail. Only create it when there is at least one frame, so
            # unreadable clips do not leave empty folders behind.
            os.makedirs(output_dir, exist_ok=True)

        while has_frames_left:
            cv2.imwrite(f'{output_dir}frame_{frame_num}.jpg', frame)
            frame_num += 1
            has_frames_left, frame = cap.read()
    finally:
        cap.release() # always free the capture, even if reading or writing raised

    if frame_num != 0: # video was successfully processed
        num_videos[emotion] += 1

RAVDESS

In [42]:
# Collect the video clips of actors 1 through 24. Each actor's clips live in
# RAVDESS/Video_Speech_Actor_NN/Actor_NN/, where NN is the actor number
# zero-padded to two digits.
files = []
for i in range(1, 25):
    actor = f"{i:02d}"
    # Build the pattern with os.path.join rather than hard-coded "\\" separators
    # so the clips are also found on non-Windows systems.
    pattern = os.path.join("RAVDESS", f"Video_Speech_Actor_{actor}", f"Actor_{actor}", "*.mp4")
    # glob returns matches in arbitrary order; sort for a reproducible file list.
    files_subset = sorted(glob.glob(pattern))
    files.extend(files_subset)

This yields 2878 video clips instead of the full 2880, because I had issues downloading 2 of the files.

2880 = 60 trials per actor × 2 modalities × 24 actors

In [44]:
# RAVDESS emotion code (taken from the file name) -> class index used in `expressions`
ravdess_to_expected = {1:6, 2:6, 3:3, 4:4, 5:0, 6:2, 7:1, 8:5} # note: RAVDESS has an extra class for "calm:2" which we will classify as neutral for now
expressions = {0:"Angry", 1:"Disgust", 2:"Fear", 3:"Happy", 4:"Sad", 5:"Surprise", 6:"Neutral"}

# Map each emotion category to the number of videos already processed from that
# category. The running count names the output folder of every clip.
num_videos = {"Angry":0, "Disgust":0, "Fear":0, "Happy":0, "Sad":0, "Surprise":0, "Neutral":0}

# NOTE(review): every .mp4 in `files` is processed, i.e. both modalities of each
# trial - presumably the same footage ends up extracted twice; confirm this is intended.
for filepath in files:
    # The third "-"-separated field of the file name is the emotion code.
    # Use the base name (instead of splitting the path on "\\") so this works
    # on every OS and for any folder depth.
    emotion_code = int(os.path.basename(filepath).split("-")[2])
    emotion = expressions[ravdess_to_expected[emotion_code]]

    # The counter for this emotion only changes after the clip is finished, so
    # the output folder can be computed once per clip.
    output_dir = f'RAVDESS Frames/{emotion}/video_{num_videos[emotion]}/'

    cap = cv2.VideoCapture(filepath)
    frame_num = 0
    try:
        has_frames_left, frame = cap.read()

        if has_frames_left:
            # cv2 will not create the directory if it does not exist, it will just
            # silently fail. Only create it when there is at least one frame, so
            # unreadable clips do not leave empty folders behind.
            os.makedirs(output_dir, exist_ok=True)

        while has_frames_left:
            cv2.imwrite(f'{output_dir}frame_{frame_num}.jpg', frame)
            frame_num += 1
            has_frames_left, frame = cap.read()
    finally:
        cap.release() # always free the capture, even if reading or writing raised

    if frame_num != 0: # video was successfully processed
        num_videos[emotion] += 1