In [8]:
import pandas as pd
from pathlib import Path
import shutil
from tqdm import tqdm
import cv2
import numpy as np
import os
import random
#from prettytable import PrettyTable

In [9]:
BASE_PATH = Path('/project/volume/data/in/EmoReact')
LABELS_PATH = Path('/project/volume/data/in/EmoReact/EmoReact_V_1.0/Labels/')
ORIG_FOLDERS = ['Test', 'Train', 'Validation']
LABEL_FILES = ['test_labels.text', 'train_labels.text/', 'val_labels.text']

labels = ['Curiosity', 'Uncertainty', 'Excitement', 'Happiness', 'Surprise', 'Disgust', 'Fear', 'Frustration']

In [10]:
def create_folders():
    for label in labels:
        label_path = BASE_PATH / Path(label)
        if not label_path.exists():
            label_path.mkdir(parents=True, exist_ok=True)

def read_labels():
    df_train, df_test, df_val = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    for label in LABEL_FILES:
        path = LABELS_PATH / Path(label)
        df = pd.read_csv(str(path), header=None)
        df = df.drop(columns=[df.columns[-1]])
        df.columns = labels
        if 'test_labels.text' in label:
            df_train = pd.concat([df_train, df], ignore_index=True)
        elif 'train_labels.text' in label:
            df_test = pd.concat([df_test, df], ignore_index=True)
        elif 'val_labels.text' in label:
            df_val = pd.concat([df_val, df], ignore_index=True)

    return df_train, df_test, df_val

In [11]:
create_folders()

In [12]:
df_train, df_test, df_val = read_labels()

In [14]:
def move_files():
    for folder in tqdm(ORIG_FOLDERS):
        folder_path = BASE_PATH / Path('EmoReact_V_1.0/Data') / folder
        if 'Test' in folder:
            df = df_test
        elif 'Train' in folder:
            df = df_train
        elif 'Validation' in folder:
            df = df_val
        else:
            continue

        for file_idx, file in enumerate(list(folder_path.rglob('*.mp4'))):
            if file_idx >= len(df):
                break
            file_label = df.loc[file_idx].idxmax() 
            destination_folder = BASE_PATH / Path(file_label)
            destination_folder.mkdir(exist_ok=True, parents=True)
            shutil.move(str(file), str(destination_folder))
        
        if folder_path.exists() and folder_path.is_dir():
            shutil.rmtree(folder_path)

In [15]:
move_files()

100%|██████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.13it/s]


In [16]:
def get_video_lenght():
    video_lengths = []
    for file in list(BASE_PATH.rglob('*.mp4')):
        cap = cv2.VideoCapture(str(file))
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps
        video_lengths.append(duration)
        cap.release()
    return video_lengths

In [None]:
video_lengths = get_video_lenght()

In [None]:
average_length = np.mean(video_lengths)
std_dev = np.std(video_lengths)

print(average_length, std_dev)

In [17]:
def extract_frames():
    for p in tqdm(list(BASE_PATH.rglob('*.mp4'))):
        cap = cv2.VideoCapture(str(p))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        nbr_frames = int(total_frames * 0.25)
        #nbr_frames = 110

        for frame_idx in range(nbr_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            _, frame = cap.read()
            frame_file_name = Path(str(p.stem) + f'_{frame_idx}.jpg')
            frame_path = str(p.parent / frame_file_name)
            #print(frame_path)
            if not os.path.exists(str(frame_path)):
                cv2.imwrite(frame_path, frame)
    
        cap.release()
        if os.path.exists(str(p)):
            os.remove(str(p))

In [18]:
extract_frames()

100%|████████████████████████████████████████████████████████████████████████| 1037/1037 [23:45<00:00,  1.37s/it]


In [19]:
from mediapipe import Image
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [20]:
model_path_detect = '/project/volume/models/blaze_face_short_range.tflite'
model_path_mesh = '/project/volume/models/face_landmarker.task'

base_options_detect = python.BaseOptions(model_asset_path=model_path_detect)
options_detect = vision.FaceDetectorOptions(base_options=base_options_detect)
detector_detect = vision.FaceDetector.create_from_options(options_detect)

base_options_mesh = python.BaseOptions(model_asset_path=model_path_mesh)
options_mesh = vision.FaceLandmarkerOptions(base_options=base_options_mesh,
                                       output_face_blendshapes=False,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector_mesh = vision.FaceLandmarker.create_from_options(options_mesh)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1717402833.838333      12 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.


In [22]:
detect_faces()

100%|███████████████████████████████| 30816/30816 [17:36<00:00, 29.17it/s]


In [24]:
train_prop = 0.6
test_prop = 0.2
valid_prop = 0.2

number_of_images = len(list(BASE_PATH.rglob('*.jpg')))

print("[INFO] Number of images in total ..." + str(number_of_images))

n_train = int((number_of_images * train_prop) + 0.5)
n_valid = int((number_of_images * valid_prop) + 0.5)
n_test = number_of_images - n_train - n_valid


print(f"[INFO] Number of images used in training ... {str(n_train)} ({str(train_prop * 100)}%)")
print(f"[INFO] Number of images used in testing ... {str(n_test)} ({str(test_prop * 100)}%)")
print(f"[INFO] Number of images used in validation ...{str(n_valid)} ({str(valid_prop * 100)}%)")

[INFO] Number of images in total ...32074
[INFO] Number of images used in training ... 19244 (60.0%)
[INFO] Number of images used in testing ... 6415 (20.0%)
[INFO] Number of images used in validation ...6415 (20.0%)


In [25]:
def split_and_move():
    for label in tqdm(labels):
        folder_path = BASE_PATH / label
        print(folder_path)
        #print(len(list(folder_path.rglob('*.jpg'))))
        train_destination = BASE_PATH / "train" / label
        val_destination = BASE_PATH / "val" / label
        test_destination = BASE_PATH / "test" / label

        train_destination.mkdir(parents=True, exist_ok=True)
        val_destination.mkdir(parents=True, exist_ok=True)
        test_destination.mkdir(parents=True, exist_ok=True)

        files = list(folder_path.rglob('*.jpg'))
        random.shuffle(files)
        train_n = (int((len(files) * train_prop) + 0.5))
        val_n = (int((len(files) * valid_prop) + 0.2))
        
        for file_idx, file in enumerate(list(folder_path.rglob('*.jpg'))):
            if file_idx < train_n:
                shutil.move(str(file), train_destination)
            elif file_idx < train_n + val_n:
                shutil.move(str(file), val_destination)
            else:
                shutil.move(str(file), test_destination)

        if folder_path.exists() and folder_path.is_dir():
            shutil.rmtree(folder_path)

In [26]:
split_and_move()

  0%|                                                                                           | 0/8 [00:00<?, ?it/s]

/project/volume/data/in/EmoReact/Curiosity


 12%|██████████▍                                                                        | 1/8 [00:37<04:24, 37.82s/it]

/project/volume/data/in/EmoReact/Uncertainty


 25%|████████████████████▊                                                              | 2/8 [00:47<02:06, 21.14s/it]

/project/volume/data/in/EmoReact/Excitement


 38%|███████████████████████████████▏                                                   | 3/8 [01:13<01:56, 23.32s/it]

/project/volume/data/in/EmoReact/Happiness


 50%|█████████████████████████████████████████▌                                         | 4/8 [01:32<01:26, 21.61s/it]

/project/volume/data/in/EmoReact/Surprise


 62%|███████████████████████████████████████████████████▉                               | 5/8 [01:34<00:43, 14.66s/it]

/project/volume/data/in/EmoReact/Disgust


 75%|██████████████████████████████████████████████████████████████▎                    | 6/8 [01:37<00:21, 10.77s/it]

/project/volume/data/in/EmoReact/Fear
/project/volume/data/in/EmoReact/Frustration


100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [01:39<00:00, 12.38s/it]


In [None]:
print(len(list(BASE_PATH.rglob('*.jpg'))))

print(14952 * 0.6)
print(14952 * 0.2)


In [23]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
train_gen = keras.preprocessing.image.ImageDataGenerator()
valid_gen = keras.preprocessing.image.ImageDataGenerator()
test_gen = keras.preprocessing.image.ImageDataGenerator()