In [1]:
import numpy as np
import ffmpeg  # ffmpeg-python
import json
import cv2

from os import listdir
from os.path import splitext
from os.path import join

## Explanation of different datasets
The Aff-Wild2 dataset is used for training, validation and testing, while the AFEW 7.0 dataset is only used for testing.

#### AFEW 7.0
In this dataset each video shows only one facial expression, so each video is placed in a folder whose name is the facial expression class. The classes of the test set videos are not given, so only the training and validation sets are combined and used to obtain the final test results of the model.

The Training and validation sets are combined in one folder called Val+train_AFEW, which sits inside the AFEW 7.0 Dataset folder.

In [2]:
from load_filenames import AF7_dir_videos, AF7_dir_labels, AF7_labeled_videos_FN

### AFEW 7.0 Dataset Size ###

In [3]:
# Flatten the per-label file-name lists into a single list for the whole set
AF7_all_videos = []
for label in AF7_dir_labels:
    AF7_all_videos.extend(AF7_labeled_videos_FN.get(label))

print(f"The training+validation set contains: {len(AF7_all_videos)} videos")

The training+validation set contains: 1156 videos


### Sequence length of each video ###

Only FFmpeg is able to extract all frames, which probably has to do with the encoding of the videos. SKvideo and OpenCV both return an incorrect number of frames (sometimes 2/3 frames).

https://github.com/kkroening/ffmpeg-python/issues/63


In [4]:
# Map each video file name to the shape of its fully decoded frame array,
# i.e. (n_frames, height, width, 3).
# NOTE(review): entries are keyed by file name only, so two videos with the same
# name under different labels would overwrite each other - confirm names are unique.
AF7_seqlength = {}

for label, videos in AF7_labeled_videos_FN.items():
    for video in videos:
        # Build the path once; it is needed for both the probe and the decode
        video_path = join(AF7_dir_videos, label, video)

        # Read the frame size from the first video stream listed by the probe
        probe = ffmpeg.probe(video_path)
        video_info = next(
            (stream for stream in probe["streams"] if stream["codec_type"] == "video"),
            None,
        )
        if video_info is None:
            # Fail with the offending file name instead of a bare StopIteration
            raise ValueError(f"No video stream found in {video_path}")
        width = int(video_info["width"])
        height = int(video_info["height"])

        # Decode the whole video to raw RGB bytes (3 bytes per pixel) on stdout
        out, _ = (
            ffmpeg.input(video_path)
            .output("pipe:", format="rawvideo", pix_fmt="rgb24")
            .run(capture_stdout=True)
        )
        # The leading -1 lets numpy infer the number of frames from the byte count
        frames = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])

        AF7_seqlength[video] = frames.shape

In [5]:
# Write every video's name and frame-array shape to a JSON file.
# Each shape is (n_frames, height, width, colour channels).
# NOTE(review): the file name says "AF2" although the data written is the
# AF7_seqlength dictionary - confirm this is the intended target file.
with open("data/AF2_video_shapes.json", "w") as shapes_file:
    json.dump(AF7_seqlength, shapes_file)