In [None]:
from pathlib import Path
import subprocess

import pandas as pd
import numpy as np
from progressbar import ProgressBar
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.applications import Xception
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import ResNet50

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

import pickle

In [None]:
# Kaggle input locations: the DAiSEE videos and their per-split label CSVs.
data_dir = '/kaggle/input/daisee/DAiSEE/DataSet'
label_dir = '/kaggle/input/daisee/DAiSEE/Labels'
# Despite the `_dir` suffix, these three point at individual .h5 weight files.
xception_weights_dir = '/kaggle/input/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5'
resnet_weights_dir = '/kaggle/input/resnet50-weights/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
vgg16_weights_dir = '/kaggle/input/vgg16-weights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'  # not referenced in the visible cells
# Output locations under /kaggle/working.
frames_dir = '/kaggle/working/frames'  # JPEG frames written by extract_frames, one sub-folder per split
numpy_dir = '/kaggle/working/labels'  # x_<split>.npy / y_<split>.npy written by save_filepath_label
model_dir = '/kaggle/working/model12'  # saved .h5 models and the TensorBoard "logs" folder
evaluate_dir = "/kaggle/working/evaluate"  # not referenced in the visible cells
history_dir = '/kaggle/working/history'  # pickled training histories

# Извлечение кадров

In [None]:
def get_frames(subdirectory, video, odir):
    """Extract JPEG frames from one video with ffmpeg.

    Frames are sampled at 0.7 fps and written as
    ``{odir}/{video file name without extension}_{n}.jpeg``.

    Args:
        subdirectory: Split name. Unused; kept so existing callers keep working.
        video: Path of the video file (``str`` or ``Path``).
        odir: Directory that receives the frames.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    video = Path(video)
    output_pattern = f"{odir}/{video.stem}_%1d.jpeg"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids shell injection through file names.
    subprocess.run(
        ["ffmpeg", "-i", str(video), "-vf", "fps=0.7",
         output_pattern, "-loglevel", "quiet"],
        check=True,
    )

def extract_frames(data_dir, label_dir, out_dir):
    """Extract frames for every labelled video of the Train/Test/Validation splits.

    For each split, videos are collected from ``data_dir/<split>/*/*/*`` and
    frames are written to ``out_dir/<split>`` for those whose file name occurs
    in the ``ClipID`` column of ``label_dir/<split>Labels.csv``.

    Args:
        data_dir: Root directory holding one sub-directory per split.
        label_dir: Directory holding ``<split>Labels.csv`` files.
        out_dir: Root output directory; split sub-directories are created.
    """
    data_dir = Path(data_dir)
    label_dir = Path(label_dir)
    out_dir = Path(out_dir)

    for subdirectory in ("Train", "Test", "Validation"):
        sdir = data_dir / subdirectory
        odir = out_dir / subdirectory
        odir.mkdir(parents=True, exist_ok=True)
        label = pd.read_csv(label_dir / f"{subdirectory}Labels.csv")
        clip_ids = label['ClipID'].astype(str)
        # List the videos once; the listing feeds both the bar size and the loop.
        videos = list(sdir.glob("*/*/*"))
        print(f"Extracting frames for {subdirectory}")
        with ProgressBar(max_value=len(videos)) as bar:
            for i, video in enumerate(videos):
                # regex=False: the file name is a literal, so "." must not
                # behave as a wildcard.
                if clip_ids.str.contains(video.name, regex=False).any():
                    get_frames(subdirectory, video, odir)
                bar.update(i)

In [None]:
# Write the frames of every labelled clip under frames_dir, one folder per split.
extract_frames(data_dir=data_dir, label_dir=label_dir, out_dir=frames_dir)

# Сохранение путей к кадрам и меток

In [None]:
def save_filepath_label(usage, frame_dir, label_dir, out_dir):
    """Pair every frame of one split with its clip's labels and save both arrays.

    Frames are read from ``frame_dir/<usage>/*.jpeg`` and are expected to be
    named ``<clip name>_<n>.jpeg``. Labels come from
    ``label_dir/<usage>Labels.csv``, whose first column is ``ClipID`` and whose
    remaining columns are numeric. A clip is looked up as ``<clip name>.avi``
    first and ``<clip name>.mp4`` second.

    The arrays are shuffled with a fixed seed and stored as
    ``out_dir/x_<usage>.npy`` (paths) and ``out_dir/y_<usage>.npy`` (labels),
    with ``<usage>`` lower-cased.

    Args:
        usage: Split name, e.g. "Train", "Test" or "Validation".
        frame_dir: Root directory of the extracted frames.
        label_dir: Directory holding the label CSV files.
        out_dir: Directory for the ``.npy`` files; created if missing.

    Returns:
        Tuple ``(filepath, label)``: an object array of shape ``(n_frames,)``
        and a float32 array of shape ``(n_frames, n_label_columns)`` in the
        same shuffled order.

    Raises:
        ValueError: If a frame's clip has no row in the label CSV.
    """
    frame_dir = Path(frame_dir) / usage
    label_dir = Path(label_dir)
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    labeldf = pd.read_csv(label_dir / f"{usage}Labels.csv")
    label_values = labeldf.iloc[:, 1:].to_numpy(dtype=np.float32)
    # Index the labels once: one exact dict lookup per frame replaces a regex
    # scan over the whole ClipID column (where "." matched any character).
    labels_by_clip = {}
    for clip_id, row in zip(labeldf['ClipID'].astype(str).str.strip(), label_values):
        labels_by_clip.setdefault(clip_id, row)

    # Sorted so the seeded shuffle below does not depend on filesystem order.
    frames = sorted(frame_dir.glob("*.jpeg"))
    nrows = len(frames)
    ncols = label_values.shape[1]
    filepath = np.empty((nrows,), dtype=object)
    label = np.empty((nrows, ncols), dtype=np.float32)
    print(f"Getting filepath and labels for {usage}")
    with ProgressBar(max_value=nrows) as bar:
        for i, frame in enumerate(frames):
            filepath[i] = str(frame)
            # Split on the LAST "_" so clip names containing "_" stay intact.
            frameid = frame.name.rsplit("_", 1)[0]
            for extension in (".avi", ".mp4"):
                row = labels_by_clip.get(frameid + extension)
                if row is not None:
                    break
            else:
                raise ValueError(
                    f"No label row for clip '{frameid}' (frame {frame}) "
                    f"in {usage}Labels.csv"
                )
            label[i] = row
            bar.update(i)

    # A private RandomState yields the same permutation as
    # np.random.seed(100) + np.random.permutation, without reseeding the
    # process-wide generator.
    indices = np.random.RandomState(100).permutation(nrows)
    filepath = filepath[indices]
    label = label[indices]
    np.save(out_dir / f"x_{usage.lower()}.npy", filepath, allow_pickle=True)
    np.save(out_dir / f"y_{usage.lower()}.npy", label)
    return filepath, label

In [None]:
# Build and save the shuffled path/label arrays for every split.
# The frames live in `frames_dir` (the config cell defines no `frame_dir`).
for usage in ("Train", "Test", "Validation"):
    save_filepath_label(usage, frames_dir, label_dir, numpy_dir)

In [None]:
# Names of the four label columns; not referenced in the visible cells.
class_names = np.array(
    ['Boredom', 'Engagement', 'Confusion', 'Frustration']
)
# Passed to Dataset.map / Dataset.prefetch in get_dataset.
autotune = tf.data.experimental.AUTOTUNE
# Target image size used by parse_function and as the models' input_shape.
img_width = 299
img_height = 299
batch_size = 64
shuffle_buffer_size = 3000  # buffer size for shuffling the "Train" dataset
# Passed as `initial_epoch` to Model.fit by the training functions;
# Xception_train also reassigns it after training.
old_epoch = 0

# Learning rate for the initial training; divided by 10 when finetune=True.
base_learning_rate = 0.0001
# When fine-tuning, layers[:finetune_at] of the loaded model stay frozen.
finetune_at = 80

In [None]:
def show_batch(image, label):
    """Plot the images of one batch in a grid, each titled with its four labels.

    The grid is sized from the number of images actually present, so a full
    batch and a shorter final batch both fit.

    Args:
        image: Batch of images exposing ``.numpy()``; values are multiplied by
            255 and cast to uint8 for display.
        label: Dict with keys "y1".."y4"; ``label[key][i].numpy().item()`` must
            yield the scalar label of image ``i``.
    """
    image = image.numpy()
    # The last batch of a dataset can hold fewer than batch_size images.
    n_images = min(len(image), batch_size)
    if n_images == 0:
        return
    # Near-square grid large enough for every image.
    n_cols = int(np.ceil(np.sqrt(n_images)))
    n_rows = int(np.ceil(n_images / n_cols))

    plt.figure(figsize=(15, 15))
    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)
        imgtitle = [label[key][i].numpy().item()
                    for key in ("y1", "y2", "y3", "y4")]
        plt.imshow(np.uint8(image[i] * 255))
        plt.title(imgtitle, fontsize=8)
        plt.axis('off')
    plt.show()

In [None]:
def parse_function(filepath, label):
    """Load one JPEG file and resize it to the configured input size.

    Args:
        filepath: Path of the JPEG file to read.
        label: Passed through unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` is the decoded 3-channel
        image converted to float32 and resized to img_height x img_width.
    """
    image = tf.io.read_file(filepath)
    image = tf.io.decode_jpeg(contents=image, channels=3)
    image = tf.image.convert_image_dtype(image=image, dtype=tf.float32)
    # tf.image.resize expects size as [new_height, new_width].
    image = tf.image.resize(images=image,
                            size=[img_height, img_width],
                            method=tf.image.ResizeMethod.BILINEAR,
                            antialias=True)
    return image, label

In [None]:
def get_dataset(usage, numpy_dir):
    """Build the batched tf.data pipeline for one split.

    Loads ``x_<usage>.npy`` (frame paths) and ``y_<usage>.npy`` (labels) from
    ``numpy_dir`` (``<usage>`` lower-cased), maps every path through
    ``parse_function`` and yields ``(image, {"y1", "y2", "y3", "y4"})``
    batches, where ``y<k>`` is label column ``k - 1``.

    Args:
        usage: Split name; only "Train" is shuffled.
        numpy_dir: Directory holding the ``.npy`` files.

    Returns:
        The batched and prefetched dataset.
    """
    numpy_dir = Path(numpy_dir)
    split = usage.lower()
    # allow_pickle is needed for the object-dtype path array; only load files
    # produced by this notebook, since unpickling can execute arbitrary code.
    x = np.load(numpy_dir / f'x_{split}.npy', allow_pickle=True)
    y = np.load(numpy_dir / f'y_{split}.npy')
    # Width-1 slices keep each head's labels as an (n, 1) column.
    labels = {f"y{col + 1}": y[:, col:col + 1] for col in range(4)}
    dataset = tf.data.Dataset.from_tensor_slices((x, labels))
    if usage == 'Train':
        # Shuffle the (path, label) records before decoding so the shuffle
        # buffer holds file paths rather than decoded images.
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size,
                                  reshuffle_each_iteration=True)
    dataset = dataset.map(map_func=parse_function, num_parallel_calls=autotune)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(autotune)
    return dataset

In [None]:
# Build the three input pipelines in Train, Test, Validation order.
train_set, test_set, val_set = (
    get_dataset(split, numpy_dir) for split in ("Train", "Test", "Validation")
)

# Обучение ResNet50

In [None]:
def get_ResNet50_model(weight_dir, out_dir, fullyconnected=False, finetune=False):
    """Build the four-output ResNet50 model, or reload it for fine-tuning.

    With ``finetune=True`` the previously saved model is loaded from
    ``out_dir`` (the ``_fc`` file when ``fullyconnected`` is set), made
    trainable, and its first ``finetune_at`` layers are frozen again.

    Otherwise a ResNet50 without its top is created from ``weight_dir`` and
    frozen, followed by global average pooling, optionally two ReLU dense
    layers (128 and 64 units), and four 4-unit dense outputs named "y1".."y4"
    with no activation.

    Args:
        weight_dir: Path of the ResNet50 weights file.
        out_dir: Directory holding previously saved models.
        fullyconnected: Add (or load the variant with) the fc1/fc2 layers.
        finetune: Reload a saved model instead of building a new one.

    Returns:
        The Keras model (not compiled by this function).
    """
    if finetune:
        checkpoint = "/ResNet50_on_DAiSEE_fc.h5" if fullyconnected else "/ResNet50_on_DAiSEE.h5"
        trained = load_model(str(out_dir) + checkpoint)
        trained.trainable = True
        for frozen_layer in trained.layers[:finetune_at]:
            frozen_layer.trainable = False
        return trained

    backbone = ResNet50(weights=Path(weight_dir),
                        include_top=False,
                        input_shape=(img_width, img_height, 3))
    backbone.trainable = False

    features = GlobalAveragePooling2D()(backbone.output)
    if fullyconnected:
        for units, layer_name in ((128, "fc1"), (64, "fc2")):
            features = Dense(units, activation="relu", name=layer_name)(features)

    # One 4-unit output per label column.
    heads = []
    for head_name in ("y1", "y2", "y3", "y4"):
        heads.append(Dense(4, name=head_name)(features))

    return Model(inputs=backbone.input, outputs=heads)

def ResNet50_train(weight_dir, numpy_dir, out_dir, history_dir, fullyconnected=False, finetune=False):
    """Train (or fine-tune) the ResNet50 model and save the model and its history.

    The model is saved as ``out_dir/ResNet50_on_DAiSEE[_finetune][_fc].h5`` and
    the ``history.history`` dict is pickled to
    ``history_dir/resnet_training_history[_finetune][_fc].pkl``. TensorBoard
    logs go to ``out_dir/logs``.

    Args:
        weight_dir: Path of the ResNet50 weights file.
        numpy_dir: Directory with the ``x_*.npy`` / ``y_*.npy`` files.
        out_dir: Directory for models and logs; created if missing.
        history_dir: Directory for the pickled history; created if missing.
        fullyconnected: Use the variant with the extra fc1/fc2 layers.
        finetune: Reload the saved model and train at a tenth of the base
            learning rate.

    Returns:
        Tuple ``(model, history)``.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    log_dir = out_dir / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    # open(..., 'wb') does not create missing parent directories.
    history_dir = Path(history_dir)
    history_dir.mkdir(parents=True, exist_ok=True)

    train_ds = get_dataset("Train", numpy_dir)
    validation_ds = get_dataset("Validation", numpy_dir)
    model = get_ResNet50_model(weight_dir, out_dir, fullyconnected, finetune)

    # File-name suffix: "", "_fc", "_finetune" or "_finetune_fc".
    suffix = ("_finetune" if finetune else "") + ("_fc" if fullyconnected else "")
    model_path = str(out_dir / f"ResNet50_on_DAiSEE{suffix}.h5")
    history_path = history_dir / f"resnet_training_history{suffix}.pkl"
    lr = base_learning_rate / 10 if finetune else base_learning_rate

    heads = ("y1", "y2", "y3", "y4")
    model.compile(optimizer=RMSprop(learning_rate=lr),
                  # The output layers have no activation, hence from_logits=True.
                  loss={head: SparseCategoricalCrossentropy(from_logits=True)
                        for head in heads},
                  metrics={head: "sparse_categorical_accuracy" for head in heads})
    # summary() prints by itself and returns None.
    model.summary()

    callbacks = [
        EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=2, verbose=1),
        TensorBoard(log_dir=str(log_dir)),
    ]

    # No explicit step counts: each epoch iterates the finite datasets once.
    history = model.fit(train_ds,
                        epochs=1,
                        initial_epoch=old_epoch,
                        validation_data=validation_ds,
                        callbacks=callbacks)

    model.save(model_path)
    with open(history_path, 'wb') as file:
        pickle.dump(history.history, file)
    return model, history

In [None]:
# ResNet50 with a frozen backbone and no extra dense layers.
model, history = ResNet50_train(
    weight_dir=resnet_weights_dir,
    numpy_dir=numpy_dir,
    out_dir=model_dir,
    history_dir=history_dir,
    fullyconnected=False,
    finetune=False,
)

In [None]:
# ResNet50 with a frozen backbone plus the fc1/fc2 dense layers.
model, history = ResNet50_train(
    weight_dir=resnet_weights_dir,
    numpy_dir=numpy_dir,
    out_dir=model_dir,
    history_dir=history_dir,
    fullyconnected=True,
    finetune=False,
)

# Обучение Xception

In [None]:
def get_Xception_model(weight_dir, out_dir, fullyconnected=False, finetune=False):
    """Build the four-output Xception model, or reload it for fine-tuning.

    With ``finetune=True`` the previously saved model is loaded from
    ``out_dir`` (the ``_fc`` file when ``fullyconnected`` is set), made
    trainable, and its first ``finetune_at`` layers are frozen again.

    Otherwise an Xception without its top is created from ``weight_dir`` and
    frozen, followed by global average pooling, optionally two ReLU dense
    layers (128 and 64 units), and four 4-unit dense outputs named "y1".."y4"
    with no activation.

    Args:
        weight_dir: Path of the Xception weights file.
        out_dir: Directory holding previously saved models.
        fullyconnected: Add (or load the variant with) the fc1/fc2 layers.
        finetune: Reload a saved model instead of building a new one.

    Returns:
        The Keras model (not compiled by this function).
    """
    if finetune:
        checkpoint = "/Xception_on_DAiSEE_fc.h5" if fullyconnected else "/Xception_on_DAiSEE.h5"
        trained = load_model(str(out_dir) + checkpoint)
        trained.trainable = True
        for frozen_layer in trained.layers[:finetune_at]:
            frozen_layer.trainable = False
        return trained

    backbone = Xception(weights=weight_dir,
                        include_top=False,
                        input_shape=(img_width, img_height, 3))
    backbone.trainable = False

    features = GlobalAveragePooling2D()(backbone.output)
    if fullyconnected:
        for units, layer_name in ((128, "fc1"), (64, "fc2")):
            features = Dense(units, activation="relu", name=layer_name)(features)

    # One 4-unit output per label column.
    heads = []
    for head_name in ("y1", "y2", "y3", "y4"):
        heads.append(Dense(4, name=head_name)(features))

    return Model(inputs=backbone.input, outputs=heads)


def Xception_train(weight_dir, numpy_dir, out_dir, history_dir="/kaggle/working/history",
                   fullyconnected=False, finetune=False, epochs=1):
    """Train (or fine-tune) the Xception model and save the model and its history.

    The model is saved as ``out_dir/Xception_on_DAiSEE[_finetune][_fc].h5`` and
    the ``history.history`` dict is pickled to
    ``history_dir/xception_training_history[_finetune][_fc].pkl``. TensorBoard
    logs go to ``out_dir/logs``. Only the first 48 training batches and the
    first 16 validation batches are used.

    Side effect: the module-level ``old_epoch`` is reset to 0 after a
    fine-tuning run and otherwise set to the index of the last epoch run.

    Args:
        weight_dir: Path of the Xception weights file.
        numpy_dir: Directory with the ``x_*.npy`` / ``y_*.npy`` files.
        out_dir: Directory for models and logs; created if missing.
        history_dir: Directory for the pickled history; created if missing.
            Same position as in ``ResNet50_train``.
        fullyconnected: Use the variant with the extra fc1/fc2 layers.
        finetune: Reload the saved model and train at a tenth of the base
            learning rate.
        epochs: Value passed to ``Model.fit(epochs=...)``.

    Returns:
        Tuple ``(model, history)``.
    """
    global old_epoch
    if isinstance(history_dir, bool):
        # Legacy positional call Xception_train(w, n, o, fullyconnected[, finetune]):
        # move the flags back into place and use the default history directory.
        finetune = finetune or fullyconnected
        fullyconnected = history_dir
        history_dir = "/kaggle/working/history"

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    log_dir = out_dir / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    # open(..., 'wb') does not create missing parent directories.
    history_dir = Path(history_dir)
    history_dir.mkdir(parents=True, exist_ok=True)

    train_ds = get_dataset("Train", numpy_dir).take(48)
    validation_ds = get_dataset("Validation", numpy_dir).take(16)
    model = get_Xception_model(weight_dir, out_dir, fullyconnected, finetune)

    # File-name suffix: "", "_fc", "_finetune" or "_finetune_fc".
    suffix = ("_finetune" if finetune else "") + ("_fc" if fullyconnected else "")
    model_path = str(out_dir / f"Xception_on_DAiSEE{suffix}.h5")
    history_path = history_dir / f"xception_training_history{suffix}.pkl"
    lr = base_learning_rate / 10 if finetune else base_learning_rate
    # No extra epochs are added for fine-tuning.
    finetune_epochs = 0

    heads = ("y1", "y2", "y3", "y4")
    model.compile(optimizer=RMSprop(learning_rate=lr),
                  # The output layers have no activation, hence from_logits=True.
                  loss={head: SparseCategoricalCrossentropy(from_logits=True)
                        for head in heads},
                  metrics={head: "sparse_categorical_accuracy" for head in heads})
    # summary() prints by itself and returns None.
    model.summary()

    callbacks = [
        EarlyStopping(monitor='val_loss', min_delta=1e-2,
                      patience=2, verbose=1),
        TensorBoard(log_dir=str(log_dir))
    ]

    total_epochs = epochs + finetune_epochs
    history = model.fit(train_ds,
                        epochs=total_epochs,
                        initial_epoch=old_epoch,
                        callbacks=callbacks,
                        validation_data=validation_ds)

    model.save(model_path)
    with open(history_path, 'wb') as file:
        pickle.dump(history.history, file)

    # NOTE(review): old_epoch is shared by all training functions and holds the
    # 0-based index of the last epoch; confirm this is the intended start for
    # a following run when epochs > 1.
    if finetune:
        old_epoch = 0
    elif history.epoch:
        # history.epoch is empty when no epoch ran (initial_epoch >= epochs).
        old_epoch = history.epoch[-1]
    return model, history

In [None]:
# Xception with fullyconnected=False (no fc1/fc2 layers) and finetune=False.
model, history = Xception_train(xception_weights_dir, numpy_dir, model_dir, history_dir, fullyconnected=False, finetune=False)

In [None]:
# Xception with fullyconnected=True (adds the fc1/fc2 layers) and finetune=False.
model, history = Xception_train(xception_weights_dir, numpy_dir, model_dir, history_dir, fullyconnected=True, finetune=False)

# Оценка моделей

In [None]:
# Evaluate the ResNet50 model (no fc layers) on the test split. The path is
# derived from model_dir so it follows the configuration cell.
resnet_model = load_model(str(Path(model_dir) / "ResNet50_on_DAiSEE.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
resnet_accuracy = resnet_model.evaluate(test_set)

In [None]:
# Evaluate the ResNet50 model with the fc1/fc2 layers on the test split. The
# path is derived from model_dir so it follows the configuration cell.
resnet_model = load_model(str(Path(model_dir) / "ResNet50_on_DAiSEE_fc.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
resnet_accuracy = resnet_model.evaluate(test_set)

In [None]:
# Evaluate the Xception model (no fc layers) on the test split. Training saves
# into model_dir, so load from there rather than from /kaggle/working/model.
xception_model = load_model(str(Path(model_dir) / "Xception_on_DAiSEE.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
xception_accuracy = xception_model.evaluate(test_set)

In [None]:
# Evaluate the Xception model with the fc1/fc2 layers on the test split.
# Training saves into model_dir, so load from there rather than from
# /kaggle/working/model.
xception_model = load_model(str(Path(model_dir) / "Xception_on_DAiSEE_fc.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
xception_accuracy = xception_model.evaluate(test_set)

In [None]:
# Evaluate the fine-tuned Xception model (no fc layers) on the test split.
# Training saves into model_dir, so load from there rather than from
# /kaggle/working/model.
# NOTE(review): no visible cell trains with finetune=True; confirm that this
# checkpoint is produced before this cell runs.
xception_model = load_model(str(Path(model_dir) / "Xception_on_DAiSEE_finetune.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
xception_accuracy = xception_model.evaluate(test_set)

In [None]:
# Evaluate the fine-tuned Xception model with the fc1/fc2 layers on the test
# split. Training saves into model_dir, so load from there rather than from
# /kaggle/working/model.
# NOTE(review): no visible cell trains with finetune=True; confirm that this
# checkpoint is produced before this cell runs.
xception_model = load_model(str(Path(model_dir) / "Xception_on_DAiSEE_finetune_fc.h5"))
# Holds whatever Model.evaluate returns for the compiled losses and metrics.
xception_accuracy = xception_model.evaluate(test_set)