In [1]:
# Set the CUDA_VISIBLE_DEVICES environment variable to "1" for this kernel process.
%env CUDA_VISIBLE_DEVICES=1

# Provisional device name; the device-selection cell further down reassigns `device` to a torch.device.
device='cuda'

env: CUDA_VISIBLE_DEVICES=1


In [2]:
# One-off environment setup. The `if False:` guard keeps the installs from running;
# change it to True to execute the pip commands below.
if False:
    !pip install mtcnn
    !pip install timm==0.4.5
    !pip install tensorflow
    !pip install numba
    #!pip install keras==2.4.3
    #!pip install -q torch==1.7.1 torchvision
    !pip install torch==2.1.0+cu118 torchvision==0.16.0+cu118 -f https://download.pytorch.org/whl/torch_stable.html

In [3]:
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn import preprocessing, metrics
from sklearn.ensemble import RandomForestClassifier
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical
from copy import deepcopy
from tqdm import tqdm

from mtcnn import MTCNN

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model, model_from_json
from tensorflow.keras.optimizers import Adam, SGD

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torchvision import datasets, transforms

print(f"Torch: {torch.__version__}")

2024-06-25 13:36:36.388259: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-25 13:36:36.556862: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-25 13:36:36.556925: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-25 13:36:36.558616: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-25 13:36:36.651860: I tensorflow/core/platform/cpu_feature_g

Torch: 2.1.0+cu118


In [4]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dropout, Dense, Activation, Concatenate, Reshape
from tensorflow.keras.layers import Flatten, RepeatVector, Permute, TimeDistributed
from tensorflow.keras.layers import Multiply, Lambda, Softmax

In [5]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)
print()

# When running on CUDA, also report the name and current memory usage of device 0.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024 ** 3, 1), 'GB')
    print('Reserved:   ', round(torch.cuda.memory_reserved(0) / 1024 ** 3, 1), 'GB')

Using device: cuda

NVIDIA GeForce RTX 2080 Ti
Memory Usage:
Allocated: 0.0 GB
Reserved:    0.0 GB


## Helpers

In [6]:
import pickle

def save_features(file_name, features):
    """Pickle ``features`` to ``file_name`` without overwriting an existing file.

    Args:
        file_name: destination path of the pickle file.
        features: any picklable object.

    If ``file_name`` already exists, an error message is printed and nothing
    is written. Missing parent directories are created first, so the write
    does not fail just because the output folder is absent.
    """
    if os.path.isfile(file_name):
        print("Error! Cannot save features because file already exists")
        return
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        # A bare file name has no parent component; only create real folders.
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_name, 'wb') as f:
        pickle.dump(features, f)
    
def load_features(file_name):
    """Return the object stored in the pickle file ``file_name``.

    NOTE: unpickling can execute arbitrary code, so only load files that were
    produced by a trusted source.
    """
    with open(file_name, 'rb') as pickled:
        restored = pickle.load(pickled)
    return restored

def save_weights(model, file_name):
    """Save ``model``'s weights to ``file_name`` unless that file already exists.

    Args:
        model: object exposing ``save_weights(path)``.
        file_name: destination path of the weights file.

    An existing file is never overwritten: an error message is printed and the
    function returns without calling ``model.save_weights``.
    """
    if os.path.isfile(file_name):
        print("Error! Cannot save weights because file already exists")
        return
    model.save_weights(file_name)

def load_weights(model, file_name):
    """Restore ``model``'s weights from ``file_name`` via ``model.load_weights``."""
    restore = model.load_weights
    restore(file_name)

In [7]:
# Preprocessing applied to every PIL image before it is fed to a PyTorch model:
# resize to 224x224, convert to a tensor, then normalise each channel.
# The mean/std values match the commonly used ImageNet channel statistics.
test_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_features(root_dir, directories):
    """Run the current CNN over every frame folder and collect its outputs.

    Relies on notebook globals that must be set before the call: ``ext``
    (image file suffix), ``torch_model`` (True for a PyTorch model, False for a
    Keras one), ``cnn_model``, ``BATCH_SIZE``, ``device`` and
    ``test_transforms``.

    Args:
        root_dir: directory containing one sub-folder of images per video.
        directories: names of the sub-folders to process; names starting with
            '.' are skipped and produce no entry in the result.

    Returns:
        A list with one entry per processed folder, in input order. Each entry
        is a numpy array holding one row of model output per image (images are
        visited in ``os.listdir`` order), or an empty list when the folder
        contains no file ending with ``ext``.
    """
    def run_model(batch):
        # Forward one batch through the model and return a numpy array.
        if torch_model:
            # Inference only: skip autograd bookkeeping to save GPU memory.
            with torch.no_grad():
                outputs = cnn_model(torch.stack(batch, dim=0).to(device))
            return outputs.detach().cpu().numpy()
        return cnn_model.predict(np.array(batch), verbose=0)

    all_features = []
    for i, d in enumerate(directories):
        if d.startswith('.'):
            continue
        print('[{}/{}]'.format(i+1, len(directories)), end="\r")
        pending = []  # images waiting for the next forward pass
        chunks = []   # per-batch outputs of this folder
        path = os.path.join(root_dir, d)
        # NOTE(review): os.listdir order is arbitrary, so rows are not
        # guaranteed to follow frame order - confirm whether that matters.
        for img in os.listdir(path):
            if not img.endswith(ext):
                continue
            img_file = os.path.join(path, img)
            if torch_model:
                # Force 3 channels: Normalize in test_transforms is configured
                # with 3-channel statistics and fails on grayscale/RGBA input.
                image = Image.open(img_file).convert('RGB')
                pending.append(test_transforms(image))
            else:
                image = cv2.imread(img_file)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                pending.append(image)

            if len(pending) >= BATCH_SIZE:
                chunks.append(run_model(pending))
                pending = []

        if pending:
            # Flush the last, partially filled batch.
            chunks.append(run_model(pending))

        # Concatenate once per folder instead of re-copying after every batch.
        all_features.append(np.concatenate(chunks, axis=0) if chunks else [])
    return all_features

def extract_or_read_features(pickle, data_files, directory):
    """Return a ``{folder name: features}`` mapping, cached in a pickle file.

    Args:
        pickle: path of the cache file (the name shadows the ``pickle`` module
            inside this function only).
        data_files: names of the frame folders inside ``directory``.
        directory: root folder passed on to ``extract_features``.

    Returns:
        The mapping loaded from ``pickle`` when that file exists; otherwise the
        freshly extracted mapping, which is also saved to ``pickle``.
    """
    if os.path.isfile(pickle):
        return load_features(pickle)

    # extract_features silently skips names starting with '.', so its result
    # can be shorter than data_files. Drop those names up front; pairing by
    # index against the unfiltered list would attach features to the wrong name.
    visible_files = [name for name in data_files if not name.startswith('.')]
    features = extract_features(directory, visible_files)
    file2features = dict(zip(visible_files, features))
    save_features(pickle, file2features)
    return file2features

In [8]:
# Image size constant; test_transforms hard-codes the same (224, 224) and no visible cell reads this name.
INPUT_SIZE = (224, 224)
# Checkpoints to process: local file name -> download URL. The extraction cell iterates over
# these keys, so commented-out entries are skipped. A '.pt' suffix marks a PyTorch checkpoint;
# anything else is loaded as a Keras model.
model_urls = {
    #'affectnet_7_vggface2_rexnet150.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/affectnet_7_vggface2_rexnet150.pt',
    ##'enet_b0_7.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/enet_b0_7.pt',
    'enet_b0_8_best_afew.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/enet_b0_8_best_afew.pt',
    ##'enet_b0_8_best_vgaf.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/enet_b0_8_best_vgaf.pt',
    ##'enet_b2_7.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/enet_b2_7.pt',
    ##'enet_b2_8.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/enet_b2_8.pt',
    'mobilenet_7.h5': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/b55d78c7d2a4c02339de916936fc73749eb58798/models/affectnet_emotions/mobilenet_7.h5',
    ##'enet_b0_8_va_mtl.pt': 'https://github.com/HSE-asavchenko/face-emotion-recognition/raw/24c7b22228f88429bf11c64f1f3f292b8e8abe32/models/affectnet_emotions/enet_b0_8_va_mtl.pt',
    #'EfficientNet_B0.pt': 'https://github.com/PolinaDemochkina/diploma_hse/raw/main/app/src/main/assets/EfficientNet_B0.pt',
}

def download_model(model_url, model_name):
    """Download ``model_url`` to the local file ``model_name`` if it is missing.

    Args:
        model_url: URL of the checkpoint.
        model_name: local destination path; nothing happens if it already
            exists as a file.

    The data is first written to ``model_name + '.part'`` and renamed only
    after the transfer has finished, so an interrupted download never leaves a
    truncated file that would later be mistaken for a complete checkpoint.
    Errors raised by ``urllib.request.urlretrieve`` propagate to the caller.
    """
    import urllib.request

    if os.path.isfile(model_name):
        return

    partial_name = model_name + '.part'
    try:
        urllib.request.urlretrieve(model_url, partial_name)
        os.replace(partial_name, model_name)
    finally:
        # After a successful rename the partial file is gone; after a failure
        # remove whatever was written so far.
        if os.path.exists(partial_name):
            os.remove(partial_name)

In [9]:
def print_classes(files):
    """Print the label distribution of ``files`` and return the rarest label's count.

    Each name in ``files`` is looked up in the global ``video2label``. For every
    distinct label (in ``np.unique`` order) one line
    ``"<label> <count>/<total>: <percent>%"`` is printed. Returns the smallest
    per-label count, or ``len(files)`` when no label is seen.
    """
    labels = [video2label[name] for name in files]
    total = len(labels)
    smallest = len(files)
    for cls in np.unique(labels):
        n_cls = labels.count(cls)
        smallest = min(smallest, n_cls)
        print("{} {}/{}: {}%".format(cls, n_cls, total, n_cls * 100 / total))
    return smallest

## Prepare EngageWild dataset

In [10]:
# Root folder of the EngageWild (EmotiW engagement) data; absolute, machine-specific path.
DATA_DIR='/home/HDD6TB/datasets/emotions/EmotiW/engagement/'
# Suffix of the frame image files; extract_features only reads files ending with it.
ext = "png"

In [11]:
import csv

# NOTE(review): labels_list is not used in this cell - presumably class names for a later binary task; confirm.
labels_list = ['distracted', 'engaged']

# Map each video name to its engagement score, read from the tab-separated label file.
video2label = {}
# newline='' is what the csv module documents for files handed to csv.reader.
with open(os.path.join(DATA_DIR, 'Engagement_Labels_Engagement.csv'), mode='r', newline='') as csvfile:
    labels_reader = csv.reader(csvfile, delimiter='\t')
    for i, row in enumerate(labels_reader):
        if i == 0:
            # Header row: show it and move on.
            print('first:', row)
            continue
        if not row:
            # Blank lines (e.g. a trailing newline) carry no label.
            continue
        videoname, label = row[0], float(row[1])
        video2label[videoname] = label

# NOTE(review): 'subject_87_Vid_3' is given the label of 'subject_77_Vid_6';
# the original author flagged this fix as needing verification.
video2label['subject_87_Vid_3'] = video2label['subject_77_Vid_6']
print(len(video2label))
print(video2label)

first: ['subject', 'label']
196
{'subject_1_Vid_1': 1.0, 'subject_1_Vid_2': 1.0, 'subject_1_Vid_3': 0.66, 'subject_1_Vid_4': 1.0, 'subject_1_Vid_5': 1.0, 'subject_31_Vid_6': 1.0, 'subject_2_Vid_6': 0.33, 'subject_3_Vid_6': 1.0, 'subject_3_Vid_1': 0.33, 'subject_3_Vid_2': 0.33, 'subject_3_Vid_3': 0.66, 'subject_3_Vid_4': 0.66, 'subject_3_Vid_5': 0.33, 'subject_3_Vid_7': 1.0, 'subject_4_Vid_6': 1.0, 'subject_5_Vid_6': 1.0, 'subject_6_Vid_6': 0.33, 'subject_7_Vid_1': 0.66, 'subject_7_Vid_2': 0.66, 'subject_7_Vid_3': 0.66, 'subject_7_Vid_4': 0.66, 'subject_7_Vid_5': 0.33, 'subject_8_Vid_6': 1.0, 'subject_9_Vid_6': 0.0, 'subject_10_Vid_6': 0.66, 'subject_11_Vid_6': 1.0, 'subject_12_Vid_6': 0.0, 'subject_13_Vid_6': 0.66, 'subject_14_Vid_6': 0.66, 'subject_15_Vid_6': 0.66, 'subject_16_Vid_6': 0.33, 'subject_17_Vid_6': 0.66, 'subject_18_Vid_6': 0.33, 'subject_19_Vid_6': 1.0, 'subject_20_Vid_6': 0.66, 'subject_20_Vid_1': 0.66, 'subject_20_Vid_2': 0.66, 'subject_20_Vid_3': 0.66, 'subject_20_Vid_

In [12]:
# Folder for this dataset's cached feature pickles; the extraction cell prepends it to every pickle file name.
FEATURES_DIR = "features_EngageWild/"

### Facial images

In [13]:
# Folder with the face images, one sub-folder per split (the path suggests MTCNN-aligned crops).
FACES_DIR = os.path.join(DATA_DIR, "frames/faces/mtcnn_aligned")

In [14]:
# Training split: one entry per video folder.
train_dir = os.path.join(FACES_DIR, "Train")
train_data_files = os.listdir(train_dir)

# The "validation" folder serves as the test split for this dataset.
test_dir = os.path.join(FACES_DIR, "validation")
test_data_files = os.listdir(test_dir)

print(len(train_data_files))
print(len(test_data_files))

# No separate validation split here; the extraction cell checks for None and skips it.
val_data_files = None

147
48


In [15]:
# Print the label distribution of each split; print_classes also returns the count of the rarest label.
print("Train:")
train_min_num = print_classes(train_data_files)
print("Test:")
test_min_num = print_classes(test_data_files)

Train:
0.0 5/147: 3.401360544217687%
0.33 35/147: 23.80952380952381%
0.66 79/147: 53.74149659863946%
1.0 28/147: 19.047619047619047%
Test:
0.0 4/48: 8.333333333333334%
0.33 10/48: 20.833333333333332%
0.66 19/48: 39.583333333333336%
1.0 15/48: 31.25%


## Prepare DAiSEE dataset

In [10]:
# Root folder of the DAiSEE data; absolute, machine-specific path.
DATA_DIR='/home/HDD6TB/datasets/emotions/DAiSEE/'
# Suffix of the frame image files; extract_features only reads files ending with it.
ext = "jpg"

In [11]:
import pandas as pd
# Load the label table for all clips.
df=pd.read_csv(os.path.join(DATA_DIR,'Labels/AllLabels.csv'))
# Remove spaces from the column names (presumably some headers contain them - verify against the CSV).
df.columns = df.columns.str.replace(' ', '')
# NOTE(review): this is not the last expression of the cell, so its result is not displayed.
df.head()
# Nested dict: {column name: {ClipID: value}}.
labels2fileAndValues=df.set_index('ClipID').to_dict()
# Clip name without its file extension -> value of the 'Engagement' column.
video2label={os.path.splitext(video)[0]:val for video,val in labels2fileAndValues['Engagement'].items()}
print(len(video2label))
print(video2label)

8925
{'1100011002': 2, '1100011003': 2, '1100011004': 3, '1100011005': 3, '1100011006': 3, '1100011007': 2, '1100011008': 3, '1100011009': 2, '1100011010': 3, '1100011011': 3, '1100011012': 2, '1100011013': 3, '1100011014': 3, '1100011015': 3, '1100011016': 3, '1100011017': 3, '1100011018': 3, '1100011019': 3, '1100011020': 3, '1100011021': 3, '1100011022': 3, '1100011023': 3, '1100011025': 3, '1100011026': 3, '1100011027': 3, '1100011028': 3, '1100011029': 3, '1100011031': 3, '1100011032': 3, '1100011034': 3, '1100011035': 3, '1100011037': 3, '1100011038': 3, '1100011040': 2, '1100011046': 3, '1100011047': 3, '1100011048': 2, '1100011049': 3, '1100011050': 3, '1100011051': 3, '1100011052': 3, '1100011053': 3, '1100011054': 3, '1100011055': 3, '1100011056': 3, '1100011057': 3, '1100011058': 3, '1100011059': 3, '1100011060': 3, '1100011062': 3, '1100011063': 3, '1100011064': 3, '1100011066': 3, '1100011067': 3, '1100011068': 3, '1100011069': 3, '1100011070': 3, '1100011071': 3, '1100011

In [12]:
# Folder for this dataset's cached feature pickles; the extraction cell prepends it to every pickle file name.
FEATURES_DIR = "features_DAiSEE/"

### Facial images

In [13]:
# Folder with the face images, one sub-folder per split (the path suggests MTCNN-aligned crops).
FACES_DIR = os.path.join(DATA_DIR, "faces/mtcnn_aligned")

In [14]:
test_dir = os.path.join(FACES_DIR, "Test")
train_dir = os.path.join(FACES_DIR, "Train")
val_dir = os.path.join(FACES_DIR, "Validation")

# Keep only the clip folders that have an engagement label. The same filter is
# applied to all three splits: an unlabeled folder would raise KeyError in
# print_classes. (Previously the test list was filtered twice and the train
# list not at all.)
train_data_files = [x for x in os.listdir(train_dir) if x in video2label]
test_data_files = [x for x in os.listdir(test_dir) if x in video2label]
val_data_files = [x for x in os.listdir(val_dir) if x in video2label]

In [15]:
# Print the label distribution of each split; print_classes also returns the count of the rarest label.
print("Train:")
train_min_num = print_classes(train_data_files)
print("Test:")
test_min_num = print_classes(test_data_files)
print("Val:")
val_min_num = print_classes(val_data_files)

Train:
0 34/5482: 0.6202116016052536%
1 214/5482: 3.903684786574243%
2 2649/5482: 48.32178037212696%
3 2585/5482: 47.15432323969354%
Test:
0 4/1723: 0.2321532211259431%
1 81/1723: 4.7011027278003485%
2 861/1723: 49.97098084735926%
3 777/1723: 45.095763203714455%
Val:
0 23/1720: 1.3372093023255813%
1 160/1720: 9.30232558139535%
2 912/1720: 53.02325581395349%
3 625/1720: 36.33720930232558%


## Extract all features

In [None]:
# Make sure the cache folder exists before any features are extracted into it.
os.makedirs(FEATURES_DIR, exist_ok=True)
# The EfficientNet_B0 branch forces CPU execution; remember the selected device
# so that choice does not leak into the models processed afterwards.
default_device = device

for base_model_key in model_urls.keys():
    print("Model: ", base_model_key)
    download_model(model_urls[base_model_key], base_model_key)
    base_model_name = './' + base_model_key
    device = default_device
    # '.pt' checkpoints are PyTorch models; everything else is loaded with Keras.
    torch_model = base_model_name.endswith('.pt')
    BATCH_SIZE = 64 if torch_model else 128

    # NOTE: torch.load unpickles whole model objects and can execute arbitrary
    # code; only use checkpoints from a trusted source.
    if base_model_key in ('enet_b2_7.pt', 'enet_b2_8.pt'):
        cnn_model = torch.load(base_model_name, map_location=device)
        cnn_model = cnn_model.to(device)
        cnn_model.eval()

        # Keep the classification head (plus softmax) separately, then replace it
        # with Identity so cnn_model returns the features that fed the head.
        # dim=-1 normalises over the class axis for both the 1-D smoke-test input
        # below and batched input (dim=0 would normalise across a batch).
        last_layer = torch.nn.Sequential(cnn_model.classifier, torch.nn.Softmax(dim=-1))
        # Smoke test of the detached head on a random 1408-element feature vector.
        inp = torch.randn(1408).to(device)
        f = last_layer.forward(inp)
        cnn_model.classifier = torch.nn.Identity()
    elif base_model_key == 'enet_b0_8_va_mtl.pt':
        # `==` on purpose: `in ('name')` is a substring test on a string, not tuple membership.
        cnn_model = torch.load(base_model_name, map_location=device)
        cnn_model = cnn_model.to(device)
        cnn_model.eval()

        last_layer = torch.nn.Sequential(cnn_model.classifier, torch.nn.Softmax(dim=1))
        # Smoke test of the detached head on a random batch of 20 feature vectors of size 1280.
        inp = torch.randn(20, 1280).to(device)
        f = last_layer.forward(inp)
        cnn_model.classifier = torch.nn.Identity()
    elif base_model_key == 'EfficientNet_B0.pt':
        # This checkpoint is run on the CPU; extract_features reads the global `device`.
        device = torch.device('cpu')
        cnn_model = torch.load(base_model_name, map_location=device)
        cnn_model = cnn_model.to(device)
        cnn_model.eval()
    elif torch_model:
        cnn_model = torch.load(base_model_name, map_location=device)
        cnn_model = cnn_model.to(device)
        cnn_model.eval()

        # Here only the first sub-module of the classifier is kept as the head.
        last_layer = torch.nn.Sequential(cnn_model.classifier[0], torch.nn.Softmax(dim=1))
        # Smoke test of the detached head on a random batch of 20 feature vectors of size 1280.
        inp = torch.randn(20, 1280).to(device)
        f = last_layer.forward(inp)
        cnn_model.classifier = torch.nn.Identity()
    else:
        # Keras model: cut the network at the 'global_pooling' layer and use its output as features.
        base_model = load_model(base_model_name)
        layer_out = base_model.get_layer('global_pooling')
        cnn_model = Model(base_model.input, layer_out.output)

    # Each call either loads the cached pickle or extracts and caches the features;
    # the returned mapping is not needed here.
    print("Create train dataset")
    train_pickle = FEATURES_DIR + 'train_features_engagement_{}.pickle'.format(base_model_key)
    _ = extract_or_read_features(train_pickle, train_data_files, train_dir)

    print("Create test dataset")
    test_pickle = FEATURES_DIR + 'test_features_engagement_{}.pickle'.format(base_model_key)
    _ = extract_or_read_features(test_pickle, test_data_files, test_dir)

    # val_data_files is None for datasets without a separate validation split.
    if val_data_files is not None:
        print("Create val dataset:")
        val_pickle = FEATURES_DIR + 'val_features_engagement_{}.pickle'.format(base_model_key)
        _ = extract_or_read_features(val_pickle, val_data_files, val_dir)

Model:  enet_b0_8_best_afew.pt
Create train dataset
Create test dataset
Create val dataset:
Model:  mobilenet_7.h5


2024-06-25 13:37:18.588719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7442 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5


Create train dataset
[1/5482]

2024-06-25 13:37:20.419508: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2024-06-25 13:37:21.649452: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


[5027/5482]