This notebook contains the code used in the BigComp 2022 paper: <br>
<b>Heeryon Cho, Woo-Kyu Kang, Younsoo Park, Sungeu Chae, and Seong-joon Kim, "Multi-label Facial Emotion Recognition Using Korean Drama Movie Clips," BigComp 2022.</b>
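### Split the data into train / validation / test sets

Each label file `label/label_<dataset>.csv` is split into roughly 80% train, 10% validation, and 10% test, and the three parts are written to `label/label_<dataset>_train.csv`, `label/label_<dataset>_valid.csv`, and `label/label_<dataset>_test.csv`.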

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import collections

In [2]:
def _print_label_freq(split_name, labels):
    """Print how many rows of ``labels`` fall into each class.

    Every row is reduced to one class index with ``argmax``. If a row has
    several equal maxima (e.g. more than one active label), only the first
    such column is counted, so the report is a summary rather than a full
    multi-label count.

    Args:
        split_name: Prefix used in the printed lines (e.g. ``"y_valid"``).
        labels: DataFrame holding only the label columns of one split.
    """
    class_ids = labels.values.argmax(axis=1)
    freq = collections.Counter(class_ids)
    print(split_name + "_freq:", freq)
    print(split_name + "_uniq_class:", len(freq))


def create_split_dataset(dataset_name, stratify, label_dir="label",
                         valid_size=0.09999, test_size=0.11111,
                         random_state=42):
    """Split one label CSV into train / valid / test CSV files.

    Reads ``<label_dir>/label_<dataset_name>.csv``, splits it in two steps
    (first valid vs. rest, then test vs. train on the rest), prints the shape
    and class frequencies of every part, and writes
    ``<label_dir>/label_<dataset_name>_{valid,train,test}.csv``.

    Args:
        dataset_name: Middle part of the CSV file name, e.g. ``"22_emo"``.
        stratify: If true, both splits are stratified on the label columns;
            otherwise the rows are split without stratification.
        label_dir: Directory that holds the input CSV and receives the outputs.
        valid_size: ``test_size`` passed to the first ``train_test_split``
            call (fraction of all rows that becomes the validation set).
        test_size: ``test_size`` passed to the second ``train_test_split``
            call (fraction of the remaining rows that becomes the test set).
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        None. The results are the three CSV files written to ``label_dir``.

    Note:
        The default fractions are kept at 0.09999 and 0.11111 because they
        reproduce the recorded ~80/10/10 split sizes. They are presumably set
        slightly below 1/10 and 1/9 to avoid an extra row from rounding;
        TODO confirm before changing them.
    """
    print("======", dataset_name, "======")
    prefix = label_dir + "/label_" + dataset_name
    df_all = pd.read_csv(prefix + ".csv")
    print("column_info:", df_all.columns)
    print("data_all:", df_all.shape)
    # Everything after the first two columns is treated as a label column.
    label_all = df_all.iloc[:, 2:]

    # Step 1: carve the validation set out of the full data.
    X_temp, X_valid, y_temp, y_valid = train_test_split(
        df_all, label_all,
        stratify=label_all if stratify else None,
        random_state=random_state, test_size=valid_size)
    print("y_temp:", y_temp.shape)
    print("y_valid:", y_valid.shape)
    _print_label_freq("y_valid", y_valid)
    _print_label_freq("y_temp", y_temp)

    # Step 2: split the remainder into train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X_temp, y_temp,
        stratify=y_temp if stratify else None,
        random_state=random_state, test_size=test_size)
    print("y_train:", y_train.shape)
    print("y_test:", y_test.shape)
    _print_label_freq("y_train", y_train)
    _print_label_freq("y_test", y_test)

    # Write the full rows (identifier columns plus labels) of each split.
    for split_name, split_df in (("valid", X_valid),
                                 ("train", X_train),
                                 ("test", X_test)):
        split_df.to_csv(prefix + "_" + split_name + ".csv", index=False)

In [3]:
# List the label CSVs that exist before any split is created.
!ls -la label/*

-rw-r--r-- 1 root root 1505005 Dec  6 11:52 label/label_22_emo.csv
-rw-r--r-- 1 root root  563831 Dec  6 11:12 label/label_6_basic_emo.csv
-rw-r--r-- 1 root root 3105579 Dec  6 11:12 label/label_multi-label.csv


In [4]:
# Split the 6-basic-emotion labels; stratify=True keeps the splits stratified on the label columns.
create_split_dataset("6_basic_emo", True)

column_info: Index(['img_id', 'clip_id', 'angry', 'disgusted', 'surprised', 'fearful',
       'happy', 'sad'],
      dtype='object')
data_all: (12813, 8)
y_temp: (11531, 6)
y_valid: (1282, 6)
y_valid_freq: Counter({0: 759, 2: 272, 5: 193, 3: 28, 4: 18, 1: 12})
y_valid_uniq_class: 6
y_temp_freq: Counter({0: 6827, 2: 2443, 5: 1737, 3: 252, 4: 167, 1: 105})
y_temp_uniq_class: 6
y_train: (10249, 6)
y_test: (1282, 6)
y_train_freq: Counter({0: 6068, 2: 2171, 5: 1544, 3: 224, 4: 149, 1: 93})
y_train_uniq_class: 6
y_test_freq: Counter({0: 759, 2: 272, 5: 193, 3: 28, 4: 18, 1: 12})
y_test_uniq_class: 6


In [5]:
# Split the 22-emotion labels; stratify=True keeps the splits stratified on the label columns.
create_split_dataset("22_emo", True)

column_info: Index(['img_id', 'clip_id', 'moved', 'fearful', 'surprised', 'angry',
       'anxious', 'smitten', 'fluttered', 'disappointed', 'fulfilled',
       'ashamed', 'sad', 'upset', 'sympathetic', 'passionate', 'depressed',
       'amused', 'affectionate', 'guilty', 'jealous', 'peaceful', 'happy',
       'disgusted'],
      dtype='object')
data_all: (19800, 24)
y_temp: (17820, 22)
y_valid: (1980, 22)
y_valid_freq: Counter({3: 759, 2: 272, 10: 193, 11: 170, 4: 95, 7: 92, 12: 51, 17: 47, 15: 45, 16: 43, 0: 31, 5: 30, 1: 28, 14: 20, 20: 19, 18: 17, 6: 16, 8: 15, 19: 15, 21: 12, 9: 7, 13: 3})
y_valid_uniq_class: 22
y_temp_freq: Counter({3: 6827, 2: 2443, 10: 1737, 11: 1532, 4: 856, 7: 826, 12: 458, 17: 427, 15: 410, 16: 384, 0: 281, 5: 270, 1: 252, 14: 182, 20: 166, 18: 157, 6: 144, 19: 138, 8: 134, 21: 105, 9: 66, 13: 25})
y_temp_uniq_class: 22
y_train: (15840, 22)
y_test: (1980, 22)
y_train_freq: Counter({3: 6068, 2: 2172, 10: 1544, 11: 1362, 4: 761, 7: 734, 12: 407, 17: 380, 15: 3

In [6]:
# Split the multi-label set without stratification (stratify=False).
# NOTE(review): presumably the label-row combinations are too sparse to stratify on — confirm.
create_split_dataset("multi-label", False)

column_info: Index(['img_id', 'clip_id', 'moved', 'fearful', 'bored', 'surprised', 'angry',
       'anxious', 'smitten', 'fluttered', 'disappointed', 'fulfilled',
       'ashamed', 'sad', 'upset', 'sympathetic', 'passionate', 'depressed',
       'amused', 'affectionate', 'guilty', 'neutral', 'jealous', 'peaceful',
       'happy', 'disgusted'],
      dtype='object')
data_all: (38817, 26)
y_temp: (34935, 24)
y_valid: (3882, 24)
y_valid_freq: Counter({4: 1196, 11: 653, 3: 318, 6: 312, 12: 309, 8: 222, 16: 160, 5: 149, 1: 90, 13: 87, 0: 58, 21: 54, 17: 54, 7: 49, 18: 47, 15: 35, 22: 24, 10: 19, 9: 16, 20: 14, 23: 11, 14: 4, 2: 1})
y_valid_uniq_class: 23
y_temp_freq: Counter({4: 10934, 11: 5935, 3: 3062, 12: 2852, 6: 2583, 8: 2065, 16: 1342, 5: 1217, 1: 951, 13: 691, 0: 585, 17: 442, 7: 441, 18: 427, 21: 351, 10: 215, 9: 208, 15: 167, 22: 161, 20: 160, 23: 106, 14: 33, 2: 7})
y_temp_uniq_class: 23
y_train: (31053, 24)
y_test: (3882, 24)
y_train_freq: Counter({4: 9732, 11: 5246, 3: 2723, 12:

In [7]:
# List the label directory again to confirm the *_train / *_valid / *_test CSVs were written.
!ls -la label/*

-rw-r--r-- 1 root root 1505005 Dec  6 11:52 label/label_22_emo.csv
-rw-r--r-- 1 root root  150685 Dec  6 11:55 label/label_22_emo_test.csv
-rw-r--r-- 1 root root 1204045 Dec  6 11:55 label/label_22_emo_train.csv
-rw-r--r-- 1 root root  150685 Dec  6 11:55 label/label_22_emo_valid.csv
-rw-r--r-- 1 root root  563831 Dec  6 11:12 label/label_6_basic_emo.csv
-rw-r--r-- 1 root root   56467 Dec  6 11:55 label/label_6_basic_emo_test.csv
-rw-r--r-- 1 root root  451015 Dec  6 11:55 label/label_6_basic_emo_train.csv
-rw-r--r-- 1 root root   56467 Dec  6 11:55 label/label_6_basic_emo_valid.csv
-rw-r--r-- 1 root root 3105579 Dec  6 11:12 label/label_multi-label.csv
-rw-r--r-- 1 root root  310779 Dec  6 11:55 label/label_multi-label_test.csv
-rw-r--r-- 1 root root 2484459 Dec  6 11:55 label/label_multi-label_train.csv
-rw-r--r-- 1 root root  310779 Dec  6 11:55 label/label_multi-label_valid.csv
