# Script description
This script is to create for the AffectNet dataset the csv for the training-testing data (in a single file) and the validation data which contains the image path and the emotion label.
</br>
Therefore, this script creates and the balance dataset based on the label.csv and the annotation file of test_outputs which has been created with the FairFace annotation tool.

In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from operator import itemgetter

In [2]:
def create_affectNet_csv(dataset_path='Dataset\input', sub_paths=['train_set', 'val_set'], filenames=['labels.csv', 'val_labels.csv']):
    """
    This function taking the dataset batch and the file name of CSV file which are gona create for validation data as one file
    and an other one for the rest of data which will be used for training and testing
    :param dataset_path: str, the dir where the affectnet folder dataset is contain (without have the affectnet)
    :subs_paths: list, a list with the name of the folder for train and validation
    :filenames: list, the filenames for each CSV file
    :return: None
    """
    dir_path = os.getcwd()
    path_dataset_dir = os.path.join(dir_path, dataset_path)
    path_dataset_dir = os.path.join(path_dataset_dir, 'affectnet')
    image_filename_lst = []
    label_lst = []
    # from id to emotion class
    from_id_to_emotion =  {0:'neutral', 1:'happy', 2:'sad', 3:'surprise', 4:'fear', 5:'disgust', 6:'anger', 7: 'contempt'}
    for path, output in zip(sub_paths, filenames):
        image_filename_lst = []
        label_lst = []
        
        # have the path of the images and the anotations
        path_dataset = os.path.join(path_dataset_dir, path)
        path_dataset_images = os.path.join(path_dataset, 'images')
        path_dataset_annotations = os.path.join(path_dataset, 'annotations')
        arr = os.listdir(path_dataset_images)
        # for all the paths read the images filename
        for pth in arr:
            filename_without_ext = os.path.splitext(pth)[0]
            aro_filename = f'{filename_without_ext}_aro.npy'
            exp_filename = f'{filename_without_ext}_exp.npy'
            lnd_filename = f'{filename_without_ext}_lnd.npy'
            val_filename = f'{filename_without_ext}_val.npy'
            
            filename = os.path.join(path_dataset_annotations, exp_filename)            
            image_exp = np.load(filename)
            
            filename = os.path.join(path_dataset_annotations, lnd_filename)
            image_lnd = np.load(filename)
            
            filename = os.path.join(path_dataset_annotations, val_filename)
            image_val = np.load(filename)
            
            filename = os.path.join(path_dataset_annotations, aro_filename)
            image_aro = np.load(filename)
            
            img_filename = os.path.join(path, 'images')
            img_filename = os.path.join(img_filename, pth)
            image_filename_lst.append(img_filename)
            
            label_lst.append(from_id_to_emotion[int(image_exp)])
            
        data = {'pth': image_filename_lst,
                'label': label_lst}

        df = pd.DataFrame(data)
        csv_dir = os.path.join(path_dataset_dir, output)
        df.to_csv(csv_dir, index=False)
            

In [7]:
def create_balance_affect_Net_csv(dataset_dir='Dataset/input/affectnet', labels_csv='labels.csv', annotations_csv='test_outputs.csv'):
    """
    This function created the balance datsaet in terms of the domain group of four races and the emotion classes
    :param dataset_dir: str, the directory of the AffectNet dataset
    :labels_csv: str, the filename of the labels and the image path of the affectNet dataset
    :annotations_csv: str, the filename of the demographic group annotations
    :return: pandas.Dataframe, the baalnce dataset image paths and emotion labels
    """
    dir_path = os.getcwd()
    path_dataset_dir = os.path.join(dir_path, dataset_dir)
    path_sens_features = os.path.join(path_dataset_dir, annotations_csv)
    path_csv_labels = os.path.join(path_dataset_dir, labels_csv)
    
    csv_sf = pd.read_csv(path_sens_features)
    csv_labels = pd.read_csv(path_csv_labels)
    df = pd.merge(csv_sf, csv_labels, on="pth")
    
    # Read the labels and put it in a list
    label_values = df['label'].values.tolist()
    emotion_classes = list(set(df['label'].values.tolist()))
    emotion_labels_count = [label_values.count(emotion) for emotion in emotion_classes]
    
    # Have the lower amount of a dataset
    index_min = min(enumerate(emotion_labels_count), key=itemgetter(1))[0] 
    min_emotion = emotion_classes[index_min]
    
    balance_dataframe = df.loc[df['label'] == min_emotion]
    min_emotions_values = emotion_labels_count[index_min]
    
    emotion_classes.remove(min_emotion)
    race4 = ['Black', 'White', 'Indian', 'Asian']
    equal_value = int(min_emotions_values/len(race4))
    for emotion in emotion_classes:
        df_emotion = df.loc[df['label'] == emotion]
        # for each emotion class his four races is balance as add the same percentage for each sensitive feature group
        for race in race4:
            df_emotion_per_race = df_emotion[df_emotion['race4'] == race]
            df_emotion_per_race = df_emotion_per_race.iloc[:equal_value]
            balance_dataframe = pd.concat([balance_dataframe, df_emotion_per_race])

    df_labels = balance_dataframe[['pth', 'label']]
    df_race_annotations = balance_dataframe[['pth', 'race', 'race4', 'gender', 'age', 'race_scores_fair', 'race_scores_fair_4', 'gender_scores_fair', 'age_scores_fair']]
    
    balance_dataset_dir = os.path.join(path_dataset_dir, 'balance_dataset_labels.csv')
    df_labels.to_csv(balance_dataset_dir, index=False)
    
    balance_dataset_dir = os.path.join(path_dataset_dir, 'balance_dataset_annotations.csv')
    df_race_annotations.to_csv(balance_dataset_dir, index=False)
    return df_labels

In [8]:
df = create_balance_affect_Net_csv()

In [9]:
display(df)

Unnamed: 0,pth,label
12,train_set\images\100012.jpg,contempt
42,train_set\images\100047.jpg,contempt
120,train_set\images\10015.jpg,contempt
201,train_set\images\100251.jpg,contempt
299,train_set\images\100368.jpg,contempt
...,...,...
205484,train_set\images\366930.jpg,anger
205642,train_set\images\367132.jpg,anger
205841,train_set\images\367397.jpg,anger
206181,train_set\images\36785.jpg,anger


In [10]:
create_affectNet_csv()