In [44]:
import os
import pandas as pd
import numpy as np
import shutil
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image

In [45]:
cwd = os.getcwd()
df = pd.read_csv(cwd + "/../data_csv/preprocessing_data.csv")

In [46]:
# create folder
dir_img = cwd + '/augmentation'
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']
for emo in dictionary:
    os.makedirs(dir_img + "/" + emo)

In [49]:
count = [0 for x in range(8)]

# ImageDataGenerator for brightness
bright_datagen = tf.keras.preprocessing.image.ImageDataGenerator(brightness_range=[0.8,1.2])
flip_datagen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True, vertical_flip=False)

all_data = []

# old
for i, row in df.iterrows():
    if count[row.emotion] >= 10000:
        continue

    imagePath = cwd + "/../cleaned_images/" + row.image

    # convert to 4 channels for iterator
    image = cv2.imread(imagePath)

    name = dictionary[row.emotion] + "/" 
    if len(row.image.split('/')) == 1:
        name += row.image.split('.')[0] + '_' + str(count[row.emotion]) + ".jpg"
    else:
        name += row.image.split('/')[1].split('.')[0] + '_' + str(count[row.emotion]) + ".jpg"

    cv2.imwrite(os.path.join(dir_img, name), image)
    count[row.emotion] += 1
    all_data.append([name, row.emotion])

    if i > 0 and i % 10000 == 0:
        print('[INFO] OLD {}/{}'.format(i, len(df)))

print(count)

# new
for i, row in df.iterrows():
    if count[row.emotion] >= 10000:
        continue

    imagePath = cwd + "/../cleaned_images/" + row.image

    # convert to 4 channels for iterator
    image = cv2.imread(imagePath)

    export_image = []
    image = np.expand_dims(image, 0)

    # bright
    aug_iter = bright_datagen.flow(image, batch_size=1)
    bright_image = next(aug_iter)[0].astype('uint8')
    export_image.append(bright_image)

    # flip
    aug_iter = flip_datagen.flow(image, batch_size=1)
    flip_image = next(aug_iter)[0].astype('uint8')
    export_image.append(flip_image)

    # bright and flip
    input = np.expand_dims(bright_image, 0)
    aug_iter = flip_datagen.flow(input, batch_size=1)
    bright_flip_image = next(aug_iter)[0].astype('uint8')
    export_image.append(bright_flip_image)   

    # augmented image
    for image in export_image:
        name = dictionary[row.emotion] + "/"
        if len(row.image.split('/')) == 1:
            name += row.image.split('.')[0] + '_' + str(count[row.emotion]) + ".jpg"
        else:
            name += row.image.split('/')[1].split('.')[0] + '_' + str(count[row.emotion]) + ".jpg"

        cv2.imwrite(os.path.join(dir_img, name), image)
        count[row.emotion] += 1
        all_data.append([name, row.emotion])

    if i > 0 and i % 10000 == 0:
        print('[INFO] NEW {}/{}'.format(i, len(df)))

print(count)

[INFO] OLD 10000/33303
[INFO] OLD 20000/33303
[INFO] OLD 30000/33303
[2487, 2203, 2423, 2562, 8651, 9695, 3095, 2187]
[INFO] NEW 10000/33303
[INFO] NEW 30000/33303
[9948, 8812, 9692, 10002, 10001, 10001, 10001, 8748]


In [51]:
df = pd.DataFrame(all_data, columns=["image", "emotion"])
df.to_csv(cwd + "/augmentation_data.csv", index=False)

In [20]:
def create_state(df, split_fraction, file_name):
    # arrange data
    X = list(df['image'][i] for i in range(len(df)))
    y = list(df['emotion'][i] for i in range(len(df)))

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_fraction)

    # prepare data to export
    new_df = pd.DataFrame(columns=['image', 'emotion'])
    new_df['image'] = X_test
    new_df['emotion'] = y_test

    # add split data to new csv file
    new_df.to_csv(cwd + '/state/' + file_name + '.csv', index=False)

    print(new_df.groupby('emotion').count())

    # drop split data from all data
    data = {
        'image': X_train,
        'emotion': y_train
    }
    df = pd.DataFrame(data)

    return df

In [21]:
dir_img = cwd + '/state'
if os.path.exists(dir_img):
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

In [22]:
# split test data
df = create_state(df, 0.1, "test")

         image
emotion       
0          219
1          202
2          250
3          249
4          870
5          970
6          334
7          374


In [23]:
# split val data
df = create_state(df, 0.1, "val")

         image
emotion       
0          227
1          200
2          204
3          224
4          795
5          881
6          280
7          310


In [24]:
df

Unnamed: 0,image,emotion
0,SADNESS/sadness (236).png,6
1,FEAR/fear (1117).jpg,3
2,Yoriko_Kawaguchi_0014.jpg,5
3,ANGER/anger (2512).jpg,0
4,SADNESS/sadness (243).jpg,6
...,...,...
28081,SURPRISE/surprise (730).png,7
28082,Heath_Ledger_0001.jpg,5
28083,Nathalie_Baye_0004.jpg,4
28084,SADNESS/sadness (1787).jpg,6


In [25]:
df.to_csv(cwd + "/state/train.csv", index=False)