This file is supposed to show augmentation process ending with augmentated images saved in the specified location. This process will also add new entries to metadata.csv with data of original images (except new paths will be created).

In [8]:
import os
import sys
train_module_path = os.path.abspath(os.path.join('..'))
if train_module_path not in sys.path:
    sys.path.append(train_module_path)
from data.consts import DATA_DIR
from data.consts import AUGMENTED_DATA_DIR

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import numpy as np
import pandas as pd
import scipy

In [2]:
metadata_path = DATA_DIR + '/metadata.csv'
metadata = pd.read_csv(metadata_path)
mock_metadata_path = DATA_DIR + '/mock_metadata.csv'
mock_metadata = pd.read_csv(mock_metadata_path)

cols = ['age', 'gender', 'path', 'face_score1']

In [3]:
datagen = ImageDataGenerator(
    rotation_range=20,     # For higher values, the detect_faces function doesn't seem to find any faces
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.25,
    brightness_range=[0.4,1.5],
    horizontal_flip=True,
    channel_shift_range=40,
    fill_mode='nearest')    # Alternatives: 'reflect', 'wrap'

In [4]:
# Directory to save augmented images
output_dir = AUGMENTED_DATA_DIR # This is just a mock, replace with real one

# Generate and save augmented images along with updating metadata
augmented_metadata = {col: [] for col in cols}

This part uses mock_metadata instead of metadata to show how augmentation process might work on the whole dataset. As for the number of augmented images, we may require a mapping from age and gender to the number of augmented images that is needed for that class. This might help with preventing the overfitting.

In [5]:
for i, row in mock_metadata.iterrows():   # Replace mock_metadata with metadata!
    img_path = row['path']
    name = img_path.split('_')[-1].split('.')[0]
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  
    # Generate augmented images
    generator = datagen.flow(np.array([img]), batch_size=16)
    desired_augmented_images = 10    # Change this number depending on our needs
    
    # Save augmented images
    while desired_augmented_images > 0:
        augmented_img = generator.next()[0]
        output_path = os.path.join(output_dir, f"aug_{desired_augmented_images}_{name}.jpg")
        cv2.imwrite(output_path, cv2.cvtColor(augmented_img, cv2.COLOR_BGR2RGB))

        # Update augmented metadata
        augmented_metadata['age'].append(row['age'])
        augmented_metadata['gender'].append(row['gender'])
        augmented_metadata['path'].append(output_path)
        augmented_metadata['face_score1'].append(row['face_score1'])   # This value might be misleading after augmentation
        
        desired_augmented_images -= 1

In [6]:
# Convert augmented metadata to a DataFrame
augmented_metadata_df = pd.DataFrame(augmented_metadata)

# Update the original metadata with augmented metadata
updated_metadata = pd.concat([mock_metadata, augmented_metadata_df], ignore_index=True) # Replace with metadata

# Save the updated metadata to CSV
updated_metadata.to_csv(mock_metadata_path, index=False)   # Replace with real path to metadata

In [7]:
pd.set_option('display.max_colwidth', None)  # Set to None to display the entire column width
updated_metadata

Unnamed: 0,age,gender,path,face_score1
0,27,male,C:\Users\piotr\PycharmProjects\had\train\augmentation\augmentation_example.jpg,1
1,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_10_example.jpg,1
2,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_9_example.jpg,1
3,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_8_example.jpg,1
4,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_7_example.jpg,1
5,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_6_example.jpg,1
6,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_5_example.jpg,1
7,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_4_example.jpg,1
8,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_3_example.jpg,1
9,27,male,C:/Users/piotr/PycharmProjects/had/train/augmentation/augmented_images\aug_2_example.jpg,1
