In [4]:
# Data preparation
## - Load all the images and create training dataset
## - Create training dataset by performing data augmentation

In [3]:
# imports
import csv
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [12]:
# path variables
# NOTE: PROJ_PATH is a machine-specific absolute path; adjust it to your own checkout.
PROJ_PATH = 'D:/Work/IMDA/capstone project/'
# Truthy -> generated samples go to the training directory, falsy -> validation directory.
GENERATE_FOR_TRAIN = True
# Both paths end with '/' because other cells append sub-directory names to them
# with plain string concatenation.
PATH_TRAIN_DATASET = PROJ_PATH + 'dataset/train/'
# Validation output must not share the training directory, otherwise the
# GENERATE_FOR_TRAIN switch has no effect and both sets are mixed together.
PATH_VALID_DATASET = PROJ_PATH + 'dataset/valid/'

In [5]:
# Build lookup tables from the card database CSV.
#   dict_card_title : first CSV column (also the image file name without '.png') -> card title
#   arr_card_code / arr_card_title : the same two columns as parallel lists, in row order
dict_card_title = dict()
arr_card_title = []
arr_card_code = []
with open(PROJ_PATH + 'dataset/teppen_card_database.csv', newline='') as csvfile:
    for elem in csv.reader(csvfile, delimiter=','):
        if not elem:
            # csv.reader yields an empty list for blank lines (e.g. a trailing
            # newline at the end of the file); indexing it would raise IndexError.
            continue
        dict_card_title[elem[0]] = elem[1]
        arr_card_code.append(elem[0])
        arr_card_title.append(elem[1])

# Load one reference image per card. The class label of a card is its position
# in dict_card_title (insertion order).
dict_card_image = dict()
numOfClasses = len(dict_card_title)
imgData = []
labels = []
for index, key in enumerate(dict_card_title):
    # The context manager closes the file handle as soon as the pixels are
    # copied out, instead of leaving it open until garbage collection.
    with Image.open(PROJ_PATH + 'dataset/images/' + key + '.png') as img:
        pixels = np.asarray(img)
    dict_card_image[key] = pixels
    imgData.append(pixels)
    labels.append(index)
# Stack into arrays; the leading axis is the card index.
imgData = np.array(imgData)
labels = np.array(labels)

In [6]:
# One-hot encode the integer labels. The number of classes comes from the card
# database (numOfClasses) rather than a hard-coded literal, so the encoding
# stays correct when cards are added to or removed from the CSV.
y_label = to_categorical(labels, num_classes=numOfClasses)
y_label.shape

(12, 12)

In [7]:
# Sanity check: display the shape of the stacked array of loaded card images.
imgData.shape

(12, 498, 360, 4)

In [16]:
# Sample-count threshold that generateData uses to stop its augmentation loop.
num_of_samples_to_generate = 20

# Destination directory: training set when GENERATE_FOR_TRAIN is truthy,
# validation set otherwise.
if GENERATE_FOR_TRAIN:
    output_path = PATH_TRAIN_DATASET
else:
    output_path = PATH_VALID_DATASET

def generateData(output_path, key, index):
    """Write augmented variants of one card image into ``output_path``.

    Relies on notebook-level state: ``imgData`` and ``y_label`` (indexed by
    ``index``), the ``datagen`` augmenter, and ``num_of_samples_to_generate``.

    Parameters
    ----------
    output_path : str
        Directory that receives the generated files; created if missing.
    key : str
        Used as the file-name prefix of every generated image.
    index : int
        Position of the source image / label in ``imgData`` / ``y_label``.
    """
    os.makedirs(output_path, exist_ok=True)

    # flow() expects a batch axis, so wrap the single image and label.
    x = np.array([imgData[index]])
    y = np.array([y_label[index]])
    batches = datagen.flow(x, y, batch_size=1, shuffle=True,
                           save_prefix=key, save_to_dir=output_path)

    # The iterator does not stop on its own and saves a file for every batch
    # that is drawn, so draw exactly the requested number of batches. (Breaking
    # only after the counter exceeded the target produced one file too many.)
    for _ in range(num_of_samples_to_generate):
        next(batches)

# data augmentation on
# 1) brightness
# 2) channel shift
datagen = ImageDataGenerator(
    rescale=1./255,
    brightness_range=(0.3,0.7),
    channel_shift_range=100,
    fill_mode='nearest')


os.makedirs(output_path, exist_ok=True)
for index, key in enumerate(dict_card_title):
    # Write each card into its own sub-directory named after the card key.
    # The resize step that follows walks output_path/<card>/<file>, so writing
    # every class flat into output_path would break it.
    generateData(os.path.join(output_path, key), key, index)

In [17]:
# additional augmentation on
# 1) resizing to get different resolution
#
# Every image found under output_path/<card>/ is replaced by one copy per shrink
# factor below; the source file is deleted afterwards.
# NOTE: run this cell once per generation pass -- re-running it would shrink the
# already-resized copies again.
target_shrink_scale = [1, 2, 3, 4, 5]
image_extensions = ('.png', '.jpg', '.jpeg')

print('Starting to retrieve all the images to resize and save...')
for directory in os.listdir(output_path):
    class_dir = os.path.join(output_path, directory)
    if not os.path.isdir(class_dir):
        # Stray files directly under output_path are not class folders.
        continue
    # os.listdir returns a snapshot, so files written below are not revisited
    # within this pass.
    for index, filename in enumerate(os.listdir(class_dir)):
        # make sure to only process images
        if not filename.lower().endswith(image_extensions):
            continue
        file_to_remove = os.path.join(class_dir, filename)

        # Close the source image explicitly before deleting it.
        with Image.open(file_to_remove) as img:
            width, height = img.size
            for new_scale in target_shrink_scale:
                # Integer division; clamp to 1 so tiny images never get a zero dimension.
                new_size = (max(1, width // new_scale), max(1, height // new_scale))
                resized_img = img.resize(new_size)
                save_filename = directory+'_'+str(index)+'_'+str(new_scale)+'.png'
                resized_img.save(os.path.join(class_dir, save_filename))

        print('Removing file['+file_to_remove+']')
        os.remove(file_to_remove)
print('Completed!')

[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 002/CHUN-LI 002_0_899.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 002/CHUN-LI 002_0_9099.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 002/CHUN-LI 002_0_9501.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_1329.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_2909.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_3275.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_3289.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_3715.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_5074.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN-LI 003_0_5931.png]
Removing file[D:/Work/IMDA/capstone project/dataset/train/CHUN-LI 003/CHUN