In [1]:
# import libraries
import os
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import cv2
import pandas as pd

In [2]:
# set image color and the directory that holds that color's images
# img_dir is derived from color so the two settings cannot drift apart;
# the trailing slash is kept because later cells build paths by string concatenation.
color = 'blue'
img_dir = f'Outputs/images/{color}/'

In [3]:
# image preprocessing
# Train/validation and test get their own generator objects so the test
# pipeline can be changed later without touching the train/validation one.
# Both currently share exactly the same settings.
# Augmentations tried earlier and currently disabled:
#   width_shift_range=0.1, shear_range=0.1, brightness_range=[0.5,1.5]
_shared_generator_kwargs = dict(
    zoom_range=0.3,
    samplewise_center=True,
    samplewise_std_normalization=True,
)

data_generator = ImageDataGenerator(**_shared_generator_kwargs)

test_generator = ImageDataGenerator(**_shared_generator_kwargs)

In [4]:
# create train/test/val images with generators

def _flow_split(generator, split):
    """Create the directory iterator for one data split.

    Parameters
    ----------
    generator : ImageDataGenerator
        Generator whose preprocessing settings are applied to the images.
    split : str
        Name of the split folder under ``img_dir`` ('train', 'val' or 'test').

    Returns
    -------
    The iterator returned by ``generator.flow_from_directory`` reading from
    ``<img_dir><split>/sub/`` with ``<img_dir><split>/processed`` passed as
    ``save_to_dir``.
    """
    processed_dir = img_dir + split + '/processed'
    # Make sure the output folder exists up front so drawing the first batch
    # cannot fail on a missing directory (e.g. on a fresh checkout).
    os.makedirs(processed_dir, exist_ok=True)
    return generator.flow_from_directory(
        img_dir + split + '/sub/',
        target_size=(224, 224),
        batch_size=32,
        class_mode='binary',
        # Keep a fixed order: a later cell pairs `.filenames` with the saved
        # files by position.
        shuffle=False,
        save_to_dir=processed_dir,
        # save_prefix is intentionally left at its default; the file-name
        # parsing later in this notebook skips exactly one leading character.
    )


data_train = _flow_split(data_generator, 'train')
data_val = _flow_split(data_generator, 'val')

data_test = _flow_split(test_generator, 'test')

Found 3888 images belonging to 2 classes.
Found 432 images belonging to 2 classes.
Found 1080 images belonging to 2 classes.


In [5]:
# apply image transformations
# Draw exactly len(flow) batches (one full pass) from each iterator. The
# iterators were created with save_to_dir, so pulling the batches is what
# produces the processed files; the returned batches themselves are discarded.
#
# The built-in next() is used instead of the iterator's .next() method:
# .next() is a legacy spelling that is not available on the iterators of newer
# Keras releases, while next() works on both old and new versions.
#
# NOTE(review): the loop is bounded by len(flow) rather than `for batch in flow`
# because these iterators presumably keep yielding batches indefinitely - confirm.

for flow in (data_val, data_train, data_test):
    for _ in range(len(flow)):
        next(flow)

In [6]:
# get original image file names
# `.filenames` holds paths relative to the split directory (class folder + file).
# os.path.basename strips the folder part using the running platform's path
# separator, so this works on Windows and POSIX alike; splitting on a literal
# backslash only worked on Windows and raised IndexError elsewhere.

val_images = [os.path.basename(file) for file in data_val.filenames]
test_images = [os.path.basename(file) for file in data_test.filenames]
train_images = [os.path.basename(file) for file in data_train.filenames]

In [7]:
# get new, processed image file names

def _saved_index(file_name):
    """Return the integer that follows the first character of a saved file name.

    The name is read as ``<one char><index>_<rest>``; the leading character is
    dropped and everything up to the next underscore is parsed as an int.

    NOTE(review): presumably the saved files are named
    ``<save_prefix>_<index>_<hash>.<ext>`` with an empty save_prefix, which is
    why exactly one leading character is skipped - confirm against Keras.
    """
    return int(str(file_name)[1:].split('_')[0])


def _list_processed(split):
    """List the files in ``<img_dir><split>/processed`` ordered by saved index."""
    return sorted(os.listdir(path=img_dir + split + '/processed'), key=_saved_index)


def _pair_names(original_names, processed_names, split):
    """Pair original and processed file names position by position.

    Parameters
    ----------
    original_names : list of str
        Original image file names, in iterator order.
    processed_names : list of str
        Processed image file names, sorted by saved index.
    split : str
        Split name, used only in the error message.

    Returns
    -------
    pandas.DataFrame with columns ``image_file`` and ``process_image``.

    Raises
    ------
    ValueError
        If the two lists differ in length. zip() would otherwise silently
        truncate and the pairing could be misaligned (for example when the
        processed folder still contains files from an earlier run).
    """
    if len(original_names) != len(processed_names):
        raise ValueError(
            f'{split}: {len(original_names)} original images but '
            f'{len(processed_names)} processed files; clear the processed '
            f'folder and regenerate before building the mapping'
        )
    return pd.DataFrame(list(zip(original_names, processed_names)),
                        columns=['image_file', 'process_image'])


tst_list = _list_processed('test')
train_list = _list_processed('train')
val_list = _list_processed('val')

# create dataframes matching the original image file names with the new processed image file names
test_df = _pair_names(test_images, tst_list, 'test')
train_df = _pair_names(train_images, train_list, 'train')
val_df = _pair_names(val_images, val_list, 'val')

# save dataframes
test_df.to_csv(f'Outputs/{color}_processed_test_df.csv')
train_df.to_csv(f'Outputs/{color}_processed_train_df.csv')
val_df.to_csv(f'Outputs/{color}_processed_val_df.csv')