In [None]:
import csv
import os
import zipfile
from zipfile import ZipFile

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import img_to_array
from numpy import expand_dims
from PIL import Image

Perform data augmentation (random rotation and horizontal/vertical flips) on images with label 1; images with label 0 are saved unchanged.

In [None]:
# Input archive and label file for the training set.
zip_file_name = 'train.zip'
csv_file_name = 'train.csv'

# Working folders: where the archive is unpacked, where the unpacked training
# images are read from, and where the augmented images are written.
main_dir = "/Users/dalilkoheeallee/Desktop"
extracted_dir = f"{main_dir}/extracted_images"
image_dir = f"{extracted_dir}/train/"
augmented_image_dir = f"{main_dir}/augmented/"

In [None]:
# Unpack the training archive. The zip handle is released as soon as the
# extraction finishes; the loop below only reads the extracted files.
with ZipFile(zip_file_name, 'r') as myzip:
    myzip.extractall(extracted_dir)

# The images are saved with PIL, which does not create missing folders.
os.makedirs(augmented_image_dir, exist_ok=True)

# The augmentation settings are the same for every image, so the generator is
# built once instead of once per CSV row.
# NOTE(review): Keras documents rotation_range in degrees, so 0.25 allows
# rotations of at most +/-0.25 degrees -- confirm this is the intended amount.
imageDataGen = ImageDataGenerator(
    rotation_range=0.25,
    horizontal_flip=True,
    vertical_flip=True,
    dtype=np.uint8)

# Each CSV row is read as "<image filename>,<integer label>".
# newline='' is how the csv module recommends opening files for csv.reader.
with open(csv_file_name, mode='r', newline='') as file:
    for lines in csv.reader(file):
        if not lines:
            # Blank line in the CSV: nothing to process.
            continue

        filename = lines[0]
        label = int(lines[1])

        # Output name stem: the part of the filename starting at "Image",
        # without its extension (whatever the extension's length).
        prefix = os.path.splitext(filename[filename.index("Image"):])[0]

        # cv2.imread returns None instead of raising when a file cannot be
        # read, so fail here with the offending path.
        img = cv2.imread(image_dir + filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError("Could not read image: " + image_dir + filename)
        dataImage = img_to_array(img)

        if label == 0:
            # Label 0: save a single, unmodified 8-bit copy.
            name_new_image = str(label) + "_" + prefix + "_" + ".png"
            pil0_img = tf.keras.utils.array_to_img(dataImage, scale=False, dtype="uint8")
            pil0_img.save(augmented_image_dir + name_new_image)
        else:
            # Any other label: save three randomly transformed variants.
            # flow() expects a batch axis, hence expand_dims.
            iterator = imageDataGen.flow(expand_dims(dataImage, 0), batch_size=1)
            for i in range(3):
                image = next(iterator)[0]
                pil1_img = tf.keras.utils.array_to_img(image, scale=False, dtype="uint8")
                name_new_image = str(label) + "_" + prefix + "_" + str(i) + ".png"
                pil1_img.save(augmented_image_dir + name_new_image)


Then run `maker_csv` on `augmented_image_dir`.

Transform the test images from uint16 to uint8 so that they are consistent with the format of the augmented training set.

In [None]:
# Input archive and file list for the test set.
zip_file_name_test = 'test.zip'
csv_file_name_test = 'test.csv'

# Working folders for the test set: unpack location, folder the unpacked
# images are read from, and folder the converted images are written to.
extracted_dir_test = f"{main_dir}/extracted_images_test"
image_dir_test = f"{extracted_dir_test}/test/"
augmented_image_dir_test = f"{main_dir}/augmented2/"

In [None]:
# Unpack the test archive; the zip handle is released before the conversion
# loop starts because only the extracted files are needed afterwards.
with ZipFile(zip_file_name_test, 'r') as myzip:
    myzip.extractall(extracted_dir_test)

# The first column of each CSV row is read as the image filename.
# newline='' is how the csv module recommends opening files for csv.reader.
with open(csv_file_name_test, mode='r', newline='') as file:
    for lines in csv.reader(file):
        if not lines:
            # Blank line in the CSV: nothing to process.
            continue

        filename = lines[0]

        # cv2.imread returns None instead of raising when a file cannot be
        # read, so fail here with the offending path.
        img = cv2.imread(image_dir_test + filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError("Could not read image: " + image_dir_test + filename)
        dataImage = img_to_array(img)

        # PIL does not create missing folders; this also covers filenames
        # that carry a sub-folder component.
        output_path = augmented_image_dir_test + filename
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Re-save the image as 8-bit, keeping its original filename.
        pil0_img = tf.keras.utils.array_to_img(dataImage, scale=False, dtype="uint8")
        pil0_img.save(output_path)

Add Gaussian noise to all training images and additionally perform data augmentation (rotation, flip) on images with label 1.

In [None]:
# Folder that receives the noisy (and, for label 1, augmented) images.
noisy_image_dir = f"{main_dir}/augmented_noise/"

def add_noise(image):
    """Return ``image`` with additive Gaussian noise, clipped to [0, 255].

    The noise is zero-mean with a standard deviation equal to one fifth of the
    standard deviation of ``image`` itself, so a constant image is returned
    unchanged (as floats).

    The result is clipped to the 8-bit range because this function is used as
    the ``preprocessing_function`` of generators built with ``dtype=np.uint8``:
    without clipping, noisy values below 0 or above 255 do not survive the
    later cast to uint8 and show up as corrupted pixels.

    Parameters
    ----------
    image : numpy.ndarray
        Pixel data; expected to hold values in the 0-255 range.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``image`` and values in [0, 255].
    """
    noise = np.random.normal(0, np.std(image) / 5, image.shape)
    return np.clip(image + noise, 0, 255)


# The images are saved with PIL, which does not create missing folders.
os.makedirs(noisy_image_dir, exist_ok=True)

# Both generator configurations are the same for every image, so they are
# built once instead of once per CSV row.
# Label 0: noise only.
noise_only_gen = ImageDataGenerator(
    preprocessing_function=add_noise,
    dtype=np.uint8)

# Other labels: random rotation/flips plus noise.
# NOTE(review): Keras documents rotation_range in degrees, so 0.25 allows
# rotations of at most +/-0.25 degrees -- confirm this is the intended amount.
noise_augment_gen = ImageDataGenerator(
    rotation_range=0.25,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=add_noise,
    dtype=np.uint8)

# Each CSV row is read as "<image filename>,<integer label>".
# newline='' is how the csv module recommends opening files for csv.reader.
with open(csv_file_name, mode='r', newline='') as file:
    for lines in csv.reader(file):
        if not lines:
            # Blank line in the CSV: nothing to process.
            continue

        filename = lines[0]
        label = int(lines[1])

        # Output name stem: the part of the filename starting at "Image",
        # without its extension (whatever the extension's length).
        prefix = os.path.splitext(filename[filename.index("Image"):])[0]

        # cv2.imread returns None instead of raising when a file cannot be
        # read, so fail here with the offending path.
        img = cv2.imread(image_dir + filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError("Could not read image: " + image_dir + filename)
        dataImage = img_to_array(img)

        # flow() expects a batch axis, hence expand_dims.
        imageNew = expand_dims(dataImage, 0)

        if label == 0:
            # Label 0: save one noisy copy.
            iterator = noise_only_gen.flow(imageNew, batch_size=1)
            image = next(iterator)[0]
            pil_img = tf.keras.utils.array_to_img(image, scale=False, dtype="uint8")
            name_new_image = str(label) + "_" + prefix + "_" + ".png"
            pil_img.save(noisy_image_dir + name_new_image)
        else:
            # Any other label: save three noisy, randomly transformed variants.
            iterator = noise_augment_gen.flow(imageNew, batch_size=1)
            for i in range(3):
                image = next(iterator)[0]
                pil_img = tf.keras.utils.array_to_img(image, scale=False, dtype="uint8")
                name_new_image = str(label) + "_" + prefix + "_" + str(i) + ".png"
                pil_img.save(noisy_image_dir + name_new_image)

                

Split the training dataset into 75% training and 25% validation. Flip and rotation augmentation is applied to the training portion only.

In [None]:
# The csv.reader cells of this notebook parse every row of train.csv,
# including the first one, as "<filename>,<integer label>", i.e. they treat the
# file as header-less. Read it the same way here: with the pandas default the
# first sample would be consumed as column names and then written back as the
# first line of BOTH output files.
df = pd.read_csv('train.csv', header=None)

# Kept so that later cells drawing from NumPy's global RNG stay seeded.
np.random.seed(42)

# Shuffle all rows reproducibly before splitting.
df = df.sample(frac=1, random_state=42)

# First 75% of the shuffled rows -> training, remaining 25% -> validation.
num_rows = df.shape[0]
split_point = int(num_rows * 0.75)

df1 = df.iloc[:split_point]
df2 = df.iloc[split_point:]

# No header line is written, so the outputs keep the same header-less layout
# as train.csv and can be parsed row by row with csv.reader.
df1.to_csv('train_75.csv', index=False, header=False)
df2.to_csv('validation_25.csv', index=False, header=False)

In [None]:
# Input archive and the two label files produced by the 75/25 split.
zip_file_name = 'train.zip'
csv_file_name75 = 'train_75.csv'
csv_file_name25 = 'validation_25.csv'

# Working folders: unpack location, folder the training images are read from,
# and the output folders for the augmented training and validation images.
main_dir = "/Users/dalilkoheeallee/Desktop"
extracted_dir = f"{main_dir}/extracted_images"
image_dir = f"{extracted_dir}/train/"
augmented75_image_dir = f"{main_dir}/augmented75/"
validation_image_dir = f"{main_dir}/validation/"

In [None]:
# The images are saved with PIL, which does not create missing folders.
os.makedirs(augmented75_image_dir, exist_ok=True)

# The augmentation settings are the same for every image, so the generator is
# built once instead of once per CSV row.
# NOTE(review): Keras documents rotation_range in degrees, so 0.25 allows
# rotations of at most +/-0.25 degrees -- confirm this is the intended amount.
imageDataGen = ImageDataGenerator(
    rotation_range=0.25,
    horizontal_flip=True,
    vertical_flip=True,
    dtype=np.uint8)

# Each CSV row is read as "<image filename>,<integer label>".
# newline='' is how the csv module recommends opening files for csv.reader.
with open(csv_file_name75, mode='r', newline='') as file:
    for lines in csv.reader(file):
        if not lines:
            # Blank line in the CSV: nothing to process.
            continue

        filename = lines[0]
        label = int(lines[1])

        # Output name stem: the part of the filename starting at "Image",
        # without its extension (whatever the extension's length).
        prefix = os.path.splitext(filename[filename.index("Image"):])[0]

        # cv2.imread returns None instead of raising when a file cannot be
        # read, so fail here with the offending path.
        img = cv2.imread(image_dir + filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError("Could not read image: " + image_dir + filename)
        dataImage = img_to_array(img)

        if label == 0:
            # Label 0: save a single, unmodified 8-bit copy.
            name_new_image = str(label) + "_" + prefix + "_" + ".png"
            pil0_img = tf.keras.utils.array_to_img(dataImage, scale=False, dtype="uint8")
            pil0_img.save(augmented75_image_dir + name_new_image)
        else:
            # Any other label: save ten randomly transformed variants.
            # flow() expects a batch axis, hence expand_dims.
            iterator = imageDataGen.flow(expand_dims(dataImage, 0), batch_size=1)
            for i in range(10):
                image = next(iterator)[0]
                pil1_img = tf.keras.utils.array_to_img(image, scale=False, dtype="uint8")
                name_new_image = str(label) + "_" + prefix + "_" + str(i) + ".png"
                pil1_img.save(augmented75_image_dir + name_new_image)

In [None]:
# The first column of each CSV row is read as the image filename; the
# validation images are re-saved as 8-bit without any augmentation.
# newline='' is how the csv module recommends opening files for csv.reader.
with open(csv_file_name25, mode='r', newline='') as file:
    for lines in csv.reader(file):
        if not lines:
            # Blank line in the CSV: nothing to process.
            continue

        filename = lines[0]

        # cv2.imread returns None instead of raising when a file cannot be
        # read, so fail here with the offending path.
        img = cv2.imread(image_dir + filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError("Could not read image: " + image_dir + filename)
        dataImage = img_to_array(img)

        # PIL does not create missing folders; this also covers filenames
        # that carry a sub-folder component.
        output_path = validation_image_dir + filename
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        pil0_img = tf.keras.utils.array_to_img(dataImage, scale=False, dtype="uint8")
        pil0_img.save(output_path)

Then run `maker_csv` on `augmented75_image_dir`.