# Data Augmentation

## Import Necessary Modules

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import matplotlib.pyplot as plt
from os import listdir
import time    
import os
from tqdm import tqdm
%matplotlib inline

2024-03-18 11:42:35.206765: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Time string formatting
def hms_string(sec_elapsed):
    """Format a non-negative duration in seconds as ``h:m:s``.

    Hours and minutes are printed as plain integers (no zero padding) and the
    seconds are rounded to one decimal place, e.g. ``588.5 -> "0:9:48.5"``.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float).

    Returns:
        The formatted ``"h:m:s"`` string.
    """
    # Round the total *before* splitting it so a value such as 59.96 carries
    # into the minutes ("0:1:0.0") instead of being printed as "0:0:60.0".
    total = round(sec_elapsed, 1)
    h, remainder = divmod(total, 60 * 60)
    m, s = divmod(remainder, 60)
    # The second round() only strips float noise left by divmod (e.g. 59.900000000000006).
    return f"{int(h)}:{int(m)}:{round(s,1)}"

In [3]:
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Save randomly augmented PNG copies of every image found in ``file_dir``.

    Each readable image is fed through a Keras ``ImageDataGenerator`` configured
    with ``rotation_range=15``, ``width_shift_range=0.1``,
    ``height_shift_range=0.1``, horizontal and vertical flips and
    ``fill_mode='nearest'``. The generator itself writes the augmented images
    into ``save_to_dir`` using the prefix ``aug_<original file stem>``.

    Args:
        file_dir: Directory containing the source images.
        n_generated_samples: Augmentation count per source image. The loop only
            stops once its counter *exceeds* this value, so
            ``n_generated_samples + 1`` batches (one image each) are generated
            and saved per source image. This is kept as-is because existing
            callers choose their counts against that behaviour.
        save_to_dir: Output directory; created if it does not exist.

    Entries that OpenCV cannot decode (non-image files, sub-directories) are
    skipped with a message instead of aborting the whole run.
    """
    data_gen = ImageDataGenerator(rotation_range=15,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                 )

    # The generator writes straight into this directory, so make sure it exists.
    os.makedirs(save_to_dir, exist_ok=True)

    for filename in listdir(file_dir):
        image_path = os.path.join(file_dir, filename)

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable file: {image_path}")
            continue
        # NOTE(review): cv2.imread yields BGR channel order and no conversion is
        # done before the generator saves the result; harmless for grey-level
        # scans, but confirm whether colour inputs need a BGR->RGB conversion.

        # add a leading batch axis, as flow() expects a batch of images
        image = image.reshape((1,) + image.shape)
        # prefix of the names for the generated samples; splitext copes with
        # extensions of any length (e.g. '.jpeg'), unlike slicing off 4 chars.
        save_prefix = 'aug_' + os.path.splitext(filename)[0]

        flow = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                             save_prefix=save_prefix, save_format='png')
        # flow() is an endless iterator that saves one image per batch drawn;
        # the batch is drawn before the counter is checked (see Args above).
        for i, _ in enumerate(flow, start=1):
            if i > n_generated_samples:
                break

In [4]:
def augment_image(data_path, label, aug_num):
    """Build the ``augmented`` dataset tree from the ``cropped`` one.

    Training images in ``<data_path>/cropped/Training/<class>`` are augmented
    with ``augment_data`` into ``<data_path>/augmented/Training/<class>``; the
    elapsed time and a per-class summary of the augmented training set are then
    printed. Testing images in ``<data_path>/cropped/Testing/<class>`` are not
    augmented: each one is re-saved as ``<class>-<n>.png`` under
    ``<data_path>/augmented/Testing/<class>``.

    Args:
        data_path: Dataset root containing the ``cropped`` directory.
        label: Sequence of class folder names.
        aug_num: Sequence with one ``n_generated_samples`` value per entry of
            ``label`` (extra trailing values are ignored).

    Raises:
        ValueError: If ``aug_num`` has fewer entries than ``label``.
    """
    # Fail before any file is written rather than part-way through the classes.
    if len(aug_num) < len(label):
        raise ValueError(
            f"aug_num has {len(aug_num)} entries but label has {len(label)}"
        )

    start_time = time.time()

    # --- Training split: augment every class folder ---
    augmented_data_path_from = os.path.join(data_path, 'cropped', 'Training')
    augmented_data_path_to = os.path.join(data_path, 'augmented', 'Training')

    for class_name, n_samples in zip(label, aug_num):
        folder_from = os.path.join(augmented_data_path_from, class_name)
        folder_to = os.path.join(augmented_data_path_to, class_name)
        os.makedirs(folder_to, exist_ok=True)
        augment_data(file_dir=folder_from, n_generated_samples=n_samples, save_to_dir=folder_to)

    execution_time = time.time() - start_time
    print(f"Elapsed time: {hms_string(execution_time)}")

    data_summary(augmented_data_path_to, label)

    # --- Testing split: copy images over as numbered PNGs, no augmentation ---
    augmented_data_path_from = os.path.join(data_path, 'cropped', 'Testing')
    augmented_data_path_to = os.path.join(data_path, 'augmented', 'Testing')

    for class_name in label:
        folderPath_from = os.path.join(augmented_data_path_from, class_name)
        folderPath_to = os.path.join(augmented_data_path_to, class_name)
        os.makedirs(folderPath_to, exist_ok=True)
        num = 1
        # sorted() makes the <class>-<n>.png numbering reproducible across runs.
        for filename in tqdm(sorted(os.listdir(folderPath_from))):
            image = cv2.imread(os.path.join(folderPath_from, filename))
            if image is None:
                # cv2.imread returns None for anything it cannot decode.
                tqdm.write(f"Skipping unreadable file: {os.path.join(folderPath_from, filename)}")
                continue
            # NOTE(review): with cv2.imread's default flags the array has three
            # channels, for which imsave is documented to ignore cmap - confirm
            # that cmap='gray' is not expected to have an effect here.
            plt.imsave(os.path.join(folderPath_to, f'{class_name}-{num}.png'), image, cmap='gray')
            num = num + 1


Let's see how many examples of each class (e.g. glioma, meningioma and pituitary tumor) there are after performing data augmentation:

In [5]:
def data_summary(main_path, label):
    """Print how many files each class folder holds and its share of the total.

    Args:
        main_path: Directory containing one sub-folder per class.
        label: Sequence of class folder names to count.

    Prints the overall number of entries, then one line per class with its
    percentage of the total and its absolute count. Nothing is returned.
    """
    # List every class folder exactly once.
    counts = [len(listdir(os.path.join(main_path, name))) for name in label]
    total = sum(counts)

    print(f"Number of examples: {total}")
    for name, count in zip(label, counts):
        # Guard against empty folders so the summary never divides by zero.
        prec = (count * 100.0) / total if total else 0.0
        # Report the count under the class it belongs to (the message used to
        # say "glioma" for every class).
        print(f"Percentage of {name} examples: {prec}%, number of {name} examples: {count}")
    

In [6]:
# figshare_MBTD: three tumour classes, one augmentation count per class.
augment_image(
    data_path='/root/autodl-tmp/dataset/figshare_MBTD',
    label=['glioma', 'meningioma', 'pituitary'],
    aug_num=[2, 5, 4],
)

Elapsed time: 0:9:48.5
Number of examples: 10535
Percentage of glioma examples: 32.46321784527765%, number of glioma examples: 3420
Percentage of meningioma examples: 32.22591362126246%, number of glioma examples: 3395
Percentage of pituitary examples: 35.310868533459896%, number of glioma examples: 3720


100%|██████████| 286/286 [00:12<00:00, 23.11it/s]
100%|██████████| 142/142 [00:06<00:00, 23.38it/s]
100%|██████████| 186/186 [00:10<00:00, 16.98it/s]


In [7]:
# SARTAJ_dataset: four classes (including 'notumor'), one augmentation count per class.
augment_image(
    data_path='/root/autodl-tmp/dataset/SARTAJ_dataset',
    label=['glioma', 'meningioma', 'notumor', 'pituitary'],
    aug_num=[2, 2, 5, 2],
)

Elapsed time: 0:7:48.0
Number of examples: 9794
Percentage of glioma examples: 25.301204819277107%, number of glioma examples: 2478
Percentage of meningioma examples: 25.178680824994895%, number of glioma examples: 2466
Percentage of notumor examples: 24.188278537880336%, number of glioma examples: 2369
Percentage of pituitary examples: 25.33183581784766%, number of glioma examples: 2481


100%|██████████| 100/100 [00:03<00:00, 30.09it/s]
100%|██████████| 115/115 [00:02<00:00, 40.32it/s]
100%|██████████| 105/105 [00:01<00:00, 88.77it/s]
100%|██████████| 74/74 [00:07<00:00, 10.39it/s]


In [8]:
# Combined Br35H+SARTAJ+figshare dataset: four classes, one augmentation count per class.
augment_image(
    data_path='/root/autodl-tmp/dataset/Br35H+SARTAJ+figshare',
    label=['glioma', 'meningioma', 'notumor', 'pituitary'],
    aug_num=[3, 3, 2, 2],
)

Elapsed time: 0:14:29.6
Number of examples: 19794
Percentage of glioma examples: 26.694958068101446%, number of glioma examples: 5284
Percentage of meningioma examples: 27.05365262200667%, number of glioma examples: 5355
Percentage of notumor examples: 24.173992118823886%, number of glioma examples: 4785
Percentage of pituitary examples: 22.077397191068002%, number of glioma examples: 4370


100%|██████████| 300/300 [00:10<00:00, 28.70it/s]
100%|██████████| 306/306 [00:09<00:00, 32.22it/s]
100%|██████████| 405/405 [00:06<00:00, 59.63it/s]
100%|██████████| 300/300 [00:14<00:00, 20.95it/s]


In [6]:
# Br35H: binary 'yes' / 'no' classes, same augmentation count for both.
augment_image(
    data_path='/root/autodl-tmp/dataset/Br35H',
    label=['yes', 'no'],
    aug_num=[3, 3],
)

Elapsed time: 0:7:27.1
Number of examples: 9599
Percentage of yes examples: 50.00520887592457%, number of glioma examples: 4800
Percentage of no examples: 49.99479112407543%, number of glioma examples: 4799


100%|██████████| 300/300 [00:19<00:00, 15.20it/s]
100%|██████████| 300/300 [00:09<00:00, 32.51it/s]
