# Image Augmentation

## Import Modules

In [0]:
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import skimage.filters as filters
import tensorflow as tf
from google.colab import drive
from scipy.io import loadmat, savemat
from skimage.transform import rescale
from sklearn.model_selection import KFold

## Get a List of File Names of the Dataset


Assume that the dataset files we are looking for are MATLAB 'mat' files, and that the image dataset has been uploaded to Google Drive in advance. You can put the dataset anywhere in the drive; in our case it lives in 'CS168-Automatic-TICI/feature_images', where CS168-Automatic-TICI is the project repository.

In [2]:
# Remember the directory the notebook was started from.
BASE_DIR = os.getcwd()

# Mount Google Drive on the virtual machine running this notebook, under
# '<working directory>/drive'. This will prompt for authorization.
# force_remount=True is passed so the mount is redone even if the drive
# is already mounted.
DRIVE_MOUNT_DIR = os.path.join(BASE_DIR, 'drive')
drive.mount(DRIVE_MOUNT_DIR, force_remount=True)

Mounted at /content/drive


In [3]:
# Now the google drive can be accessed at 'DRIVE_MOUNT_DIR/My Drive'.
# Specify the directory to the data set.
REPOSITORY_NAME = 'CS168-Automatic-TICI'
REPOSITORY = os.path.join(DRIVE_MOUNT_DIR, 'My Drive', REPOSITORY_NAME)
DATA_DIR_NAME = 'feature_images'
DATA_DIR = os.path.join(REPOSITORY, DATA_DIR_NAME)

# Collect every mat file under the data directory (including subdirectories).
# Each entry is a path RELATIVE to DATA_DIR, so os.path.join(DATA_DIR, entry)
# always resolves to the real file. Accumulating across the whole walk (rather
# than rebinding the list per directory) keeps files from every directory, and
# the list is simply empty when DATA_DIR does not exist.
files = sorted(
    os.path.relpath(os.path.join(root, name), DATA_DIR)
    for root, _dirs, names in os.walk(DATA_DIR)
    for name in names
    if name.lower().endswith('.mat')
)
nfiles = len(files)
print('{} files found in the data directory \'{}\'.'.format(nfiles, DATA_DIR))

145 files found in the data directory '/content/drive/My Drive/CS168-Automatic-TICI/feature_images'.


## Read the Files, Extract Feature Images and TICIs

Each mat file contains an image set that represents the revascularization of a patient, and a TICI score. Read the files and output the image sets and TICI scores. Explore one of the image sets to get the image set dimensions.

In [4]:
images = []
TICIs = []

# TODO: put this part into a function to reduce RAM usage

# Walk through the discovered mat files, collecting the 'image' entry of each
# file into `images` and its 'TICI' entry (as a string) into `TICIs`.
for n, mat_name in enumerate(files):
    # Report progress once every ten files.
    if not n % 10:
        print('{} / {} done'.format(n, nfiles))

    mat_path = os.path.join(DATA_DIR, mat_name)
    mat_content = loadmat(mat_path)

    # Look up both entries before appending either one, so the two lists
    # never get out of step if a key is missing.
    image_set = mat_content['image']
    tici_raw = mat_content['TICI']

    images.append(image_set)
    # Only the first element of the loaded 'TICI' entry is kept.
    TICIs.append(str(tici_raw[0]))

0 / 145 done
10 / 145 done
20 / 145 done
30 / 145 done
40 / 145 done
50 / 145 done
60 / 145 done
70 / 145 done
80 / 145 done
90 / 145 done
100 / 145 done
110 / 145 done
120 / 145 done
130 / 145 done
140 / 145 done


In [0]:
# Image augmentation
# Random transformations applied to every generated sample. Per the Keras
# ImageDataGenerator documentation: rotation_range is a range in degrees,
# shift ranges below 1 are fractions of the image width/height, and
# horizontal_flip randomly mirrors images left-right.
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'horizontal_flip': True,
}
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [6]:
# Fail early with a clear message when nothing was loaded; otherwise the
# reshape below would crash with an opaque TypeError on a None shape.
if len(images) == 0:
    raise ValueError('No images were loaded; cannot determine the image shape.')

# The first image set's shape is taken as the shape of the whole dataset.
image_shape = np.shape(images[0])
print(image_shape)

# Stack the image sets and append a trailing axis of size 1, giving an array
# of shape (number of images, image_shape[0], image_shape[1], 1). Keras'
# ImageDataGenerator.flow expects rank-4 input.
reshaped_images = np.array(images).reshape(len(images), image_shape[0], image_shape[1], 1)

(1024, 1024)


In [7]:
SAVE_DIR_NAME = 'augmented_images'
SAVE_DIR = os.path.join(REPOSITORY, SAVE_DIR_NAME)

print(SAVE_DIR)

# Start from an empty output directory: discard the results of any previous
# run, then recreate the directory. Done in pure Python rather than via shell
# commands so the path is never interpolated into a shell string.
if os.path.isdir(SAVE_DIR):
    shutil.rmtree(SAVE_DIR)
os.makedirs(SAVE_DIR)

# Number of augmented copies of the whole dataset to generate.
AUGMENTATION_FACTOR = 10
augmentation_count = 0
fname = 1  # running index used as the output file name (1.mat, 2.mat, ...)

# With batch_size equal to the dataset size, every batch yielded by the
# generator is one full augmented pass over the dataset.
augmented_batches = datagen.flow(reshaped_images, TICIs, batch_size=len(reshaped_images))

# The generator loops indefinitely, so the number of passes is bounded by
# zipping it with a finite range. The range is consulted first, so no extra
# batch is generated once AUGMENTATION_FACTOR passes are done (and none at all
# when AUGMENTATION_FACTOR is 0 or negative).
for augmentation_count, (batch_X, batch_y) in zip(range(1, AUGMENTATION_FACTOR + 1), augmented_batches):
    # NOTE(review): each saved image keeps the trailing size-1 axis added when
    # building reshaped_images -- confirm downstream loaders expect that.
    for augmented_image, tici_label in zip(batch_X, batch_y):
        savemat(os.path.join(SAVE_DIR, str(fname) + '.mat'), {
            'image': augmented_image,
            'TICI': tici_label
        })
        fname += 1

    print('{}% image dataset generated.'.format(augmentation_count * 100))
        
        


/content/drive/My Drive/CS168-Automatic-TICI/augmented_images
100% image dataset generated.
200% image dataset generated.
300% image dataset generated.
400% image dataset generated.
500% image dataset generated.
600% image dataset generated.
700% image dataset generated.
800% image dataset generated.
900% image dataset generated.
1000% image dataset generated.
