In [16]:
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import tensorflow
import keras

In [2]:
# Directory which contains the image dataset from Kaggle. get_data() expects one
# sub-folder per entry of CATEGORIES directly inside it.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
DATADIR = "C:/Users/Vern Sin/Documents/Jupyter/CXR Classification/ori"

# The dataset has two categories. The position of a name in this list is used
# as its numeric class label (covid -> 0, health -> 1).
CATEGORIES = ["covid", "health"]

In [3]:
# resize an image (from either category) to a fixed size
def resize_img(LargeImage, size=(256, 256)):
    """Resize an image with OpenCV.

    Parameters
    ----------
    LargeImage : numpy.ndarray
        Image array as accepted by ``cv2.resize``.
    size : sequence of two ints, optional
        Target size forwarded to ``cv2.resize`` as ``dsize``, which OpenCV
        interprets as ``(width, height)``. Defaults to ``(256, 256)``, the
        size this notebook has always used, so existing calls are unaffected.

    Returns
    -------
    numpy.ndarray
        Whatever ``cv2.resize`` returns for the requested size.
    """
    # tuple() lets callers pass a list as well; cv2 expects a tuple for dsize.
    return cv2.resize(LargeImage, tuple(size))

In [4]:
# create pseudo color image by stacking three views of the same grayscale image
def increase_channel(smallImage):
    """Build a 3-channel pseudo-colour image from a single-channel image.

    The three channels are, in order along the last axis:

    0. the histogram-equalised image (``cv2.equalizeHist``),
    1. the unmodified input,
    2. the bilateral-filtered image (``cv2.bilateralFilter`` with d=9,
       sigmaColor=75, sigmaSpace=75).

    Parameters
    ----------
    smallImage : numpy.ndarray
        2-D single-channel image. Any height/width is accepted; the image
        must be of a type ``cv2.equalizeHist`` supports (8-bit in this
        notebook, where images come from ``cv2.imread(..., IMREAD_GRAYSCALE)``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height, width, 3)``. Callers that wrap the result
        in ``np.array(...)`` or index it as ``result[i][j]`` keep working.
    """
    image_BilateralFilter = cv2.bilateralFilter(smallImage, 9, 75, 75)
    image_HistEqual = cv2.equalizeHist(smallImage)
    # Stack the planes along a new last axis instead of building every pixel
    # in a Python loop hard-wired to 256x256.
    return np.stack([image_HistEqual, smallImage, image_BilateralFilter], axis=-1)

In [5]:
# one entry point for the per-image pipeline: resize first, then pseudo color
def image_preprocessing(ori_Image):
    """Return ``increase_channel(resize_img(ori_Image))``.

    ``ori_Image`` is handed to ``resize_img`` unchanged, and the resized
    result is handed straight to ``increase_channel``.
    """
    return increase_channel(resize_img(ori_Image))

In [9]:
# get the image dataset by looping through categories in data directory
def get_data():
    """Load, preprocess and label every image found under ``DATADIR``.

    For each name in ``CATEGORIES`` the folder ``DATADIR/<name>`` is listed,
    every entry is read as a grayscale image and passed through
    ``image_preprocessing``. Each processed image is also written to the
    current working directory as ``<label>_<n>.png``, where ``<label>`` is the
    category's position in ``CATEGORIES`` and ``<n>`` counts the images
    written for that category.

    Entries that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
    reported and skipped instead of crashing the whole run.

    Returns
    -------
    X : list
        Processed images, in the order they were read.
    y : list of int
        Class label for each entry of ``X``.
    """
    X = []
    y = []
    for class_label, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        counter = 0
        # os.listdir gives no ordering guarantee; sorting keeps the file
        # numbering and the later seeded train/test split repeatable.
        for image in sorted(os.listdir(path)):
            image_path = os.path.join(path, image)
            img_ary = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img_ary is None:
                # Not an image OpenCV can read (e.g. a stray non-image file).
                print("Skipping unreadable file: " + image_path)
                continue
            img_processed_ary = image_preprocessing(img_ary)

            # write the pseudo color image
            filename = str(class_label) + "_" + str(counter) + ".png"
            cv2.imwrite(filename, np.array(img_processed_ary))
            counter += 1

            # kept for the later train/test split
            X.append(img_processed_ary)
            y.append(class_label)

    return X, y

In [10]:
# Load the whole dataset, then hold out 20% of it as the test set.
# random_state is fixed so the split is the same on every run.
X, y = get_data()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# writing images from train set into train folder separated by different folders(categories)
# cv2.imwrite does not create missing folders (it only returns False), so make
# sure both category folders exist before writing anything.
for folder in ("covid", "health"):
    os.makedirs("./training/" + folder, exist_ok=True)

# one running counter per category; it becomes the file name inside the folder
counter_covid = 0
counter_health = 0
for i in range(len(X_train)):
    filename = "./training/"
    if y_train[i] == 0:
        filename += "covid/"+str(counter_covid)+".png"
        cv2.imwrite(filename, np.array(X_train[i]))
        counter_covid += 1
    elif y_train[i] == 1:
        filename += "health/"+str(counter_health)+".png"
        cv2.imwrite(filename, np.array(X_train[i]))
        counter_health += 1

In [12]:
# writing images from test set into test folder separated by different folders(categories)
# cv2.imwrite does not create missing folders (it only returns False), so make
# sure both category folders exist before writing anything.
for folder in ("covid", "health"):
    os.makedirs("./testing/" + folder, exist_ok=True)

# one running counter per category; it becomes the file name inside the folder
counter_covid = 0
counter_health = 0
for i in range(len(X_test)):
    filename = "./testing/"
    # Index the held-out split here: reading X_train/y_train would copy
    # training images into the test folder.
    if y_test[i] == 0:
        filename += "covid/"+str(counter_covid)+".png"
        cv2.imwrite(filename, np.array(X_test[i]))
        counter_covid += 1
    elif y_test[i] == 1:
        filename += "health/"+str(counter_health)+".png"
        cv2.imwrite(filename, np.array(X_test[i]))
        counter_health += 1

In [17]:
# augment the images from the training set only, writing the results into a folder named "preview"
# NOTE(review): absolute, machine-specific path; the export cell above writes to
# the relative "./training/" folder, so the two only match when the notebook runs
# from the project directory -- confirm.
TRAINDIR = "C:/Users/Vern Sin/Documents/Jupyter/CXR Classification/training"
def augment_image():
    """Generate augmented copies of every training image.

    Each file under ``TRAINDIR/<category>`` is loaded and run through an
    ``ImageDataGenerator`` configured for small random rotations (up to 5
    degrees), horizontal/vertical shifts (10%) and zoom (10%). The generated
    images are saved as PNG files in the ``preview`` folder, with the
    category name as file-name prefix. 21 augmented images are produced per
    source image.

    Returns
    -------
    None
        The function works purely through the files it writes.
    """
    # Make sure the output folder exists before Keras tries to save into it.
    os.makedirs('preview', exist_ok=True)
    train = ImageDataGenerator(rotation_range = 5, width_shift_range = 0.1, height_shift_range = 0.1, zoom_range = 0.1)
    for category in CATEGORIES:
        path = os.path.join(TRAINDIR, category)
        for image in os.listdir(path):
            x_ray_img = load_img(os.path.join(path, image))
            # 3-D array; (height, width, channels) with Keras' default channels_last format
            x_ray_ary = img_to_array(x_ray_img)
            # add a leading batch axis of size 1, since flow() works on batches of images
            x_ray_ary = x_ray_ary.reshape((1,) + x_ray_ary.shape)
            i = 0
            # flow() keeps yielding batches forever, so stop manually; every
            # batch drawn is saved to disk, which gives 21 files per image
            # (the break only triggers once i reaches 21).
            for batch in train.flow(x_ray_ary, batch_size=1, save_to_dir='preview', save_prefix=category, save_format='png'):
                i += 1
                if i > 20:
                    break
augment_image()