In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import os
from PIL import Image
import cv2

from keras.preprocessing.image import ImageDataGenerator
from keras.utils import load_img

2023-12-04 19:09:58.722716: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Loading data

In [2]:
# Directories holding each dataset split, relative to the notebook's working directory.
train, test, val = (
    '../data/chest_xray/train',
    '../data/chest_xray/test',
    '../data/chest_xray/val',
)

### Loading the images from the training, testing and validation folders

In [3]:
# Class names; a sample's numeric class is the index of its folder name in this list.
labels = ['PNEUMONIA', 'NORMAL']
# Side length (in pixels) every image is resized to.
img_size = 150

def get_training_data(data_dir):
    """Load every image under ``data_dir/<label>`` as a resized grayscale array.

    For each class name in ``labels`` the matching sub-folder of ``data_dir``
    is scanned. Each file is read in grayscale with OpenCV and resized to
    ``img_size`` x ``img_size``. Files that cannot be decoded or resized are
    reported (with their path) and skipped instead of aborting the whole load.

    Args:
        data_dir: Directory containing one sub-folder per entry in ``labels``.

    Returns:
        An object-dtype NumPy array with one ``[image, class_index]`` entry per
        loaded image, where ``class_index`` is the position of the folder name
        in ``labels``.

    Raises:
        OSError: propagated from ``os.listdir`` when a label sub-folder is
            missing or unreadable.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order is the same on every run and every machine.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            # Nested folders are not images; skip them without attempting a read.
            if not os.path.isfile(img_path):
                continue
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                # cv2.imread signals failure by returning None rather than raising.
                if img_arr is None:
                    print(f"Error reading image: could not decode {img_path}")
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except cv2.error as e:
                print(f"Error reading image {img_path}: {e}")
                continue
            data.append([resized_arr, class_num])

    return np.array(data, dtype=object)

In [4]:
# Load all three splits. Note that the names are rebound from directory paths
# to loaded arrays, so the path cell must be re-run before running this again.
train, test, val = map(get_training_data, (train, test, val))

## Processing

We split each dataset into features (`x`, the images) and labels (`y`, the class indices).

In [5]:
# Each loaded entry is an [image, class_index] pair; separate the pairs into
# parallel lists of images (x) and class indices (y) for every split.
x_train = [image for image, _ in train]
y_train = [target for _, target in train]

x_val = [image for image, _ in val]
y_val = [target for _, target in val]

x_test = [image for image, _ in test]
y_test = [target for _, target in test]

Normalize the grayscale pixel values by dividing by 255 to reduce the effect of illumination differences.

In [6]:
# Stack each split into a single array and divide the pixel values by 255.
x_train = np.asarray(x_train) / 255.0
x_val = np.asarray(x_val) / 255.0
x_test = np.asarray(x_test) / 255.0

In [7]:
# resize data
x_train = x_train.reshape(-1, img_size, img_size, 1)
y_train = np.array(y_train)

x_val = x_val.reshape(-1, img_size, img_size, 1)
y_val = np.array(y_val)

x_test = x_test.reshape(-1, img_size, img_size, 1)
y_test = np.array(y_test)

## Data augmentation techniques

1. Randomly rotate training images by up to 30 degrees.
2. Randomly zoom training images by up to 20%.
3. Randomly shift images horizontally by up to 10% of the width.
4. Randomly shift images vertically by up to 10% of the height.
5. Randomly flip images horizontally.

Once the data augmentation is applied, the model is trained using the augmented training dataset.


In [8]:
datagen = ImageDataGenerator(
    # Random geometric augmentations (see the list above).
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    # Centering, std-normalization and ZCA whitening are all left disabled.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

In [9]:
# Fit the generator on the training images.
# NOTE(review): per the Keras documentation, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled; all three are False in the generator above, so this call is
# presumably not needed — confirm before removing.
datagen.fit(x_train)

## Save processed data

In [10]:
# Persist every processed split as a .npy file.
processed_dir = '../data/processed'
# np.save does not create missing folders, so make sure the target exists;
# exist_ok keeps the cell safe to re-run.
os.makedirs(processed_dir, exist_ok=True)

processed_arrays = {
    'x_train': x_train,
    'y_train': y_train,
    'x_val': x_val,
    'y_val': y_val,
    'x_test': x_test,
    'y_test': y_test,
}
for array_name, array in processed_arrays.items():
    np.save(f'{processed_dir}/{array_name}.npy', array)