<a href="https://colab.research.google.com/github/izzanurafifah/braintumor-classification/blob/main/braintumor_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Dataset

import os

folder_yes = 'brain_tumor_dataset/yes/'
folder_no = 'brain_tumor_dataset/no/'
count = 1


def _rename_sequentially(folder, prefix, start):
    """Rename every regular file in `folder` to `<prefix><n>.jpg`.

    Numbering begins at `start` and follows the sorted file names, so the
    result is reproducible. Sub-directories are left untouched.

    Returns:
        The next unused number (`start` plus the number of files renamed).
    """
    names = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    # Phase 1: move everything to temporary names. Renaming straight to the
    # final name can overwrite a not-yet-processed file that already carries
    # that name (e.g. when this cell is executed a second time).
    staged = []
    for position, name in enumerate(names):
        staged_name = f".renaming_{position}_{name}"
        os.rename(os.path.join(folder, name), os.path.join(folder, staged_name))
        staged.append(staged_name)

    # Phase 2: no final name can be occupied any more, so assign them.
    for offset, staged_name in enumerate(staged):
        final_name = f"{prefix}{start + offset}.jpg"
        os.rename(os.path.join(folder, staged_name), os.path.join(folder, final_name))

    return start + len(staged)


# The counter is shared on purpose to keep the existing naming scheme: the
# "no" images continue numbering where the "yes" images stopped.
count = _rename_sequentially(folder_yes, "Y_", count)
print("All files in Yes folder have been renamed.")

count = _rename_sequentially(folder_no, "N_", count)
print("All files in No folder have been renamed.")

In [None]:
# Exploratory Data Analysis (EDA)

import matplotlib.pyplot as plt

# Count the images available for each class in the original dataset.
tumorous = os.listdir('brain_tumor_dataset/yes/')
nontumorous = os.listdir('brain_tumor_dataset/no/')
totalY, totalN = len(tumorous), len(nontumorous)

data = dict(tumorous=totalY, nontumorous=totalN)
xAxis, yAxis = data.keys(), data.values()

# Bar chart of the class distribution.
fig = plt.figure(figsize=(3, 5))
plt.bar(xAxis, yAxis)
plt.title("Count of Brain Tumor Images")
plt.xlabel("Data")
plt.ylabel("Number of Brain Tumor Images")
plt.show()

In [None]:
# Data Augmentation

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import cv2

def augmented_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented copies of every image in `file_dir`.

    Args:
        file_dir: Directory containing the source images.
        n_generated_samples: Controls how many copies are written per image.
            The loop below stops only once this count has been exceeded, so
            `n_generated_samples + 1` files are produced per source image.
            This is kept as-is because the hard-coded split sizes used later
            in the notebook rely on the resulting totals.
        save_to_dir: Directory that receives the augmented JPEG files. It is
            created when missing.

    Files that OpenCV cannot decode are reported and skipped.
    """
    data_generated = ImageDataGenerator(rotation_range=10,
                                       width_shift_range=0.1,
                                       shear_range=0.1,
                                       brightness_range=(0.3, 1.0),
                                       horizontal_flip=True,
                                       vertical_flip=True,
                                       fill_mode='nearest')
    os.makedirs(save_to_dir, exist_ok=True)

    for filename in os.listdir(file_dir):
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable file: {filename}")
            continue

        # cv2.imread yields BGR channel order, while the generator saves the
        # array as an RGB image; convert so colours are not swapped on disk.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.reshape((1,) + image.shape)

        # splitext copes with any extension length (".jpg", ".jpeg", ...).
        save_prefix = "aug_" + os.path.splitext(filename)[0]
        i = 0
        # flow() is an endless iterator; each batch it yields has already
        # been written to save_to_dir.
        for batch in data_generated.flow(x=image, batch_size=1, save_to_dir=save_to_dir, save_prefix=save_prefix, save_format="jpg"):
            i += 1
            if i > n_generated_samples:
                break

augmented_data_path = 'augmented_data/'

# (source folder, n_generated_samples, class sub-folder) for each class,
# processed in the order: tumorous first, then non-tumorous.
for source_dir, n_samples, label in ((folder_yes, 6, 'yes'), (folder_no, 9, 'no')):
    augmented_data(source_dir, n_samples, augmented_data_path + label)

In [None]:
# Exploratory Data Analysis (EDA)

# Count the augmented images per class.
tumorous = os.listdir(augmented_data_path + 'yes')
nontumorous = os.listdir(augmented_data_path + 'no')
totalY, totalN = len(tumorous), len(nontumorous)
total = totalY + totalN

# Share of each class, in percent.
perY = (totalY / total) * 100
perN = (totalN / total) * 100

print(f"Number of samples: {total}")
print(f"Number of positive samples in percentage: {perY:.2f}%")
print(f"Number of negative samples in percentage: {perN:.2f}%")

data = dict(tumorous=totalY, nontumorous=totalN)
xAxis, yAxis = data.keys(), data.values()

# Bar chart of the class distribution after augmentation.
fig = plt.figure(figsize=(3, 5))
plt.bar(xAxis, yAxis)
plt.title("Count of Brain Tumor Images")
plt.xlabel("Data")
plt.ylabel("Number of Brain Tumor Images")
plt.show()

In [None]:
# Cropped Image

import cv2

def crop_brain_tumor(image):
    """Crop `image` to the extreme points of its largest bright region.

    The image is converted to grayscale, blurred, thresholded at 45 and
    cleaned with two erosions followed by two dilations. The external contour
    with the largest area then defines the crop rectangle.

    Args:
        image: BGR image array as returned by `cv2.imread`.

    Returns:
        The cropped image, or `image` unchanged when no contour is found or
        the crop rectangle would be empty.

    Raises:
        ValueError: If `image` is None (e.g. `cv2.imread` failed to decode).
    """
    if image is None:
        raise ValueError("crop_brain_tumor received None instead of an image")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    threshold = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    threshold = cv2.erode(threshold, None, iterations=2)
    threshold = cv2.dilate(threshold, None, iterations=2)

    # findContours returns (contours, hierarchy) or, in OpenCV 3.x,
    # (image, contours, hierarchy); index -2 is the contour list in both.
    contours = cv2.findContours(threshold.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        # Nothing brighter than the threshold: there is no region to crop to.
        return image
    c = max(contours, key=cv2.contourArea)

    # Bounding coordinates of the contour's extreme points.
    left, right = c[:, :, 0].min(), c[:, :, 0].max()
    top, bottom = c[:, :, 1].min(), c[:, :, 1].max()

    new_image = image[top:bottom, left:right]
    if new_image.size == 0:
        # A one-pixel-wide or one-pixel-high contour would yield an empty
        # array that cannot be resized or written to disk.
        return image

    return new_image

In [None]:
import os

folder_yes = 'augmented_data/yes/'
folder_no = 'augmented_data/no/'

# Replace every augmented image on disk with its cropped version,
# handling the "yes" folder first and the "no" folder second.
for folder in (folder_yes, folder_no):
    for filename in os.listdir(folder):
        image_path = folder + filename
        img = crop_brain_tumor(cv2.imread(image_path))
        cv2.imwrite(image_path, img)

In [None]:
# Image Load

import numpy as np
from sklearn.utils import shuffle

def load_data(dir_list, image_size):
    """Load, crop, resize and normalise all images from the given directories.

    Args:
        dir_list: Directories to read. A directory whose final path component
            is 'yes' is labelled 1; every other directory is labelled 0.
        image_size: `(width, height)` every image is resized to.

    Returns:
        Tuple `(X, y)`: `X` holds the images scaled to the range [0, 1] and
        `y` the matching 0/1 labels, both shuffled in unison.

    Files that OpenCV cannot decode are reported and skipped.
    """
    X = []
    y = []

    image_width, image_height = image_size

    for directory in dir_list:
        # normpath strips a trailing separator, so 'data/yes' and 'data/yes/'
        # both count as positive. A plain directory[-3:] check would silently
        # label the latter as 0.
        label = 1 if os.path.basename(os.path.normpath(directory)) == 'yes' else 0

        for filename in os.listdir(directory):
            image_path = os.path.join(directory, filename)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"Skipping unreadable file: {image_path}")
                continue

            image = crop_brain_tumor(image)
            image = cv2.resize(image, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
            image = image / 255.00
            X.append(image)
            y.append(label)

    X = np.array(X)
    y = np.array(y)

    X, y = shuffle(X, y)
    print(f"Number of example is: {len(X)}")
    print(f"X shape is: {X.shape}")
    print(f"y shape is: {y.shape}")

    return X, y

In [None]:
augmented_yes = augmented_data_path + 'yes'
augmented_no = augmented_data_path + 'no'

IMAGE_WIDTH, IMAGE_HEIGHT = (240, 240)

# load_data returns an (images, labels) pair; unpack it so X is the image
# array itself rather than a 2-tuple.
X, y = load_data([augmented_yes, augmented_no], (IMAGE_WIDTH, IMAGE_HEIGHT))

In [None]:
# Data Split (70% Train, 15% Test, 15% Validation)

import shutil

original_file_yes = 'augmented_data/yes/'

# List the directory once, keeping regular files only and sorting them so the
# split is reproducible (os.listdir order is arbitrary).
yes_files = sorted(
    name for name in os.listdir(original_file_yes)
    if os.path.isfile(os.path.join(original_file_yes, name))
)

# 70% train / 15% test / remainder validation, rounded half up with integer
# arithmetic. For 1085 files this gives 760 / 163 / 162.
n_yes = len(yes_files)
yes_train_end = (n_yes * 70 + 50) // 100
yes_test_end = yes_train_end + (n_yes * 15 + 50) // 100

train_yes = yes_files[:yes_train_end]
test_yes = yes_files[yes_train_end:yes_test_end]
validation_yes = yes_files[yes_test_end:]

# NOTE(review): the split happens after augmentation, so variants of one
# source image may end up in different splits - consider splitting first.
for split_name, split_files in (('train', train_yes),
                                ('test', test_yes),
                                ('validation', validation_yes)):
    dst_dir = os.path.join('tumorous_and_nontumorous', split_name, 'tumorous')
    os.makedirs(dst_dir, exist_ok=True)
    for filename in split_files:
        src = os.path.join(original_file_yes, filename)
        dst = os.path.join(dst_dir, filename)
        shutil.copyfile(src, dst)

In [None]:
original_file_no = 'augmented_data/no/'

# List the directory once, keeping regular files only and sorting them so the
# split is reproducible (os.listdir order is arbitrary).
no_files = sorted(
    name for name in os.listdir(original_file_no)
    if os.path.isfile(os.path.join(original_file_no, name))
)

# 70% train / 15% test / remainder validation, rounded half up with integer
# arithmetic. For 980 files this gives 686 / 147 / 147.
n_no = len(no_files)
no_train_end = (n_no * 70 + 50) // 100
no_test_end = no_train_end + (n_no * 15 + 50) // 100

train_no = no_files[:no_train_end]
test_no = no_files[no_train_end:no_test_end]
validation_no = no_files[no_test_end:]

# NOTE(review): the split happens after augmentation, so variants of one
# source image may end up in different splits - consider splitting first.
for split_name, split_files in (('train', train_no),
                                ('test', test_no),
                                ('validation', validation_no)):
    dst_dir = os.path.join('tumorous_and_nontumorous', split_name, 'nontumorous')
    os.makedirs(dst_dir, exist_ok=True)
    for filename in split_files:
        src = os.path.join(original_file_no, filename)
        dst = os.path.join(dst_dir, filename)
        shutil.copyfile(src, dst)

In [None]:
# Model Build

# Training images are rescaled to [0, 1] and randomly augmented.
# horizontal_flip / vertical_flip are on/off switches, not probabilities:
# any truthy value (such as the previous 0.4) simply enables random flipping,
# so they are written as booleans to state what actually happens.
train_datagen = ImageDataGenerator(rescale=1./255,
                  horizontal_flip=True,
                  vertical_flip=True,
                  rotation_range=40,
                  shear_range=0.2,
                  width_shift_range=0.4,
                  height_shift_range=0.4,
                  fill_mode='nearest')
# Evaluation data is only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1.0/255)
validation_datagen = ImageDataGenerator(rescale=1.0/255)

In [None]:
def clean_dataset(base_dir):
    """Delete every `.ipynb_checkpoints` directory found under `base_dir`.

    Each removed path is printed. A missing `base_dir` is not an error:
    os.walk simply yields nothing.
    """
    for root, dirs, _files in os.walk(base_dir):
        if ".ipynb_checkpoints" in dirs:
            checkpoint_path = os.path.join(root, ".ipynb_checkpoints")
            shutil.rmtree(checkpoint_path)
            print(f"Deleted: {checkpoint_path}")
            # Prune the entry in place so os.walk does not try to descend
            # into the directory that was just removed.
            dirs.remove(".ipynb_checkpoints")

dataset_base_dir = "tumorous_and_nontumorous"
clean_dataset(dataset_base_dir)

In [None]:
# Training batches are shuffled (seeded for reproducibility).
train_generator = train_datagen.flow_from_directory(
    'tumorous_and_nontumorous/train/',
    batch_size=32,
    target_size=(240, 240),
    class_mode='categorical',
    shuffle=True,
    seed=42,
    color_mode='rgb',
)

# Evaluation generators must NOT shuffle: predictions are later matched
# against test_generator.filenames, which is only valid when batches are
# yielded in directory order. Loss and accuracy do not depend on the order.
test_generator = test_datagen.flow_from_directory(
    'tumorous_and_nontumorous/test/',
    batch_size=32,
    target_size=(240, 240),
    class_mode='categorical',
    shuffle=False,
    seed=42,
    color_mode='rgb',
)

validation_generator = validation_datagen.flow_from_directory(
    'tumorous_and_nontumorous/validation/',
    batch_size=32,
    target_size=(240, 240),
    class_mode='categorical',
    shuffle=False,
    seed=42,
    color_mode='rgb',
)

In [None]:
# class_indices maps label -> index; invert it to look labels up by index.
class_labels = train_generator.class_indices
class_name = dict((index, label) for label, index in class_labels.items())
class_name

In [None]:
# ImageNet-pretrained VGG19 convolutional base with every layer frozen.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(240, 240, 3))
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Classification head: flatten -> 4608 -> dropout -> 1152 -> 2-way softmax.
x = base_model.output
flat = Flatten()(x)
class_1 = Dense(4608, activation='relu')(flat)
drop_out = Dropout(0.2)(class_1)
class_2 = Dense(1152, activation='relu')(drop_out)
output = Dense(2, activation='softmax')(class_2)

model_01 = Model(inputs=base_model.input, outputs=output)
model_01.summary()

In [None]:
# Callback

filepath = 'model.keras'

# Stop once val_loss has not improved for 4 consecutive epochs.
es = EarlyStopping(monitor='val_loss', verbose=1, mode='min', patience=4)

# Keep only the best model (lowest val_loss) at `filepath`.
cp = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')

# Halve the learning rate after 3 epochs without val_accuracy improvement.
# min_lr has to lie below the optimizer's starting rate (0.0001 in this
# notebook); with min_lr equal to it the callback could never lower anything.
lrr = ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1, factor=0.5, min_lr=1e-6)

In [None]:
# The legacy `decay` keyword is no longer accepted by current Keras
# optimizers (it is rejected or ignored, depending on the version), so it is
# left out; use a learning-rate schedule if decay is wanted.
sgd = SGD(learning_rate=0.0001, momentum=0.9, nesterov=True)
model_01.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Train the frozen-base model: at most 20 epochs of 10 batches each.
history_01 = model_01.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    steps_per_epoch=10,
    callbacks=[es, cp, lrr],
)

In [None]:
# Plot Performance

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
fig.suptitle("Model Training (Frozen CNN)", fontsize=12)

# One x-axis tick per completed epoch, starting at 1.
max_epoch = len(history_01.history['accuracy']) + 1
epochs_list = list(range(1, max_epoch))

# (axis, history key, panel title, y-axis label, legend position)
panels = (
    (ax1, 'accuracy', 'Training Accuracy', 'Accuracy', 'lower center'),
    (ax2, 'loss', 'Training Loss', 'Loss', 'upper center'),
)
for axis, metric, panel_title, y_label, legend_loc in panels:
    axis.plot(epochs_list, history_01.history[metric], color='b', linestyle='-', label='Training Data')
    axis.plot(epochs_list, history_01.history['val_' + metric], color='r', linestyle='-', label='Validation Data')
    axis.set_title(panel_title, fontsize=12)
    axis.set_xlabel('Epochs', fontsize=12)
    axis.set_ylabel(y_label, fontsize=12)
    axis.legend(frameon=False, loc=legend_loc, ncol=2)

plt.savefig("training_frozencnn.jpeg", format='jpeg', dpi=100, bbox_inches='tight')

In [None]:
# Make sure the target directory exists before the model file is written to
# it; saving into a missing directory fails.
os.makedirs("model_weights", exist_ok=True)
model_01.save(filepath="model_weights/vgg19_model_01.keras", overwrite=True)

In [None]:
# Restore the saved weights, then score the validation split followed by the
# test split.
model_01.load_weights("model_weights/vgg19_model_01.keras")
vgg_val_eval_01, vgg_test_eval_01 = (
    model_01.evaluate(split) for split in (validation_generator, test_generator)
)

In [None]:
# Each evaluation result holds the loss at index 0 and the accuracy at index 1.
val_loss_01, val_accuracy_01 = vgg_val_eval_01[0], vgg_val_eval_01[1]
test_loss_01, test_accuracy_01 = vgg_test_eval_01[0], vgg_test_eval_01[1]

print(f'Validation Loss: {val_loss_01}')
print(f'Validation Accuracy: {val_accuracy_01}')
print(f'Test Loss: {test_loss_01}')
print(f'Test Accuracy: {test_accuracy_01}')

In [None]:
filenames = test_generator.filenames
nb_sample = len(filenames)

# Start from the first batch so predictions line up with `filenames`
# (this also requires the generator to be created with shuffle=False).
test_generator.reset()

# `steps` counts batches, not samples: len(test_generator) is the number of
# batches needed to cover the whole test set exactly once.
vgg_prediction_01 = model_01.predict(test_generator, steps=len(test_generator), verbose=1)
y_pred = np.argmax(vgg_prediction_01, axis=1)

In [None]:
# Incremental Unfreezing and Fine Tuning

# Rebuild the same architecture as model_01 on a fresh VGG19 base.
base_model = VGG19(include_top=False, input_shape=(240, 240, 3))
base_model_layer_names = [layer.name for layer in base_model.layers]

x = base_model.output
flat = Flatten()(x)

class_1 = Dense(4608, activation='relu')(flat)
drop_out = Dropout(0.2)(class_1)
class_2 = Dense(1152, activation='relu')(drop_out)
output = Dense(2, activation='softmax')(class_2)

model_02 = Model(base_model.inputs, output)
# Continue from the weights learned with the frozen base.
model_02.load_weights('model_weights/vgg19_model_01.keras')

# Freeze every base layer up to the first of the named block-5 convolutions;
# that layer and all layers after it are left trainable.
set_trainable = False
for layer in base_model.layers:
    if layer.name in ['block5_conv4', 'block5_conv3']:
        set_trainable = True
    layer.trainable = set_trainable

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model_02.summary()

In [None]:
# The legacy `decay` keyword is no longer accepted by current Keras
# optimizers (it is rejected or ignored, depending on the version), so it is
# left out; use a learning-rate schedule if decay is wanted.
sgd = SGD(learning_rate=0.0001, momentum=0.9, nesterov=True)
model_02.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Fine-tune the partially unfrozen model: at most 20 epochs of 10 batches each.
history_02 = model_02.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    steps_per_epoch=10,
    callbacks=[es, cp, lrr],
)

In [None]:
# Plot Performance

fig, (ax1,ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
# This figure shows the fine-tuning run (history_02), so it carries its own
# title and output file instead of reusing the frozen-CNN ones, which
# mislabelled the plot and overwrote the earlier image on disk.
fig.suptitle("Model Training (Fine-Tuned CNN)", fontsize=12)
# One x-axis tick per completed epoch, starting at 1.
max_epoch = len(history_02.history['accuracy'])+1
epochs_list = list(range(1, max_epoch))

# Left panel: training vs. validation accuracy.
ax1.plot(epochs_list, history_02.history['accuracy'], color='b', linestyle='-', label='Training Data')
ax1.plot(epochs_list, history_02.history['val_accuracy'], color='r', linestyle='-', label='Validation Data')
ax1.set_title('Training Accuracy', fontsize=12)
ax1.set_xlabel('Epochs', fontsize=12)
ax1.set_ylabel('Accuracy', fontsize=12)
ax1.legend(frameon=False, loc='lower center', ncol=2)

# Right panel: training vs. validation loss.
ax2.plot(epochs_list, history_02.history['loss'], color='b', linestyle='-', label='Training Data')
ax2.plot(epochs_list, history_02.history['val_loss'], color='r', linestyle='-', label='Validation Data')
ax2.set_title('Training Loss', fontsize=12)
ax2.set_xlabel('Epochs', fontsize=12)
ax2.set_ylabel('Loss', fontsize=12)
ax2.legend(frameon=False, loc='upper center', ncol=2)
plt.savefig("training_finetunedcnn.jpeg", format='jpeg', dpi=100, bbox_inches='tight')

In [None]:
# Make sure the target directory exists before the model file is written to
# it; saving into a missing directory fails.
os.makedirs("model_weights", exist_ok=True)
model_02.save(filepath="model_weights/vgg19_model_02.keras", overwrite=True)

In [None]:
# Restore the saved weights, then score the validation split followed by the
# test split.
model_02.load_weights("model_weights/vgg19_model_02.keras")
vgg_val_eval_02, vgg_test_eval_02 = (
    model_02.evaluate(split) for split in (validation_generator, test_generator)
)

In [None]:
# Unfreezing Entire Network

# Fresh VGG19 base; no layer is frozen here, so the whole network trains.
base_model = VGG19(include_top=False, input_shape=(240, 240, 3))
base_model_layer_names = [base_layer.name for base_layer in base_model.layers]

# Same classification head as the earlier models:
# flatten -> 4608 -> dropout -> 1152 -> 2-way softmax.
x = base_model.output
flat = Flatten()(x)
class_1 = Dense(4608, activation='relu')(flat)
drop_out = Dropout(0.2)(class_1)
class_2 = Dense(1152, activation='relu')(drop_out)
output = Dense(2, activation='softmax')(class_2)

model_03 = Model(inputs=base_model.inputs, outputs=output)

# Start from the weights of the fine-tuned model.
model_03.load_weights('model_weights/vgg19_model_02.keras')

In [None]:
# The legacy `decay` keyword is no longer accepted by current Keras
# optimizers (it is rejected or ignored, depending on the version), so it is
# left out; use a learning-rate schedule if decay is wanted.
sgd = SGD(learning_rate=0.0001, momentum=0.9, nesterov=True)
model_03.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the fully unfrozen model.
model_03.summary()

In [None]:
# Train the fully unfrozen model: at most 20 epochs of 10 batches each.
history_03 = model_03.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    steps_per_epoch=10,
    callbacks=[es, cp, lrr],
)

In [None]:
# Make sure the target directory exists before the model file is written to
# it; saving into a missing directory fails.
os.makedirs("model_weights", exist_ok=True)
model_03.save(filepath="model_weights/vgg19_unfrozen.keras", overwrite=True)

In [None]:
# Restore the saved weights, then score the validation split followed by the
# test split.
model_03.load_weights("model_weights/vgg19_unfrozen.keras")
vgg_val_eval_03, vgg_test_eval_03 = (
    model_03.evaluate(split) for split in (validation_generator, test_generator)
)