In [34]:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

import os
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from sklearn.metrics import confusion_matrix

In [3]:
#get labels
labels = pd.read_csv('./dataset/label.csv')
print(labels.head())
#get images from file name
def fetch_image(row_id, root = "./dataset/image/"):
    file_name = f"IMAGE_{str.zfill(str(row_id), 4)}.jpg"
    file_path = os.path.join(root, file_name)
    img = Image.open(file_path)
    return np.array(img)

width = fetch_image(1).shape[0]
height = fetch_image(1).shape[1]
depth = fetch_image(1).shape[2]

        file_name             label
0  IMAGE_0000.jpg  meningioma_tumor
1  IMAGE_0001.jpg          no_tumor
2  IMAGE_0002.jpg  meningioma_tumor
3  IMAGE_0003.jpg      glioma_tumor
4  IMAGE_0004.jpg  meningioma_tumor


In [5]:
batch_size = 16

epochs = 30

model = Sequential()

# Layer 1
model.add(Conv2D(16, (5,5), padding='same', activation='relu', use_bias= True, input_shape=(width, height,  1)))
model.add(MaxPooling2D(pool_size=(2,2)))

# Layer 2
model.add(Conv2D(64, (4,4), padding='same', use_bias= True, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

# Layer 3
model.add(Conv2D(16, (3,3), padding='same', use_bias = True, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

# Features have been extracted using CNN now we need to classify using a two-layer fully connnected NN

# Two fully connected layers
model.add(Flatten())
# Prevent Overfitting
model.add(Dense(128, activation='relu', use_bias=True))
model.add(Dropout(0.3))

model.add(Dense(64, activation='relu', use_bias=True))
model.add(Dropout(0.3))

model.add(Dense(4, activation="softmax"))

# Compile the model
model.compile(optimizer="rmsprop", loss = 'categorical_crossentropy', metrics=["accuracy"])

model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 512, 512, 16)      416       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 256, 256, 16)      0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 256, 256, 64)      16448     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 128, 128, 64)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 128, 128, 16)      9232      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 64, 64, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 65536)            

In [6]:

# Image Generators (Training and Validation)
datagen = ImageDataGenerator(rescale=1./255, 
                                  rotation_range=90, 
                                  horizontal_flip=True, 
                                  vertical_flip = True, 
                                  shear_range= 0.2,
                                  zoom_range=0.2,
                                  validation_split=0.2)




In [7]:

data_directory = "./dataset/image"

train_data_gen = datagen.flow_from_dataframe(labels,
                                                  directory = data_directory, 
                                                  x_col = 'file_name',
                                                  y_col = 'label',
                                                  color_mode='grayscale',
                                                  subset="training",
                                                  target_size= (width, height),
                                                  batch_size=batch_size,
                                                  class_mode="categorical")

val_data_gen = datagen.flow_from_dataframe(labels,
                                              directory = data_directory,
                                              x_col = 'file_name',
                                              y_col = 'label',
                                              color_mode= 'grayscale',
                                              subset="validation",
                                              target_size= (width, height),
                                              batch_size=batch_size, 
                                              class_mode="categorical")
print(val_data_gen.n)


Found 2400 validated image filenames belonging to 4 classes.
Found 600 validated image filenames belonging to 4 classes.
600


In [8]:

# model fitting

train_step = train_data_gen.n//train_data_gen.batch_size

val_step = val_data_gen.n//val_data_gen.batch_size

In [9]:

history = model.fit_generator(generator=train_data_gen,
                              steps_per_epoch=train_step,
                              validation_data=val_data_gen,
                              validation_steps=val_step, 
                              epochs=epochs)

model.save('./models/model4')


Epoch 1/30


KeyboardInterrupt: 

In [1]:
def show_final_history(history):
    fig, ax = plt.subplots(1, 2, figsize=(15,5))
    ax[0].set_title('loss')
    ax[0].plot(history.epoch, history.history["loss"], label="Train loss")
    ax[0].plot(history.epoch, history.history["val_loss"], label="Validation loss")
    ax[1].set_title('acc')
    ax[1].plot(history.epoch, history.history["accuracy"], label="Train acc")
    ax[1].plot(history.epoch, history.history["val_accuracy"], label="Validation acc")
    ax[0].legend()
    ax[1].legend()

show_final_history(history)

NameError: name 'history' is not defined

In [10]:
model = tf.keras.models.load_model('./models/model4')

In [13]:
model.evaluate_generator(generator = val_data_gen, steps = val_step, verbose = 1)

 2/37 [>.............................] - ETA: 2:20 - loss: 1.3857 - accuracy: 0.3125

KeyboardInterrupt: 

In [29]:
test_labels = pd.read_csv('./test/label.csv')
test_generator = datagen.flow_from_dataframe(
    test_labels,
    directory="./test/image",
    x_col= "file_name",
    y_col= "label",
    target_size=(256, 256),
    color_mode="grayscale",
    batch_size=1,
    class_mode='categorical',
    shuffle=False,
    seed=42
)


Found 200 validated image filenames belonging to 4 classes.


In [30]:
test_step = test_generator.n // test_generator.batch_size
test_generator.reset()
pred = model.predict_generator(test_generator, steps=test_step, verbose=1)
print(pred.shape)

(200, 4)


In [33]:

pred_idx=np.argmax(pred,axis=1)

print(pred_idx.shape)

pred_labels = dict(((idx, value) for (value, idx) in test_generator.class_indices.items()))

test_pred = [pred_labels[idx] for idx in pred_idx]

print(accuracy_score(test_labels.label.values, test_pred))

print(classification_report(test_labels.label.values, test_pred, target_names = test_labels.label.unique()))

(200,)
0.72
                  precision    recall  f1-score   support

meningioma_tumor       0.65      0.60      0.63        43
    glioma_tumor       0.68      0.65      0.66        68
 pituitary_tumor       0.71      0.86      0.78        37
        no_tumor       0.84      0.81      0.82        52

        accuracy                           0.72       200
       macro avg       0.72      0.73      0.72       200
    weighted avg       0.72      0.72      0.72       200



In [37]:
print(confusion_matrix(test_labels.label.values, test_pred, labels = ['meningioma_tumor', 'glioma_tumor', 'pituitary_tumor', 'no_tumor']))

[[44 12  5  7]
 [14 26  1  2]
 [ 4  2 42  4]
 [ 3  0  2 32]]
