In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Rescaling, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.metrics import TrueNegatives, TruePositives, FalseNegatives, FalsePositives, binary_accuracy, Recall, AUC, BinaryAccuracy, Precision

In [2]:
# Directory handed to image_dataset_from_directory when the datasets are built.
data_dir = '/kaggle/input/brain-mri-images-for-brain-tumor-detection/brain_tumor_dataset/'

# Number of images per batch produced by the loaders.
batch_size = 64
# Every image is resized to a square of this side length; the same values
# define the model's input shape.
img_height = img_width = 180

In [3]:


# Arguments common to both loaders: the split fraction, the seed, the target
# image size and the batch size are identical for training and validation.
_split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# Training subset of the split, with shuffling requested explicitly.
train_df = image_dataset_from_directory(
    data_dir,
    subset="training",
    shuffle=True,
    **_split_options,
)

# Validation subset of the same split.
val_df = image_dataset_from_directory(
    data_dir,
    subset="validation",
    **_split_options,
)

In [4]:
# Keep the class-name list exposed by the loader; later cells index it with the
# integer labels. This is read before train_df is re-bound further down.
# NOTE(review): presumably the re-bound (cached/prefetched) dataset no longer
# exposes `class_names` - confirm before re-running this cell out of order.
class_names = train_df.class_names
print(class_names)

In [5]:
# Preview the first nine images of one training batch in a 3x3 grid,
# each titled with its class name.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
images, labels = next(iter(train_df.take(1)))
for i, ax in enumerate(axes.flat):
    # Cast to uint8 so imshow interprets the values as 0-255 intensities.
    ax.imshow(images[i].numpy().astype("uint8"))
    ax.set_title(class_names[labels[i]])
    ax.axis("off")

In [6]:
# Fetch a single batch and report the shapes of its image and label tensors.
image_batch, labels_batch = next(iter(train_df))
print(image_batch.shape)
print(labels_batch.shape)

In [7]:
# Layer that multiplies its input by 1/255.
normalization_layer = Rescaling(scale=1.0 / 255)

In [8]:
def _rescale_images(x, y):
    """Apply the rescaling layer to the images and pass the labels through."""
    return normalization_layer(x), y


# Demonstration of the rescaling on one batch of the training data.
normalized_df = train_df.map(_rescale_images)
image_batch, labels_batch = next(iter(normalized_df))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
lowest, highest = np.min(first_image), np.max(first_image)
print(lowest, highest)

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

# cache() replays exactly the elements recorded during the first pass, so any
# shuffling done upstream of it is frozen after epoch 1. Shuffling *after* the
# cache restores a different order on every epoch. The elements at this point
# are whole batches, so it is the batch order that gets reshuffled.
train_df = (
    train_df
    .cache()
    .shuffle(buffer_size=1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation data is only evaluated, so its order is left untouched.
val_df = val_df.cache().prefetch(buffer_size=AUTOTUNE)

In [10]:
# Display the repr of the re-bound training dataset as the cell output.
train_df

In [11]:
# Raw confusion-matrix counts.
_confusion_counts = [
    TruePositives(name="tp"),
    FalsePositives(name="fp"),
    TrueNegatives(name="tn"),
    FalseNegatives(name="fn"),
]

# Summary scores.
_summary_scores = [
    BinaryAccuracy(name="accuracy"),
    Precision(name="precision"),
    Recall(name="recall"),
    AUC(name="auc"),
]

# Order is preserved: counts first, then scores.
metrics = _confusion_counts + _summary_scores

Before moving on, let's define the metrics listed above and see what they mean.

suppose:
 yes - positive class
 no - negative class

* True Positive (TP) - case when our model predicted positive for positive class
* True Negative (TN) - case when our model predicted negative for negative class
* False Positive (FP) - case when our model predicted positive for negative class
* False Negative (FN) - case when our model predicted negative for positive class
* Binary Accuracy = (TP + TN) / (TP + TN + FP + FN). It is the fraction of all predictions that are correct; in the perfect case binary accuracy is 1.
* Precision = TP / (TP + FP) = TP / Total_Predicted_Positive. As the formula shows, precision is the fraction of the model's positive predictions that are actually positive. In the ideal case FP = 0, so Precision = 1.
* Recall = TP / (TP + FN) = TP / Total_Actual_Positive. As the formula shows, recall is the fraction of all actually positive inputs that the model correctly labeled as positive.
* True Positive Rate (TPR) - synonym for Recall
* False Positive Rate (FPR) = FP / (FP + TN) = FP / Total_Actual_Negative. It is the fraction of all actually negative inputs that the model incorrectly labeled as positive. Both TPR and FPR are used to draw the ROC curve.
* ROC curve - a curve showing the performance of a classification model at every classification threshold. It plots TPR (y-axis) against FPR (x-axis) as the threshold is varied.
* AUC - the area under the ROC curve. It equals the probability that the model assigns a higher score to a randomly chosen positive example than to a randomly chosen negative one. For example, AUC = 0.8 means the model ranks a random positive above a random negative 80% of the time. AUC = 0.5 means the model has no capacity to distinguish the two classes (no better than random guessing), and AUC = 1 means perfect separation.

In [12]:
# CNN binary classifier: three convolution/pooling stages followed by a dense
# head ending in a single sigmoid unit.
model = Sequential([
    # Scale raw pixel values by 1/255 inside the model. The datasets passed to
    # fit/evaluate are not rescaled anywhere else, so without this layer the
    # network would train on unnormalised inputs. Keeping the scaling in the
    # model also means training, validation and inference all see identical
    # preprocessing.
    Rescaling(1.0 / 255, input_shape=(img_height, img_width, 3)),

    # Stage 1
    Conv2D(filters=128, kernel_size=(3, 3), strides=1, activation='relu'),
    MaxPool2D(pool_size=(3, 3)),

    # Stage 2
    Conv2D(filters=64, kernel_size=(3, 3), strides=1, activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), strides=1, activation='relu'),
    MaxPool2D(pool_size=(3, 3)),

    # Stage 3
    Conv2D(filters=32, kernel_size=(3, 3), strides=1, activation='relu'),
    Conv2D(filters=32, kernel_size=(3, 3), strides=1, activation='relu'),
    MaxPool2D(pool_size=(3, 3)),

    # Classification head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(0.25),
    # Single sigmoid output, paired with the binary cross-entropy loss.
    Dense(1, activation='sigmoid'),
])


In [13]:
# Configure training: Adam optimizer, binary cross-entropy loss, and the
# metric objects collected in `metrics`.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=metrics,
)

In [14]:
# Print the model summary.
model.summary()

In [15]:
# Train for 30 epochs, scoring on the validation set after each one; the
# returned object is kept in `history`.
history = model.fit(
    train_df,
    validation_data=val_df,
    epochs=30,
)

In [16]:
# Ask evaluate() for a {metric name: value} mapping so each value is paired
# with its own name. Zipping the returned list against model.metrics_names is
# fragile: on newer Keras releases that attribute only lists
# ['loss', 'compile_metrics'], which silently truncates the report.
validation_metrics = model.evaluate(val_df, verbose=0, return_dict=True)

# Keep the plain list of values available under the original name.
validation_results = list(validation_metrics.values())

for name, value in validation_metrics.items():
    print(name, ': ', value)