In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Set random state for numpy operations
from numpy.random import seed
seed(2)
# Set random state for tensorflow operations
from tensorflow.random import set_seed
set_seed(3)
# General imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
import seaborn as sns
from mlxtend.plotting import plot_decision_regions
from sklearn.metrics import confusion_matrix
import cv2
import PIL

## **Business Understanding**

Brain tumors in particular are very difficult to diagnose from an MRI image, and artificial intelligence methods of identifying and classifying tumors are oftentimes more accurate than manual identification by a radiologist. That is why the development of neural networks and other AI processes for tumor classification is so valuable and important.

The survival rate for patients diagnosed with a brain tumor is around 35%. This survival rate could be increased if tumors could be identified earlier and more accurately, which AI methods could help with. Additionally, in third world countries, seasoned neurosurgeons (a neurosurgeon is required to make the diagnoses from looking at the MRI) are hard to come by, so a machine learning tool (Decision Support Tool) which could accurately identify tumors would be of great value in these developing nations. This decision support tool would be beneficial to the health industry, and the target audience would be Doctors without Borders, an organization which sends doctors from the US to developing countries to help improve their healthcare. 

In [None]:

def visualize_training_results(history):
    '''
    Plot per-epoch accuracy (top panel) and loss (bottom panel) for the training
    data and for the data passed as validation_data, on a shared x-axis.

    Adapted from https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/

    Input: keras history object (output from trained model). Its `history` dict
    must contain 'loss' and 'val_loss', plus the accuracy metric stored either as
    'acc'/'val_acc' (the name used by the models in this notebook) or as
    'accuracy'/'val_accuracy'.
    '''
    metrics = history.history
    # Keras records a metric under the name it was compiled with, so accept both spellings.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    fig, (ax1, ax2) = plt.subplots(2, sharex=True)
    fig.suptitle('Model Results')

    # summarize history for accuracy
    ax1.plot(metrics[acc_key])
    ax1.plot(metrics['val_' + acc_key])
    ax1.set_ylabel('Accuracy')
    ax1.legend(['train', 'test'], loc='upper left')
    # summarize history for loss
    ax2.plot(metrics['loss'])
    ax2.plot(metrics['val_loss'])
    ax2.set_ylabel('Loss')
    ax2.legend(['train', 'test'], loc='upper left')

    # The x-axis is shared, so labelling the bottom panel is enough.
    ax2.set_xlabel('Epoch')
    plt.show()

In [None]:
# Set up ImageDataGenerator
# Shared image settings for every generator below
TARGET_SIZE = (200, 200)
BATCH_SIZE = 20

# Training images are rescaled to [0, 1] and randomly augmented (zoom, rotation, brightness, flip).
train_imagegen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                   zoom_range=[0.6, 1],
                                   rotation_range=10,
                                   brightness_range=[0.6, 1.5],
                                   horizontal_flip=True,
                                   validation_split=0.06) # this will set aside a part of training set for validation data
# Held-out images are only rescaled: random augmentation at evaluation time would make the
# reported metrics noisy and would not reflect the images the model is actually asked to score.
test_imagegen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
# Same validation_split as train_imagegen so the 'validation' subset holds out the same fraction
# of the Training directory, but without augmentation.
val_imagegen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                   validation_split=0.06)

# Bring the data in
train_generator = train_imagegen.flow_from_directory(
                                    '../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Training',
                                    target_size=TARGET_SIZE,
                                    batch_size=BATCH_SIZE,
                                    seed=42,
                                    class_mode='binary',
                                    subset='training')

# shuffle=False keeps the batch order aligned with test_generator.classes / .filenames, which the
# misclassification analysis later in the notebook relies on when it compares predictions by index.
test_generator = test_imagegen.flow_from_directory(
                                    '../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing',
                                    target_size=TARGET_SIZE,
                                    batch_size=BATCH_SIZE,
                                    seed=42,
                                    class_mode='binary',
                                    shuffle=False)

val_generator = val_imagegen.flow_from_directory(
                                    '../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Training',
                                    target_size=TARGET_SIZE,
                                    batch_size=BATCH_SIZE,
                                    seed=42,
                                    class_mode='binary',
                                    subset='validation',
                                    shuffle=False)

## **Data Understanding**

One important thing to note is that throughout the modelling process, I use train_generator as my training data images, and test_generator as my testing set, which I use every time I run a model to assess overfitting. The val_generator data is used only once, on the final model, to assess how well the model does on data it has never seen before. This clarification is important because for every model that is run, all of the epochs are printed out, and the metrics/results for testing data are referred to as 'val.' Unless otherwise specified, any metric starting with 'val' in the epoch printouts is really testing data.

## **Taking a Look at a few different individual images**

In [None]:
tumor1 = PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Training/AllTumorsTrain/p (274).jpg')
tumor1

In [None]:
# Figuring out the number of color channels
tumor1.mode
# It is 'RGB', meaning that there are three color channels

In [None]:
# Plotting loop adapted from
# https://stackoverflow.com/questions/48435229/how-to-plot-a-list-of-image-in-loop-using-matplotlib/48435411
IMAGES_PER_ROW = 6

# Build the list of example images explicitly so the cell runs on a fresh kernel:
# the first six 'no tumor' training images (label 1) followed by the first six
# tumor training images (label 0), matching the top/bottom rows named in the title.
labelled_paths = list(zip(train_generator.filepaths, train_generator.classes))
no_tumor_examples = [path for path, label in labelled_paths if label == 1][:IMAGES_PER_ROW]
tumor_examples = [path for path, label in labelled_paths if label == 0][:IMAGES_PER_ROW]
images = no_tumor_examples + tumor_examples

fig, axes = plt.subplots(2, IMAGES_PER_ROW, figsize=(18, 10))

for num, x in enumerate(images):
    img = PIL.Image.open(x)
    re_img = img.resize((200,200))
    # Fill the grid row by row: indices 0-5 go to the top row, 6-11 to the bottom row
    ax = axes[num // IMAGES_PER_ROW, num % IMAGES_PER_ROW]
    ax.imshow(re_img)


fig.suptitle('Normal Images on Top Row, Tumor Images on Bottom Row', fontsize=20)
plt.tight_layout()
plt.show()

## **Taking a look at the class imbalance**

### **First looking at training data imbalance**

In [None]:
# looking at how the categories are encoded
train_generator.class_indices
# Tumor data is encoded as zero, no tumor data is encoded as one

In [None]:
# Looking at the place where all labels for training data are stored 
train_generator.classes

In [None]:
# Making a DataFrame out of the training data labels
# One-column DataFrame holding the integer class label of every training image,
# then the number of images per label.
train_tumors = pd.DataFrame(data=train_generator.classes)
train_values = train_tumors.value_counts()
train_values

#### **The ratio of images with tumors to those without is 2327:372, or 6.255:1.**

In [None]:
# Making subsets of the dataframe for visualization purposes
# Give the single label column a readable name, then count the images in each class
# (label 1 is counted as 'no tumor', label 0 as 'tumor').
train_tumors = train_tumors.rename(columns={0:'Tumor/No Tumor'})
train_label_column = train_tumors['Tumor/No Tumor']
train_no_tumor = len(train_tumors[train_label_column == 1])
train_tumor = len(train_tumors[train_label_column == 0])

In [None]:
# Barplot for visually assessing the training data class imbalance
plt.figure(figsize=(10,8))
sns.set(font_scale=1.4)
# x/y are passed by keyword: positional x/y is deprecated in seaborn and rejected by newer releases
sns.barplot(x=['No Tumor', 'Tumor'], y=[train_no_tumor, train_tumor])
plt.ylabel("Number of Images")
plt.title('Distribution of Brain MRIs with and without Tumor');


### **Now taking a look at the test data imbalance**

In [None]:
# Making a DataFrame of testing data labels
# One-column DataFrame holding the integer class label of every testing image,
# then the number of images per label.
test_tumors = pd.DataFrame(data=test_generator.classes)
test_values = test_tumors.value_counts()
test_values

#### **The ratio of images with tumors to those without is 289:105, or 2.75:1**

In [None]:
# Making subsets of the dataframe for plotting purposes
# Give the single label column a readable name, then count the images in each class
# (label 1 is counted as 'no tumor', label 0 as 'tumor').
test_tumors = test_tumors.rename(columns={0:'Tumor/No Tumor'})
test_label_column = test_tumors['Tumor/No Tumor']
test_no_tumor = len(test_tumors[test_label_column == 1])
test_tumor = len(test_tumors[test_label_column == 0])

In [None]:
# Barplot for visually assessing the class imbalance in the testing data
plt.figure(figsize=(10,8))
sns.set(font_scale=1.4)
# x/y are passed by keyword: positional x/y is deprecated in seaborn and rejected by newer releases
sns.barplot(x=['No Tumor', 'Tumor'], y=[test_no_tumor, test_tumor])
plt.ylabel("Number of Images")
plt.title('Distribution of Brain MRIs with and without Tumor in Testing Data');


## **Baseline CNN Model**

In [None]:
# First baseline CNN: two conv + max-pool stages feeding a small dense classifier with a
# single sigmoid output. Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
# NOTE(review): per the class_indices cell above (tumor = 0, no tumor = 1), 'Recall',
# 'Precision' and the TP/TN/FP/FN counts treat "no tumor" as the positive class — confirm
# that this is the reading intended in the analysis text.
baseline = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

baseline.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

In [None]:
# Model.fit accepts generators directly (fit_generator is deprecated). One epoch is one
# full pass over the training generator, so the step count is taken from the generator
# instead of a hard-coded, fractional 2699/20.
baseline_results = baseline.fit(train_generator,
                                steps_per_epoch=len(train_generator),
                                epochs=10,
                                validation_data=test_generator)

In [None]:
visualize_training_results(baseline_results)

### **Analysis of Model**

Looking at the above graphs, it is obvious that this first baseline cnn model is overfitting; accuracy for training data ends up at around 94%, whereas testing data ends up at around 65%. Additionally, the loss for testing data is fairly high; for training the loss ends up at 15%, and for testing it ends up at 69%. In the next model iteration, I will add another dense layer, which will hopefully help the model pick up on more patterns, and some dropout layers for a form of regularization.

## **Adding another Dense layer and Dropout layers**

In [None]:
# Baseline architecture plus a second dense layer (64 units) and a Dropout(0.3) after each
# hidden dense layer. Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
layers_drop = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

layers_drop.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

In [None]:
# Fitting the model. Model.fit accepts generators directly (fit_generator is deprecated);
# the step count comes from the generator instead of a hard-coded, fractional 2699/20.
layers_drop_results = layers_drop.fit(train_generator,
                                      steps_per_epoch=len(train_generator),
                                      epochs=10,
                                      validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(layers_drop_results)

### **Analysis of Model**

In this iteration, training accuracy ends up at 93%, and testing ends up at 73%, so the model is overfitting, but less so than the baseline model. As for loss, training loss is 14% and testing loss is 65%, which is not drastically different from the last model. Adding another layer and dropout layers helped decrease overfitting. In the next model iteration I am going to account for the class imbalance, and the added layer and dropout layers might perform better in that iteration.

## **Accounting for class imbalance**

In [None]:
# Same dense + dropout architecture as the previous model; this iteration additionally
# defines class weights for training. Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/

class_ld = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

class_ld.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

# The under-represented 'no tumor' class (label 1) is up-weighted by the training-set
# imbalance ratio: there are 6.255 times as many MRIs with tumors as without.
weights = {
    0: 1,      # TUMOR
    1: 6.255,  # NO TUMOR
}

In [None]:
# Fitting the model with class weights. Model.fit accepts generators directly
# (fit_generator is deprecated); the step count comes from the generator instead of a
# hard-coded, fractional 2699/20.
class_ld_results = class_ld.fit(train_generator,
                                class_weight=weights,
                                steps_per_epoch=len(train_generator),
                                epochs=10,
                                validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(class_ld_results)

### **Analysis of Model**

In this model iteration, training accuracy was about 89% and testing accuracy is about 57%, so the model is still overfitting compared to the last model. Loss for training is at around 51% and testing loss is around 99%. In terms of accuracy and loss, the model is doing worse than the previous model. However, recall has increased significantly, so it seems that adding class weights is beneficial to the model, even though it requires further tuning.

## **Model with Batch Normalization** 

In [None]:
# Three conv stages, each followed by BatchNormalization and max-pooling, then a
# 128-unit dense layer with Dropout(0.3). Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
class_n = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

class_n.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

# The under-represented 'no tumor' class (label 1) is up-weighted by the training-set
# imbalance ratio: there are 6.255 times as many MRIs with tumors as without.
weights = {
    0: 1,      # TUMOR
    1: 6.255,  # NO TUMOR
}

In [None]:
# Callbacks shared by the longer training runs below: stop once val_loss has not improved
# for 12 epochs (restoring the best weights), and save the best-val_loss model to disk.
# NOTE(review): this one list is passed to several different models' training calls below and
# they all write to the same 'best_model.h5'; the checkpoint callback may also carry its
# best-so-far value from one run into the next — consider a fresh list and file per model.
early_stop2 = [
    EarlyStopping(restore_best_weights=True, patience=12, monitor='val_loss'),
    ModelCheckpoint(save_best_only=True, monitor='val_loss', filepath='best_model.h5'),
]

In [None]:
# Fitting with class weights and early stopping. Model.fit accepts generators directly
# (fit_generator is deprecated); the step count comes from the generator instead of a
# hard-coded, fractional 2699/20.
class_n_results = class_n.fit(train_generator,
                              class_weight=weights,
                              steps_per_epoch=len(train_generator),
                              epochs=20,
                              callbacks=early_stop2,
                              validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(class_n_results)

**Analysis of Model**

The best model from epoch 14 had a training accuracy of around 94%, a testing accuracy of around 70%, a training loss of 30%, and a testing loss of around 84%. Accuracy and loss for both training and testing data is much improved since the last model and it is less overfit, so batch normalization is definitely an improvement.
Because batch normalization makes the network more stable, it is possible to use larger learning rates, which could potentially help the model reach optimal accuracy and minimal loss more quickly, so that is what I will try next.

## **Using a Bigger Learning Rate since I am using Batch Normalization**

In [None]:
# Establishing an instance of Adam with a bigger learning rate (0.01 instead of the 0.001 default).
# The previous version passed epsilon=0.01, which only changes Adam's numerical-stability
# constant and leaves the learning rate at its default.
adam_mlr = keras.optimizers.Adam(learning_rate=0.01)

In [None]:
# Same batch-normalised architecture as the previous model, compiled with the custom
# Adam instance `adam_mlr`. Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
class_na = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

class_na.compile(
    optimizer=adam_mlr,
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

# The under-represented 'no tumor' class (label 1) is up-weighted by the training-set
# imbalance ratio: there are 6.255 times as many MRIs with tumors as without.
weights = {
    0: 1,      # TUMOR
    1: 6.255,  # NO TUMOR
}

In [None]:
# Fitting the model compiled with the custom Adam optimizer. Model.fit accepts generators
# directly (fit_generator is deprecated); the step count comes from the generator instead
# of a hard-coded, fractional 2699/20.
class_na_results = class_na.fit(train_generator,
                                class_weight=weights,
                                steps_per_epoch=len(train_generator),
                                epochs=20,
                                callbacks=early_stop2,
                                validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(class_na_results)

**Analysis of Model**

In the best epoch of the model (epoch 20) training accuracy is 95% while testing accuracy is 74%. Training loss is 33%, while testing loss is 55%. Testing accuracy is higher by four percentage points than the last model, and loss has decreased by about 30%! Additionally, testing recall is 93%, which is important for the context of this problem. In the next iteration I am going to see if adding more dropout layers will be beneficial.

## **Model with Batch Normalization and more Dropout Layers**

In [None]:
# Batch-normalised architecture with an extra Dropout(0.25) after every pooling stage and
# Dropout(0.4) after the dense layer. Structure is modified from the one shown on:
# https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
class_nd = keras.Sequential([
    # conv stage 1
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    # conv stage 2
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    # conv stage 3
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2,2),
    layers.Dropout(0.25),
    # classifier head
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1, activation='sigmoid'),
])

class_nd.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'],
)

# The under-represented 'no tumor' class (label 1) is up-weighted by the training-set
# imbalance ratio: there are 6.255 times as many MRIs with tumors as without.
weights = {
    0: 1,      # TUMOR
    1: 6.255,  # NO TUMOR
}

In [None]:
# Fitting the model. Model.fit accepts generators directly (fit_generator is deprecated);
# the step count comes from the generator instead of a hard-coded, fractional 2699/20.
class_nd_results = class_nd.fit(train_generator,
                                class_weight=weights,
                                steps_per_epoch=len(train_generator),
                                epochs=20,
                                callbacks=early_stop2,
                                validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(class_nd_results)

**Analysis of Model**

Training accuracy of the best epoch is 91% while testing accuracy is 73%. Training loss is 56% while testing loss is 82%. This model is similar to the last one, except for the fact that training loss is significantly increased. The dropout layers added after each max pooling step may not be particularly beneficial to the model.

In [None]:
# Getting the predicted labels for all images in testing data
# NOTE(review): `baseline_n` is not defined in any cell above this one, so this cell raises
# NameError on a fresh Restart & Run All — confirm which model was meant and define it first.
baseline_n_pred = baseline_n.predict(test_generator)
len(baseline_n_pred)

In [None]:
# Rounding the predictions and converting them to plain Python ints so that they can be
# directly compared to the true labels, which are all ints.
base_npred_round = np.round(baseline_n_pred)
# Vectorised conversion: calling int() on each one-element row is deprecated in recent NumPy.
npred_int = base_npred_round.astype(int).ravel().tolist()
npred_int

In [None]:
# Looking at true labels
test_generator.classes

In [None]:
# Collect the position and the predicted label of every incorrectly classified test image.
# NOTE(review): the index-wise comparison with test_generator.classes is only meaningful if
# test_generator yields images in directory order (i.e. was created with shuffle=False).
true_labels = test_generator.classes
wrong_index = [i for i, predicted in enumerate(npred_int) if predicted != true_labels[i]]
wrong_entry = [npred_int[i] for i in wrong_index]
wrong_index_entry = list(zip(wrong_index, wrong_entry))

In [None]:
# File names of the incorrectly classified test images, so that the images can be viewed
fnames = test_generator.filenames
wrong_fnames = [fnames[i] for i in wrong_index]
wrong_fnames

In [None]:
# Combined version of the misclassification-analysis steps from the cells above.
# NOTE(review): `baseline_n` is not defined in any cell above this one (NameError on a fresh
# kernel), and this cell repeats the preceding cells — confirm which copy should be kept.

# Getting the predicted labels for all images in testing data
baseline_n_pred = baseline_n.predict(test_generator)

# Rounding the predictions and converting them to plain Python ints so that they can be
# directly compared to the true labels, which are all ints.
base_npred_round = np.round(baseline_n_pred)
# Vectorised conversion: calling int() on each one-element row is deprecated in recent NumPy.
npred_int = base_npred_round.astype(int).ravel().tolist()

# I want to find out all of the indices of the incorrectly classified images, as well as their label
# NOTE(review): the index-wise comparison with test_generator.classes is only meaningful if
# test_generator yields images in directory order (i.e. was created with shuffle=False).
wrong_index = []
wrong_entry = []
for index, entry in enumerate(npred_int):
    if entry != test_generator.classes[index]:
        wrong_index.append(index)
        wrong_entry.append(entry)
wrong_index_entry = list(zip(wrong_index, wrong_entry))

# Getting a list of all file names of incorrectly classified tumors, so that I can view the images
fnames = test_generator.filenames
wrong_fnames = [fnames[i] for i in wrong_index]

# A Glioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Glioma14.jpg')

In [None]:
# A Glioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Glioma14.jpg')

In [None]:
# A Glioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Glioma2.jpg')

In [None]:
# A Glioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Glioma23.jpg')

In [None]:
# A Meningioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Meningioma10.jpg')

In [None]:
# A Meningioma the network mislabelled as not having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Meningioma17.jpg')

In [None]:
# A Meningioma the network correctly identified as having a tumor
PIL.Image.open('../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Testing/AllTumorsTest/Meningioma19.jpg')

The best epoch of this model has a training accuracy of 93% and a testing accuracy of 80%. It has a training loss of 17% and a testing loss of 54%. This model has much better accuracy and much less loss than the last model, so adding batch normalization really helped.
* look at epoch 10!

## **Using the Pre-Trained VGG-19 Weights (this is my FSM)**

In [None]:
# Load the VGG19 convolutional base (ImageNet weights, no classifier head) for 200x200 RGB input.
# Imported from tensorflow.keras so the base comes from the same Keras implementation as the
# `keras.Sequential` models it is added to; mixing standalone `keras` objects with
# `tensorflow.keras` models can fail depending on the installed versions.
from tensorflow.keras.applications.vgg19 import VGG19
cnn_vgg = VGG19(weights='imagenet',
               include_top=False,
               input_shape=(200,200,3))

In [None]:
cnn_vgg.summary()


In [None]:
# First transfer-learning model: the pretrained VGG19 base followed by a flatten step,
# a 128-unit dense layer and a single sigmoid output.
pretrained = keras.Sequential([
    cnn_vgg,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
def Freeze_Pretrained_Base(pretrain, network):
    """Mark the pretrained base as non-trainable and report the resulting state.

    pretrain: the pretrained base model; its `trainable` flag is set to False.
    network:  the full model containing the base; every layer's name and
              trainable flag is printed, followed by the number of entries in
              `network.trainable_weights`.

    Returns None.
    """
    pretrain.trainable = False
    layer_index = 0
    while layer_index < len(network.layers):
        current = network.layers[layer_index]
        print(current.name, current.trainable)
        layer_index += 1
    trainable_count = len(network.trainable_weights)
    print(trainable_count)

In [None]:
Freeze_Pretrained_Base(cnn_vgg, pretrained)

In [None]:

# Compile after freezing the VGG19 base so the frozen state is what gets trained.
pretrained.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# Model.fit accepts generators directly (fit_generator is deprecated); the step count comes
# from the generator instead of a hard-coded, fractional 2699/20.
pretrained_results = pretrained.fit(train_generator,
                                    steps_per_epoch=len(train_generator),
                                    epochs=20,
                                    callbacks=early_stop2,
                                    validation_data=test_generator)

In [None]:
# Look at how accuracy and loss change across the epochs, for training and testing data
visualize_training_results(pretrained_results)

### **Analysis of Model**
The epoch with the lowest loss had a training accuracy of ~98% and a loss of ~4%, while the testing data had an accuracy of ~93% and a loss of ~21%! This is the best model yet! Additionally, testing recall is ~95%, which means that false negatives are being minimized. Using the VGG19 pretrained weights was a game changer! However, it would be great if the testing loss could be just a bit lower, so in the next model iteration I will use the SGD optimizer with momentum, since it is known for rapidly decreasing loss.

## **Unfreezing Layers in the Pretrained VGG-19 Network**

In [None]:
# Second transfer-learning model, used for fine-tuning part of block 5 of VGG19: same head
# as `pretrained` (flatten, 128-unit dense layer, sigmoid output) on top of the VGG19 base.
# NOTE(review): this reuses the same `cnn_vgg` instance as `pretrained`, so the two models
# share the base's weights and trainable flags — confirm that sharing is intended.
b5c1c2 = keras.Sequential([
    cnn_vgg,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# List for unfreezing of layers function
unfreeze = ['block5_conv1', 'block5_conv2']

In [None]:
# Make only the named layers of the pretrained CNN trainable and freeze all the others
# Code structure from https://github.com/learn-co-curriculum/dsc-using-pretrained-networks-codealong
def Unfreeze_Layers(pretrain, layer_list):
    """Unfreeze exactly the layers of `pretrain` whose names appear in `layer_list`.

    pretrain:   the pretrained base model; its own `trainable` flag is set to True,
                then each of its layers is made trainable only if its name is listed.
    layer_list: collection of layer names to leave trainable.

    Prints every layer's name and trainable flag, then the number of entries in
    `pretrain.trainable_weights`. Returns None.
    """
    pretrain.trainable = True
    for current in pretrain.layers:
        current.trainable = True if current.name in layer_list else False

    for current in pretrain.layers:
        print(current.name, current.trainable)
    trainable_count = len(pretrain.trainable_weights)
    print(trainable_count)

In [None]:
# Unfreezing some of outer layers of VGG19 pretrained network:
# only the layers named in `unfreeze` stay trainable, every other layer of cnn_vgg is frozen.
# The call prints each layer's trainable flag and the trainable-weight count for checking.
Unfreeze_Layers(cnn_vgg, unfreeze)

In [None]:
# Compiling and fitting the model with the same loss, optimizer and metrics as the previous iteration
b5c1c2.compile(loss='binary_crossentropy',
               optimizer='adam',
               metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x).
# steps_per_epoch has to be a whole number of batches, so samples / batch size is rounded up.
b5c1c2_results = b5c1c2.fit(train_generator,
                            steps_per_epoch=int(np.ceil(2699 / 20)),  # number of samples / batch size
                            epochs=20,
                            callbacks=early_stop2,
                            validation_data=test_generator)

**Analysis of Model**

The epoch with the lowest testing loss has a training accuracy of 98% and a testing accuracy of 92%, with a training loss of 5% and a testing loss of 27%. Testing recall is 75%. Other than recall, the results are similar to the previous model; maybe including class weights will help improve the model.

## **Incorporating Class Weights into Pretrained VGG19 model**

In [None]:
# Model for the class-weight experiment: the pretrained VGG19 base (cnn_vgg)
# followed by a flatten step and a small dense head ending in a single sigmoid unit.
w_b5c1c2 = keras.Sequential([
    cnn_vgg,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Incorporating class weights; compiling and fitting the model
weights = {0: 1,      # TUMOR
           1: 6.255}  # NO TUMOR
# there are 6.255 times as many images of MRIs with tumors than without,
# so errors on the rarer class (label 1) are weighted 6.255 times more in the loss

w_b5c1c2.compile(loss='binary_crossentropy',
                 optimizer='adam',
                 metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x).
# steps_per_epoch has to be a whole number of batches, so samples / batch size is rounded up.
w_b5c1c2_results = w_b5c1c2.fit(train_generator,
                                class_weight=weights,
                                steps_per_epoch=int(np.ceil(2699 / 20)),  # number of samples / batch size
                                epochs=20,
                                callbacks=early_stop2,
                                validation_data=test_generator)


**Analysis of Model**

The epoch with the lowest testing loss had a training accuracy of 98% and a testing accuracy of 94%, with a training loss of 7% and a testing loss of 22%. Testing recall is 97%. Overall, because the recall is higher, this model is better than the first model iteration done using the pretrained VGG19 network.

## **Adjusting Class Weights and adding a Dropout Layer**

In [None]:
# Model for the adjusted-class-weight experiment: the pretrained VGG19 base (cnn_vgg),
# a 40% dropout on its output, then a flatten step and the same small dense head as before.
w2_b5c1c2 = keras.Sequential([
    cnn_vgg,
    layers.Dropout(0.4),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Run the freeze helper on the VGG19 base (cnn_vgg) and the new w2_b5c1c2 model.
# The helper (defined earlier in the notebook) prints each layer's name and trainable flag,
# then the number of trainable weights — presumably the base ends up frozen; confirm in the output.
Freeze_Pretrained_Base(cnn_vgg, w2_b5c1c2 )

In [None]:
# Make only the layers named in `unfreeze` trainable inside cnn_vgg and freeze its other layers;
# the call prints the resulting trainable flags and the trainable-weight count.
Unfreeze_Layers(cnn_vgg, unfreeze)

In [None]:

# Adjusted class weights: label 1 is weighted 4x here instead of the 6.255x used in the previous iteration
weights2 = {0: 1,  # TUMOR
            1: 4}  # NO TUMOR

w2_b5c1c2.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x).
# steps_per_epoch has to be a whole number of batches, so samples / batch size is rounded up.
w2_b5c1c2_results = w2_b5c1c2.fit(train_generator,
                                  class_weight=weights2,
                                  steps_per_epoch=int(np.ceil(2699 / 20)),  # number of samples / batch size
                                  epochs=20,
                                  callbacks=early_stop2,
                                  validation_data=test_generator)

**Analysis of Model**

The epoch with the lowest testing loss has a training accuracy of 98% and a testing accuracy of 95%, with a training loss of 8% and a testing loss of 19%. Testing recall is 97%. This model is better than all iterations using VGG19 thus far, due to the high recall percentage in this epoch, as well as across all epochs.

In [None]:
# Persist the trained model as an HDF5 file (relative path, so it lands in the current working directory)
w2_b5c1c2.save("w2_b5c1c2.h5")

In [None]:
# Load a stored copy of the w2_b5c1c2 model from the attached input dataset.
# NOTE(review): this path is under ../input, not the working directory the save() cell writes to —
# presumably the .h5 file was uploaded to the dataset separately; confirm it is the same model.
model = keras.models.load_model("../input/resortedbraintumorclassificationmridata/w2_b5c1c2.h5")
#'../input/resortedbraintumorclassificationmridata/Brain_MRI_Tumor_Images/Training'

### **Now testing out final model on Validation data**

In [None]:
# Same architecture as w2_b5c1c2, rebuilt for the run scored on the validation generator:
# the pretrained VGG19 base (cnn_vgg), 40% dropout, a flatten step and the small dense head.
val_w2_b5c1c2 = keras.Sequential([
    cnn_vgg,
    layers.Dropout(0.4),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Run the freeze helper on the VGG19 base (cnn_vgg) and the new val_w2_b5c1c2 model.
# The helper (defined earlier in the notebook) prints each layer's name and trainable flag,
# then the number of trainable weights — presumably the base ends up frozen; confirm in the output.
Freeze_Pretrained_Base(cnn_vgg, val_w2_b5c1c2 )

In [None]:
# Make only the layers named in `unfreeze` trainable inside cnn_vgg and freeze its other layers;
# the call prints the resulting trainable flags and the trainable-weight count.
Unfreeze_Layers(cnn_vgg, unfreeze)

In [None]:

# Same class weights as the w2_b5c1c2 run
weights2 = {0: 1,  # TUMOR
            1: 4}  # NO TUMOR

val_w2_b5c1c2.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# NOTE(review): this fits a newly built model and scores it on val_generator each epoch; it does
# not evaluate the previously saved/loaded model. The cnn_vgg base is also the same object that
# earlier cells fine-tuned — confirm this is the intended "final model" check.
# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x).
# steps_per_epoch has to be a whole number of batches, so samples / batch size is rounded up.
val_w2_b5c1c2_results = val_w2_b5c1c2.fit(train_generator,
                                          class_weight=weights2,
                                          steps_per_epoch=int(np.ceil(2699 / 20)),  # number of samples / batch size
                                          epochs=20,
                                          callbacks=early_stop2,
                                          validation_data=val_generator)

**Analysis of Model**

The epoch with the lowest validation loss had a training accuracy of 97% and a validation accuracy of 99%, with a training loss of 12% and a validation loss of 2%. Validation recall is 100%. The model has excellent performance on data it has not seen before! However, because the validation set was drawn from the training set in the image data generator (before the training data had actually been used to train any models) the class imbalance should be very similar to the class imbalance of the training data, so this is probably a big reason why the model is performing so well on the validation data.

## **Using the Pretrained VGG-19 network again, but with an SGD Optimizer**

In [None]:
# Model for the SGD-optimizer experiment: the pretrained VGG19 base (cnn_vgg)
# followed by a flatten step and a small dense head ending in a single sigmoid unit.
vgg19_sgd = keras.Sequential([
    cnn_vgg,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Make the pretrained layer untrainable so that during optimization, its weights don't change.
# cnn_vgg is the same object used by the earlier models, so this also undoes the selective
# unfreezing applied to it by Unfreeze_Layers.
cnn_vgg.trainable = False

In [None]:
# Check to see that the pretrained layer is not trainable but that all others are:
# one "name trainable" line is printed per top-level layer of vgg19_sgd
for layer in vgg19_sgd.layers:
    print(layer.name, layer.trainable)
    
# Number of trainable weights left in the whole model
print(len(vgg19_sgd.trainable_weights))

In [None]:
# SGD with Nesterov momentum, used here in place of the Adam optimizer of the earlier iterations
sgd_momen = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

vgg19_sgd.compile(loss='binary_crossentropy',
                  optimizer=sgd_momen,
                  metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x).
# steps_per_epoch has to be a whole number of batches, so samples / batch size is rounded up.
vgg19_sgd_results = vgg19_sgd.fit(train_generator,
                                  steps_per_epoch=int(np.ceil(2699 / 20)),  # number of samples / batch size
                                  epochs=20,
                                  callbacks=early_stop2,
                                  validation_data=test_generator)

**Analysis of Model**

The epoch with the lowest testing loss had a training accuracy of 96% and a testing accuracy of 83%, with a training loss of 11% and a testing loss of 33%. Testing recall is 71%. The results from this model iteration, which used SGD with momentum instead of Adam as the optimizer, are not as good as those of the previous model iteration which used Adam, so it looks like Adam is the best optimizer to use in this situation.