In [1]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras import models, layers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.applications import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD

from sklearn.metrics import classification_report, accuracy_score, roc_auc_score 

Using TensorFlow backend.


## Randomly initialized CNN

This model was mostly for educational purposes. I learned a lot about different kinds of layers, such as batch normalization, max pooling, dropout, and basic convolutional layers. While experimenting with different CNN architectures, I did not train any model for longer than ten hours, and I learned that these models often need far more training time than that. After experimenting with these architectures I realized just how important the learning rate is. I did not investigate it for a while, but I now realize that it is an important parameter when training these neural networks from scratch.

In [2]:
def predict_and_report(gen, model):
    """Run ``model`` over one pass of ``gen`` and print a classification report.

    Parameters
    ----------
    gen : batch iterator
        Must provide ``reset()``, be iterable yielding ``(images, labels)``
        batches, and expose a ``batch_index`` attribute. ``labels`` is
        expected to hold one row of per-class indicators per sample
        (assumed one-hot, as produced by the ``class_mode='categorical'``
        generators in this notebook).
    model : object with ``predict_on_batch(images)``
        Must return one row of per-class scores per sample.

    Returns
    -------
    None
        The report from ``classification_report`` is printed, not returned.
    """
    y_true = []
    y_pred = []
    gen.reset()
    for img, label in gen:
        # Convert indicator rows to class indices. argmax gives the same
        # result as reading column 1 for two classes, and stays correct
        # when there are more than two.
        y_true.extend(np.argmax(label, axis=1).tolist())
        # Per-class scores -> predicted class index for every sample.
        batch_pred = model.predict_on_batch(img)
        y_pred.extend(np.argmax(batch_pred, axis=1).tolist())
        # The iterator does not stop on its own; the loop ends once
        # batch_index reads 0 again after a batch (presumably the iterator
        # wraps its index at the end of a pass -- confirm for the Keras
        # version in use).
        if gen.batch_index == 0:
            break

    print(classification_report(y_true, y_pred))

In [3]:
# Dataset locations: one directory per split.
train_dir = 'C:\\users\\will\\ds\\mammo\\train'
valid_dir = 'C:\\users\\will\\ds\\mammo\\validation'
test_dir = 'C:\\users\\will\\ds\\mammo\\test'

# Image geometry and training schedule.
img_width = img_height = 299
batch_size = 64
num_epochs = 160

# Network hyper-parameters shared by the model-building cell.
filter_size = (3, 3)
pool_size = (2, 2)
drop_out_dense = 0.5
drop_out_conv = 0.25
padding = 'same'

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True)

# Validation and test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Every split is read the same way, so the options are declared once.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='grayscale')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(valid_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

Found 39011 images belonging to 2 classes.
Found 8436 images belonging to 2 classes.
Found 8438 images belonging to 2 classes.


In [4]:
# Eight-conv-layer CNN: four stages of (Conv -> Conv -> MaxPool -> Dropout)
# followed by a two-layer dense head and a 2-way softmax.
model = Sequential()

# Keras expects (rows, cols, channels), i.e. (height, width, 1) for grayscale.
# This matches the generators' target_size=(img_height, img_width); the
# previous (img_width, img_height, 1) only worked because the images are square.
input_shape = (img_height, img_width, 1)

for stage_idx, n_filters in enumerate((32, 64, 96, 128)):
    # Only the very first layer of a Sequential model declares the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if stage_idx == 0 else {}
    model.add(Conv2D(n_filters, kernel_size=filter_size, activation='relu',
                     padding=padding, **first_layer_kwargs))
    model.add(Conv2D(n_filters, kernel_size=filter_size, activation='relu', padding=padding))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(drop_out_conv))

# Classifier head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(drop_out_dense))
model.add(Dense(128, activation='relu'))
model.add(Dropout(drop_out_dense))
# One output unit per class (the generators report 2 classes).
model.add(Dense(2, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 299, 299, 32)      320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 299, 299, 32)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 149, 149, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 149, 149, 32)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 149, 149, 64)      18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 149, 149, 64)      36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 74, 74, 64)        0         
__________

In [5]:
# Configure training: categorical cross-entropy matches the one-hot labels
# produced by the class_mode='categorical' generators.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Checkpoint callback: writes a weights file whenever validation loss improves.
# The file name embeds the epoch number and the validation accuracy.
# NOTE(review): the '{val_acc}' key depends on the Keras version (newer
# releases may log 'val_accuracy' instead) -- confirm before re-running.
filepath="C:\\users\\will\\ds\\mammo-weights\\weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# The original training log was lost, but the weights were saved; restore
# them here so the model starts from the previously trained state.
model.load_weights(r'C:\Users\Will\ds\mammo-weights-8layer160\weights-improvement-154-0.91.hdf5')

# NOTE(review): the recorded output of this cell shows "Epoch 1/1", which does
# not match num_epochs = 160 -- presumably the cell was last run with a
# different setting; verify before relying on it.
result = model.fit_generator(
            train_generator,
            epochs=num_epochs,
            verbose = 1,
            # Class 1 gets the larger weight; it is the minority class
            # (5102 of 39011 training images per the report in this notebook).
            class_weight= {0:.13, 1:.87},
            callbacks=callbacks_list,
            validation_data = validation_generator
            )

Epoch 1/1


In [10]:
# Restore the saved checkpoint (file named for epoch 154), then print a
# classification report for the validation split followed by the training split.
saved_weights_path = r'C:\Users\Will\ds\mammo-weights-8layer160\weights-improvement-154-0.91.hdf5'
model.load_weights(saved_weights_path)
for split_generator in (validation_generator, train_generator):
    predict_and_report(split_generator, model)

             precision    recall  f1-score   support

          0       0.95      0.94      0.95      7343
          1       0.64      0.67      0.66      1093

avg / total       0.91      0.91      0.91      8436

             precision    recall  f1-score   support

          0       0.95      0.94      0.95     33909
          1       0.63      0.68      0.65      5102

avg / total       0.91      0.91      0.91     39011

