In [1]:
import os
import numpy as np
import threading as thr
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dense, Flatten, InputLayer, Activation, Dropout
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import SGD

In [2]:
# MATLAB Engine API for Python: start one MATLAB session that the loader
# functions below use (through `eng`) to call `dataset_filtered`.
import matlab.engine
eng = matlab.engine.start_matlab()

In [3]:
# Accumulators filled by create_dataset: first MATLAB output, second MATLAB
# output and the integer class label of every accepted file.
mammo_o, mammo_f, label = [], [], []

# Move into the training folder and snapshot the names it contains.
project_folder = "../dataset/"
os.chdir(project_folder)
l = os.listdir()

In [4]:
# create_dataset is run from several threads at once and writes to three
# shared lists; this lock keeps each (image, image, label) triple at the same
# index in all three lists.
_create_dataset_lock = thr.Lock()


def create_dataset(ls, o_img, f_img, lbl):
    """Load the images named in `ls` through MATLAB and append them to the output lists.

    For every name containing "_1_resized.pgm" (label 1) or "_2_resized.pgm"
    (label 0), the MATLAB function `dataset_filtered` is called on
    `project_folder/<name>` and its two outputs are stored. Names matching
    neither suffix are skipped.

    Parameters
    ----------
    ls : iterable of str
        File names to examine.
    o_img : list
        Receives the first MATLAB output (presumably the original image).
    f_img : list
        Receives the second MATLAB output (presumably the filtered image).
    lbl : list
        Receives the integer label (1 or 0) of each accepted file.
    """
    # Checked in this order, first match wins.
    suffix_labels = (("_1_resized.pgm", 1), ("_2_resized.pgm", 0))
    for name in ls:
        for suffix, value in suffix_labels:
            if suffix in name:
                mo, mf = eng.dataset_filtered(eng.char(os.path.join(project_folder, name)), nargout = 2)
                # Append the triple atomically so another thread cannot slip
                # its own entries in between and misalign images and labels.
                with _create_dataset_lock:
                    o_img.append(mo)
                    f_img.append(mf)
                    lbl.append(value)
                break

In [None]:
# Fan the directory listing out to worker threads, `chunk` file names each.
threads = []
chunk = 6
# Chunk starts are derived from the listing itself, so every entry is handed
# to a worker regardless of how many files the folder holds.
for start in range(0, len(l), chunk):
    t = thr.Thread(target = create_dataset, args = (l[start : start + chunk], mammo_o, mammo_f, label))
    threads.append(t)
    t.start()

# Wait for all workers before the lists are turned into arrays.
for t in threads:
    t.join()

In [None]:
# Convert the loaded lists to float32 arrays scaled by 1/255.
# NOTE: the names are rebound in place, so re-running this cell divides by 255 again.
mammo_o = np.asarray(mammo_o, dtype='float32') / 255.
mammo_f = np.asarray(mammo_f, dtype='float32') / 255.
label = np.asarray(label)

# Add a trailing channel axis; the fixed leading 147 also fails loudly if a
# different number of images was loaded.
mammo_o_4d = mammo_o.reshape((147, 125, 125, 1))
print(mammo_o_4d.shape)
mammo_f_4d = mammo_f.reshape((147, 64, 64, 1))
print(mammo_f_4d.shape)

In [None]:
def cnn_f(shape=(64, 64, 1)):
    """Build the (uncompiled) binary-classification CNN.

    Three stages of Conv2D(3x3, 'same') -> BatchNormalization -> ReLU ->
    MaxPool2D(4x4, stride 2) with 10, 16 and 29 filters, followed by
    Flatten, Dense(10, relu) and a single sigmoid output unit.
    No Dropout layers are used.

    Parameters
    ----------
    shape : tuple
        Input shape given to the first convolution.

    Returns
    -------
    keras.models.Sequential
    """
    stack = []
    for stage, n_filters in enumerate((10, 16, 29)):
        # Only the first convolution declares the input shape.
        first_layer_kwargs = {'input_shape': shape} if stage == 0 else {}
        stack.append(Conv2D(n_filters, (3, 3), padding='same', **first_layer_kwargs))
        stack.append(BatchNormalization())
        stack.append(Activation('relu'))
        stack.append(MaxPool2D((4, 4), strides=2))

    stack.append(Flatten())
    stack.append(Dense(10, activation='relu'))
    stack.append(Dense(1, activation='sigmoid'))

    return Sequential(stack)

In [None]:
# Instantiate the network with its default input shape and print the
# layer-by-layer summary.
model_f = cnn_f()
model_f.summary()

In [None]:
# SGD with momentum 0.9 at the given initial learning rate; binary
# cross-entropy matches the single sigmoid output, accuracy is tracked as metric.
learning_rate = 0.001
model_f.compile(optimizer = SGD(learning_rate, momentum = 0.9), loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
# Learning-rate schedule shared by both training runs below: watch the
# validation loss and, after 10 epochs without an improvement of at least
# 1e-4, multiply the learning rate by 0.1 (no cooldown, no lower bound).
reduce_on_plateau = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=10,
    verbose=0,
    mode="auto",
    min_delta=0.0001,
    cooldown=0,
    min_lr=0)

In [None]:
# Hold out 20% of the 64x64 images for validation (fixed seed for a repeatable split).
batch_size = 21
X_train_f, X_val_f, Y_train_f, Y_val_f = train_test_split(
    mammo_f_4d, label, test_size=0.2, random_state=44)

# Train for 200 epochs, validating on the held-out part after each epoch.
fit_options = dict(
    batch_size=batch_size,
    epochs=200,
    verbose=1,
    validation_data=(X_val_f, Y_val_f),
    callbacks=[reduce_on_plateau],
)
train_f = model_f.fit(X_train_f, Y_train_f, **fit_options)

In [None]:
# Per-epoch curves recorded by the fit call on model_f.
hist_f = train_f.history
acc, val_acc = hist_f['accuracy'], hist_f['val_accuracy']
loss, val_loss = hist_f['loss'], hist_f['val_loss']

epochs_range = range(1, len(acc) + 1)

# One panel per metric: (subplot index, training curve, validation curve,
# training label, validation label, legend position, title).
panels = (
    (1, acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(15, 15))
for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
    plt.subplot(2, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
def cnn_f_aug(shape=(64, 64, 1)):
    """Build the (uncompiled) CNN variant trained on augmented data.

    Same layout as `cnn_f` but with 9, 22 and 33 convolution filters:
    three stages of Conv2D(3x3, 'same') -> BatchNormalization -> ReLU ->
    MaxPool2D(4x4, stride 2), then Flatten, Dense(10, relu) and a single
    sigmoid output unit. No Dropout layers are used.

    Parameters
    ----------
    shape : tuple
        Input shape given to the first convolution.

    Returns
    -------
    keras.models.Sequential
    """
    stack = []
    for stage, n_filters in enumerate((9, 22, 33)):
        # Only the first convolution declares the input shape.
        first_layer_kwargs = {'input_shape': shape} if stage == 0 else {}
        stack.append(Conv2D(n_filters, (3, 3), padding='same', **first_layer_kwargs))
        stack.append(BatchNormalization())
        stack.append(Activation('relu'))
        stack.append(MaxPool2D((4, 4), strides=2))

    stack.append(Flatten())
    stack.append(Dense(10, activation='relu'))
    stack.append(Dense(1, activation='sigmoid'))

    return Sequential(stack)

In [None]:
# Instantiate the augmented-data network with its default input shape and
# print the layer-by-layer summary.
model_f_aug = cnn_f_aug()
model_f_aug.summary()

In [None]:
# Same optimizer, loss and metric settings as used for model_f.
learning_rate = 0.001
model_f_aug.compile(optimizer = SGD(learning_rate, momentum = 0.9), loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Random rotations (range 90) plus horizontal/vertical flips, with 20% of the
# samples reserved as the generator's validation subset.
augmentation_options = {
    'rotation_range': 90,
    'horizontal_flip': True,
    'vertical_flip': True,
    'validation_split': 0.20,
}
aug = ImageDataGenerator(**augmentation_options)

# Both iterators come from the same `aug` object and yield batches of 30.
# NOTE(review): this validation subset is selected by the generator, not by the
# train_test_split used for model_f - confirm the two are meant to differ.
aug_train_f = aug.flow(mammo_f_4d, label, subset='training', batch_size=30)
aug_val_f = aug.flow(mammo_f_4d, label, subset='validation', batch_size=30)

In [None]:
# NOTE: `aug_train_f` already yields ready-made batches (its size was fixed
# when `aug.flow` was called), so `batch_size` must not be passed to `fit`
# for generator input - depending on the Keras version it is either ignored
# or rejected. The variable is kept only so the name stays defined.
batch_size = 42
train_f_aug = model_f_aug.fit(aug_train_f,
                            epochs = 200,
                            verbose = 1,
                            validation_data = aug_val_f,
                            callbacks = [reduce_on_plateau])

In [None]:
# Per-epoch curves recorded by the fit call on model_f_aug.
hist_f_aug = train_f_aug.history
acc, val_acc = hist_f_aug['accuracy'], hist_f_aug['val_accuracy']
loss, val_loss = hist_f_aug['loss'], hist_f_aug['val_loss']

epochs_range = range(1, len(acc) + 1)

# One panel per metric: (subplot index, training curve, validation curve,
# training label, validation label, legend position, title).
panels = (
    (1, acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(15, 15))
for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
    plt.subplot(2, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
# Evaluate both trained models ten times and collect the validation accuracies.
acc_f, acc_f_aug = [], []
report_format = 'Validation accuracy: %.3f'

for _repeat in range(10):
    # Plain model on its fixed hold-out arrays.
    _loss, val_acc = model_f.evaluate(X_val_f, Y_val_f, verbose=0)
    print(report_format % (val_acc))
    acc_f.append(val_acc)

    # Augmented model on the generator's validation subset.
    _loss, val_acc = model_f_aug.evaluate(aug_val_f, verbose=0)
    print(report_format % (val_acc))
    acc_f_aug.append(val_acc)

In [None]:
# Mean ("media") and standard deviation of model_f's validation accuracies.
media_f, std_f = np.mean(acc_f), np.std(acc_f)
for statistic in (media_f, std_f):
    print(statistic)

In [None]:
# Mean ("media") and standard deviation of model_f_aug's validation accuracies.
media_f_aug, std_f_aug = np.mean(acc_f_aug), np.std(acc_f_aug)
for statistic in (media_f_aug, std_f_aug):
    print(statistic)

In [None]:
# Accumulators for the test set: first MATLAB output, second MATLAB output, labels.
mammo_o_t = []
mammo_f_t = []
label_t = []

# Move into the test folder and snapshot the names it contains.
data_folder_t = "../test_dataset/"
os.chdir(data_folder_t)
l_t = os.listdir()

In [None]:
# create_dataset_t may be run from several threads that share the output
# lists; this lock keeps each (image, image, label) triple at the same index
# in all three lists.
_create_dataset_t_lock = thr.Lock()


def create_dataset_t(lista, o_img, f_img, labels):
    """Load the test images named in `lista` through MATLAB and append them to the output lists.

    For every name containing "_1_resized.pgm" (label 1) or "_2_resized.pgm"
    (label 0), the MATLAB function `dataset_filtered` is called on
    `data_folder_t/<name>` and its two outputs are stored. Names matching
    neither suffix are skipped.

    Parameters
    ----------
    lista : iterable of str
        File names to examine.
    o_img : list
        Receives the first MATLAB output (presumably the original image).
    f_img : list
        Receives the second MATLAB output (presumably the filtered image).
    labels : list
        Receives the integer label (1 or 0) of each accepted file.
    """
    # Checked in this order, first match wins.
    suffix_labels = (("_1_resized.pgm", 1), ("_2_resized.pgm", 0))
    for element in lista:
        for suffix, value in suffix_labels:
            if suffix in element:
                # Paths are built from data_folder_t, the test folder defined in
                # this notebook (the previous name `data_folder` does not exist).
                mo, mf = eng.dataset_filtered(eng.char(os.path.join(data_folder_t, element)), nargout = 2)
                # Append the triple atomically so another thread cannot slip
                # its own entries in between and misalign images and labels.
                with _create_dataset_t_lock:
                    o_img.append(mo)
                    f_img.append(mf)
                    labels.append(value)
                break

In [None]:
# Fan the test-folder listing out to worker threads, `chunk` file names each.
threads = []
chunk = 6

# Use the test-set loader: create_dataset builds its paths from
# project_folder (the training folder), so it cannot locate these files.
# Chunk starts are derived from the listing so every entry reaches a worker.
for start in range(0, len(l_t), chunk):
    t = thr.Thread(target = create_dataset_t, args = (l_t[start : start + chunk], mammo_o_t, mammo_f_t, label_t))
    threads.append(t)
    t.start()

# Wait for all workers before the MATLAB engine is shut down.
for j in threads:
    j.join()

In [None]:
# All images are loaded; shut down the MATLAB session started at the top.
eng.quit()

In [None]:
# Convert the test lists to float32 arrays scaled by 1/255.
# NOTE: the names are rebound in place, so re-running this cell divides by 255 again.
mammo_o_t = np.asarray(mammo_o_t, dtype='float32') / 255.
mammo_f_t = np.asarray(mammo_f_t, dtype='float32') / 255.
label_t = np.asarray(label_t)

# Add a trailing channel axis; the fixed leading 30 also fails loudly if a
# different number of test images was loaded.
mammo_o_4d_t = mammo_o_t.reshape((30, 125, 125, 1))
print(mammo_o_4d_t.shape)
mammo_f_4d_t = mammo_f_t.reshape((30, 64, 64, 1))
print(mammo_f_4d_t.shape)

In [None]:
# Keep whichever trained model reached the higher mean validation accuracy.
# model_f / model_f_aug are already-built model objects, so they are
# assigned as-is rather than called.
model_test = model_f if media_f > media_f_aug else model_f_aug

In [None]:
# Score the selected model on the held-out test images (the notebook defines
# no plain `model`, so both calls go through `model_test`).
test_loss, test_acc = model_test.evaluate(mammo_f_4d_t, label_t)
preds_test = model_test.predict(mammo_f_4d_t, verbose=1)

# ROC curve and area under it from the predicted probabilities.
fpr, tpr, _ = roc_curve(label_t, preds_test)
roc_auc = auc(fpr, tpr)

print('\n Test accuracy = %.3f'% (test_acc))
print('\n AUC = %.3f'% (roc_auc))