In [None]:
# Colab-only: mount Google Drive at /content/drive so the later cells can reach the project folder.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection

In [None]:
# Directory the archive is extracted into; './' is the current working directory.
data = './'

In [None]:
!unzip -q Breast_cancer.zip -d $data

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection/Breast_cancer

**Import libraries**

In [None]:
import json
import math
import os
import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications import DenseNet201
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from sklearn import metrics
from collections import Counter
import json
import itertools
from pickle import dump, load

**Load data and convert to np.array**

In [None]:
def Dataset_loader(DIR, RESIZE, sigmaX=10):
    """Load every PNG file directly inside ``DIR`` as a resized RGB array.

    Parameters
    ----------
    DIR : str
        Directory to scan (not recursive).
    RESIZE : int
        Target side length; every image is resized to ``(RESIZE, RESIZE)``.
    sigmaX : int, optional
        Unused. Kept only so existing callers that pass it keep working.

    Returns
    -------
    list of numpy.ndarray
        One array per PNG file, in sorted file-name order.
    """
    IMG = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore anything cached from it) stable between runs.
    for IMAGE_NAME in tqdm(sorted(os.listdir(DIR))):
        PATH = os.path.join(DIR, IMAGE_NAME)
        _, ftype = os.path.splitext(PATH)
        # Case-insensitive so files named "*.PNG" are not silently skipped.
        if ftype.lower() != ".png":
            continue
        # The context manager closes the underlying file handle promptly.
        with Image.open(PATH) as handle:
            img = np.asarray(handle.convert("RGB"))
        img = cv2.resize(img, (RESIZE, RESIZE))
        IMG.append(np.array(img))
    return IMG

**Prepare for train set**

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection/Breast_cancer/train

In [None]:
# Load both classes at 224x224 and stack them benign-first; the label cell relies on this order.
benign_train = np.array(Dataset_loader('./benign', 224))
malignant_train = np.array(Dataset_loader('./malignant', 224))
X_train = np.concatenate([benign_train, malignant_train], axis=0)

**Prepare for validation set**

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection/Breast_cancer/validation

In [None]:
# Load both classes at 224x224 and stack them benign-first; the label cell relies on this order.
benign_valid = np.array(Dataset_loader('./benign', 224))
malignant_valid = np.array(Dataset_loader('./malignant', 224))
X_valid = np.concatenate([benign_valid, malignant_valid], axis=0)

**Prepare for test set**

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection/Breast_cancer/test

In [None]:
# Load both classes at 224x224 and stack them benign-first; the label cell relies on this order.
benign_test = np.array(Dataset_loader('./benign', 224))
malignant_test = np.array(Dataset_loader('./malignant', 224))
X_test = np.concatenate([benign_test, malignant_test], axis=0)

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection

**Save the image arrays to pickle files**

In [None]:
# Cache each image array in the working directory so it can be reloaded
# without re-reading and resizing every image.
for pkl_name, images in (('train.pkl', X_train),
                         ('validation.pkl', X_valid),
                         ('test.pkl', X_test)):
    with open(pkl_name, 'wb') as pkl_file:
        dump(images, pkl_file)

**Load the image arrays from the pickle files**

In [None]:
# Reload the cached image arrays. Each file is opened in a `with` block so the
# handle is closed as soon as the array has been read (the previous
# `load(open(...))` form left all three handles open).
# NOTE: pickle can execute arbitrary code on load; only read files written by this notebook.
with open("./train.pkl", "rb") as train_file:
    X_train = load(train_file)

with open("./validation.pkl", "rb") as validation_file:
    X_valid = load(validation_file)

with open("./test.pkl", "rb") as test_file:
    X_test = load(test_file)

**Create label**

In [None]:
# Class encoding: 0 = benign, 1 = malignant. Labels are concatenated benign-first.
# NOTE: the counts come from the benign_*/malignant_* arrays, so the image-loading
# cells must have been run in this kernel session before this cell.

# training labels
benign_train_label = np.zeros(benign_train.shape[0])
malignant_train_label = np.ones(malignant_train.shape[0])
y_train = np.concatenate([benign_train_label, malignant_train_label], axis=0)

# validation labels
benign_valid_label = np.zeros(benign_valid.shape[0])
malignant_valid_label = np.ones(malignant_valid.shape[0])
y_valid = np.concatenate([benign_valid_label, malignant_valid_label], axis=0)

# test labels
benign_test_label = np.zeros(benign_test.shape[0])
malignant_test_label = np.ones(malignant_test.shape[0])
y_test = np.concatenate([benign_test_label, malignant_test_label], axis=0)

**Đảo trật tự ảnh và label của chúng**

In [None]:
# Shuffle every split with a fixed seed so the sample order is the same on each
# fresh run. One permutation indexes both images and labels, keeping pairs aligned.
SHUFFLE_SEED = 42
rng = np.random.RandomState(SHUFFLE_SEED)

s = rng.permutation(X_train.shape[0])
X_train = X_train[s]
y_train = y_train[s]

s = rng.permutation(X_valid.shape[0])
X_valid = X_valid[s]
y_valid = y_valid[s]

s = rng.permutation(X_test.shape[0])
X_test = X_test[s]
y_test = y_test[s]

# One-hot encode the 0/1 labels. The ndim guard makes re-running this cell safe:
# encoding an already one-hot array would add a spurious extra axis.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=2)
if y_valid.ndim == 1:
    y_valid = to_categorical(y_valid, num_classes=2)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=2)

**Show some pictures**

In [None]:
w = 60
h = 40
columns = 4
rows = 3
fig = plt.figure(figsize=(10, 10))

# Preview a rows x columns grid of training images, starting at index 249,
# each titled with its class (index 0 of the one-hot label means benign).
for i in range(1, columns * rows + 1):
    sample = i + 248
    ax = fig.add_subplot(rows, columns, i)
    is_benign = np.argmax(y_train[sample]) == 0
    ax.title.set_text('Benign(Lành tính)' if is_benign else 'Malignant(Ác tính)')
    plt.imshow(X_train[sample], interpolation='nearest')
plt.show()

In [None]:
BATCH_SIZE = 16

# Random augmentation applied on the fly to each batch.
train_generator = ImageDataGenerator(
        # A scalar zoom_range z is expanded by Keras to [1 - z, 1 + z]; the previous
        # value 2 therefore sampled factors from [-1, 3], including zero and negative
        # zooms that mirror or collapse the image. An explicit positive range avoids that.
        zoom_range=[0.5, 2.0],
        rotation_range=90,     # rotate by up to +/- 90 degrees
        horizontal_flip=True,  # randomly flip images left-right
        vertical_flip=True,    # randomly flip images top-bottom
    )

**Define and Compile Model**

In [None]:
def build_model(backbone, lr=1e-4):
    """Stack a 2-class softmax head on ``backbone`` and compile the result.

    Parameters
    ----------
    backbone : keras model
        Feature extractor placed first in the Sequential stack.
    lr : float, optional
        Learning rate handed to the Adam optimizer.

    Returns
    -------
    keras.models.Sequential
        Compiled model: backbone -> global average pooling -> dropout(0.5)
        -> batch normalisation -> Dense(2, softmax).
    """
    stack = [
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.BatchNormalization(),
        layers.Dense(2, activation='softmax'),
    ]
    model = Sequential(stack)

    model.compile(optimizer=Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# ImageNet-pretrained DenseNet201 without its classification top, for 224x224 RGB input.
# (The variable keeps the name `resnet` because the model-building cell refers to it.)
resnet = DenseNet201(include_top=False,
                     weights='imagenet',
                     input_shape=(224, 224, 3))

In [None]:
# Attach the classification head to the DenseNet201 backbone, then print the layer summary.
model = build_model(resnet ,lr = 1e-4)
model.summary()

In [None]:
cd /content/drive/My Drive/Practice/Breast_Cancer_Detection

In [None]:
# Both callbacks watch validation accuracy. With metrics=['accuracy'] the history
# key is 'val_accuracy' (the same key the plotting cells read); the old 'val_acc'
# name does not match it, so neither callback would ever trigger.
MONITORED_METRIC = 'val_accuracy'

# Multiply the learning rate by 0.2 after 5 epochs without improvement, down to 1e-7.
learn_control = ReduceLROnPlateau(monitor=MONITORED_METRIC, patience=5,
                                  verbose=1, factor=0.2, min_lr=1e-7,
                                  mode='max')

# Keep only the weights of the epoch with the best validation accuracy.
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor=MONITORED_METRIC, verbose=1,
                             save_best_only=True, mode='max')

**Fit Model**

In [None]:
# Train on augmented batches. steps_per_epoch must be a whole number of batches:
# rounding up covers the final partial batch, where plain division gave a float.
N = model.fit_generator(
    train_generator.flow(X_train, y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=math.ceil(X_train.shape[0] / BATCH_SIZE),
    epochs=20,
    validation_data=(X_valid, y_valid),
    callbacks=[learn_control, checkpoint])

**Save weights**

In [None]:
# Write the model's current weights to custom_model.h5 in the working directory.
model.save_weights("custom_model.h5")

**Load weights**

In [None]:
# Load the weights stored in custom_model.h5 into the current model.
model.load_weights("custom_model.h5")

**Visualize Loss and Accuracy**

**Cách 1**

In [None]:
print(N.history.keys())

# Training vs. validation accuracy per epoch.
fig = plt.figure()
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(N.history[curve])
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('acc and val_acc')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

# Training vs. validation loss per epoch.
plt.figure()
for curve in ('loss', 'val_loss'):
    plt.plot(N.history[curve])
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss and Val_loss')
plt.legend(['train', 'val'])
plt.show()

**Cách 2**

In [None]:
# `N` is the History object returned by training; its `.history` attribute is the
# dict of per-epoch metrics. (`model.history` is the callback object itself, which
# pandas cannot turn into a DataFrame.)
history_df = pd.DataFrame(N.history)
history_df[['accuracy', 'val_accuracy']].plot()
history_df[['loss', 'val_loss']].plot();

**Evaluate Model**

In [None]:
# Score the model on the test images and report accuracy to three decimals.
loss, acc = model.evaluate(X_test, y_test, verbose=2)
print('Accuracy: %.3f' % acc)

**Predict**

In [None]:
# Model outputs for the validation images (no augmentation).
y_val_pred = model.predict(X_valid)

In [None]:
# Validation accuracy: compare the argmax of the one-hot labels with the argmax of the predictions.
accuracy_score(np.argmax(y_valid, axis=1), np.argmax(y_val_pred, axis=1))

In [None]:
# Model outputs for the test images (no augmentation).
y_pred = model.predict(X_test)

In [None]:
# Test-time augmentation: predict the test set several times through the random
# augmentation pipeline and average the outputs. shuffle=False keeps the rows
# aligned with y_test.
tta_steps = 5
predictions = []

# Whole number of batches, rounded up so the final partial batch is included
# (plain division produced a float step count).
tta_batches = math.ceil(len(X_test) / BATCH_SIZE)

for _ in tqdm(range(tta_steps)):
    preds = model.predict_generator(train_generator.flow(X_test, batch_size=BATCH_SIZE, shuffle=False),
                                    steps=tta_batches)

    predictions.append(preds)
    gc.collect()

y_pred_tta = np.mean(predictions, axis=0)

**Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix; rows are true labels, columns are predictions.
    classes : sequence of str
        Tick labels, one per class, in matrix order.
    normalize : bool, optional
        If True, each row is divided by its sum before printing and plotting.
    title : str, optional
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap used for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has a zero row sum; leave that row at 0
        # rather than dividing by zero and filling it with NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=55)
    plt.yticks(tick_marks, classes)

    # Cell annotations: two decimals for ratios, integers for raw counts.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Confusion matrix for the plain (non-augmented) test predictions.
cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))

cm_plot_label =['benign', 'malignant']
# Title corrected: this notebook classifies breast cancer, and "Metrix" was a typo.
plot_confusion_matrix(cm, cm_plot_label, title ='Confusion Matrix for Breast Cancer')

In [None]:
# Confusion matrix for the test-time-augmentation (averaged) predictions.
cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_pred_tta, axis=1))

cm_plot_label =['benign', 'malignant']
# Title corrected (breast cancer, "Matrix") and marked as TTA so it is not
# mistaken for the plain-prediction plot.
plot_confusion_matrix(cm, cm_plot_label, title ='Confusion Matrix for Breast Cancer (TTA)')

**Classification Report**

In [None]:
from sklearn.metrics import classification_report
# classification_report returns a multi-line string; print it so the table is
# rendered with real line breaks instead of an escaped one-line repr.
print(classification_report(np.argmax(y_test, axis=1), np.argmax(y_pred_tta, axis=1)))

**ROC and AUC**

In [None]:
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import roc_curve
# ROC/AUC need a continuous score per sample. Using argmax (hard 0/1 labels)
# collapses the curve to a single threshold, so score with the averaged output
# for class index 1 (malignant) instead.
y_true = np.argmax(y_test, axis=1)
malignant_score = y_pred_tta[:, 1]

roc_log = roc_auc_score(y_true, malignant_score)
false_positive_rate, true_positive_rate, threshold = roc_curve(y_true, malignant_score)
area_under_curve = auc(false_positive_rate, true_positive_rate)

plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
plt.plot(false_positive_rate, true_positive_rate, label='AUC = {:.3f}'.format(area_under_curve))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()
#plt.savefig(ROC_PLOT_FILE, bbox_inches='tight')
plt.close()

In [None]:
# i=0
# prop_class=[]
# mis_class=[]

# for i in range(len(y_test)):
#     if(np.argmax(y_test[i])==np.argmax(y_pred_tta[i])):
#         prop_class.append(i)
#     if(len(prop_class)==24):
#         break

# i=0
# for i in range(len(y_test)):
#     if(not np.argmax(y_test[i])==np.argmax(y_pred_tta[i])):
#         mis_class.append(i)
#     if(len(mis_class)==24):
#         break


# Canvas and grid dimensions for the prediction-vs-truth gallery.
rows, columns = 4, 3
fig = plt.figure(figsize=(24, 30))

def Transfername(namecode):
    """Return the display label for a class index: 0 is benign, any other value malignant."""
    return "Benign(Lành tính)" if namecode == 0 else "Malignant(Ác tính)"
    
# Show the first rows*columns test images, titled with the predicted class
# (from the plain predictions in y_pred) and the true class.
for i in range(rows*columns):
    predicted = Transfername(np.argmax(y_pred[i]))
    actual = Transfername(np.argmax(y_test[i]))
    ax = fig.add_subplot(rows, columns, i+1)
    ax.set_title("Dự đoán: " + predicted + "\n" + "Thực tế: " + actual)
    plt.imshow(X_test[i], interpolation='nearest')
plt.show()