# Oscar Edward Guijaya - 2301981975

# Deteksi Kanker Payudara menggunakan Deep Learning

# Import Library awal yang digunakan

In [5]:
from imutils import paths
import random
import shutil
import os

# Mempersiapkan dataset

In [8]:
imgPaths = list(paths.list_images("datasets/original"))
random.seed(42)
random.shuffle(imgPaths)

i = int(len(imgPaths) * 0.8)
trainPaths = imgPaths[:i]
testPaths = imgPaths[i:]


i = int(len(trainPaths) * 0.1)
valPaths = trainPaths[:i]
trainPaths = trainPaths[i:]

datasets = [
    ("training", trainPaths, "datasets/splitted/training"),
    ("validation", valPaths, "datasets/splitted/validation"),
    ("testing", testPaths, "datasets/splitted/testing")
]

# loop over the datasets
for (dType, imagePaths, baseOutput) in datasets:
    if not os.path.exists(baseOutput):
        os.makedirs(baseOutput)
        
    for inputPath in imagePaths:
        filename = inputPath.split(os.path.sep)[-1]
        label = filename[-5:-4]
        labelPath = os.path.sep.join([baseOutput, label])
        if not os.path.exists(labelPath):
            os.makedirs(labelPath)
        p = os.path.sep.join([labelPath, filename])
        shutil.copy2(inputPath, p)

# Import Library yang diperlukan untuk model CancerNet

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K

# Membuat static method untuk model CancerNet

In [10]:
class CancerNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
            
        # CONV => RELU => POOL
        model.add(SeparableConv2D(32, (3, 3), padding="same",input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # (CONV => RELU => POOL) * 2
        model.add(SeparableConv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(SeparableConv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # (CONV => RELU => POOL) * 3
        model.add(SeparableConv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(SeparableConv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(SeparableConv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        
        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        
        # return the constructed network architecture
        return model

# Import Library yang diperlukan untuk proses Training

In [14]:
import matplotlib
matplotlib.use("Agg")
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import os

# Mendefinisikan parameter untuk training

### Menentukan jumlah epoch, menghitung jumlah dataset dan menghitung class weight 

In [15]:
NUM_EPOCHS = 40
INIT_LR = 1e-2
BS = 32

trainPaths = list(paths.list_images("datasets/splitted/training"))
totalTrain = len(trainPaths)
totalVal = len(list(paths.list_images("datasets/splitted/validation")))
totalTest = len(list(paths.list_images("datasets/splitted/testing")))

trainLabels = [int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels = to_categorical(trainLabels)
classTotals = trainLabels.sum(axis=0)
classWeight = dict()

for i in range(0, len(classTotals)):
    classWeight[i] = classTotals.max() / classTotals[i]

# Data Augmentation

In [16]:
trainAug = ImageDataGenerator(
    rescale=1 / 255.0,
    rotation_range=20,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest")

valAug = ImageDataGenerator(rescale=1 / 255.0)

# Membuat Generator training dari object Data Augmentation yang sudah dibuat sebelumnya

In [17]:
trainGen = trainAug.flow_from_directory(
    "datasets/splitted/training",
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=True,
    batch_size=BS)

valGen = valAug.flow_from_directory(
    "datasets/splitted/validation",
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=False,
    batch_size=BS)

testGen = valAug.flow_from_directory(
    "datasets/splitted/testing",
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=False,
    batch_size=BS)

Found 199818 images belonging to 2 classes.
Found 22201 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


# Training data

In [18]:
model = CancerNet.build(width=48, height=48, depth=3,classes=2)
opt = Adagrad(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])

H = model.fit(
    x=trainGen,
    steps_per_epoch=totalTrain // BS,
    validation_data=valGen,
    validation_steps=totalVal // BS,
    class_weight=classWeight,
    epochs=NUM_EPOCHS)

  super(Adagrad, self).__init__(name, **kwargs)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


## Predict

In [19]:
testGen.reset()
predIdxs = model.predict(x=testGen, steps=(totalTest // BS) + 1)
predIdxs = np.argmax(predIdxs, axis=1)


print(classification_report(testGen.classes, predIdxs,target_names=testGen.class_indices.keys()))


              precision    recall  f1-score   support

           0       0.96      0.72      0.83     39793
           1       0.57      0.93      0.71     15712

    accuracy                           0.78     55505
   macro avg       0.77      0.82      0.77     55505
weighted avg       0.85      0.78      0.79     55505



## Evaluasi menggunakan confusion matrix

In [22]:
cm = confusion_matrix(testGen.classes, predIdxs)
total = sum(sum(cm))
acc = (cm[0, 0] + cm[1, 1]) / total
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])

print(cm)
print("acc: {:.4f}".format(acc))
print("sensitivity: {:.4f}".format(sensitivity))
print("specificity: {:.4f}".format(specificity))

[[28800 10993]
 [ 1163 14549]]
acc: 0.7810
sensitivity: 0.7237
specificity: 0.9260
