# Breast Cancer Detection using CNN
## Dataset: breast-histopathology-images (Kaggle)

In [None]:
# Install the Kaggle CLI. %pip (rather than !pip) installs into the environment
# of the running kernel. Consider pinning a version for reproducible runs.
%pip install -q kaggle



In [1]:
from google.colab import files

# Prompt for kaggle.json. The result is bound to a name on purpose:
# files.upload() returns a {filename: file bytes} dict, and leaving the call as
# the cell's last expression echoes that dict, which writes the API key into
# the saved notebook output.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'<redacted: Kaggle API credentials - revoke and regenerate this key>'}

In [2]:
# The Kaggle CLI reads its credentials from ~/.kaggle/kaggle.json, so the
# uploaded file has to be placed there (a ./kaggle directory is not consulted).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Owner-only permissions: the file holds an API key.
!chmod 600 ~/.kaggle/kaggle.json

In [14]:
# Fetch the dataset archive with the Kaggle CLI. Per the recorded output the
# file is saved as breast-histopathology-images.zip and is about 3.1 GB.
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

Downloading breast-histopathology-images.zip to /content
100% 3.09G/3.10G [00:34<00:00, 62.6MB/s]
100% 3.10G/3.10G [00:34<00:00, 96.3MB/s]


In [15]:
# -p creates the parent directory too and does not fail if the directories
# already exist, so this cell can be re-run safely.
!mkdir -p datasets/original
!mv breast-histopathology-images.zip datasets/original


In [10]:
# Clean up any copy of the archive left in the working directory. -f turns a
# missing file into a no-op instead of an error, which is the normal case once
# the archive has been moved into datasets/original.
!rm -f breast-histopathology-images.zip

In [18]:
# Sanity check: list the staging directory to confirm the archive is there.
!ls datasets/original


breast-histopathology-images.zip


In [19]:
from zipfile import ZipFile

# Location of the downloaded archive; it is extracted next to itself.
file_name = 'datasets/original/breast-histopathology-images.zip'

# The handle is named `archive`, not `zip`: a `with` target stays bound in the
# notebook's global namespace, so `zip` would shadow the builtin in every
# later cell.
with ZipFile(file_name, 'r') as archive:
    archive.extractall('datasets/original/')
    print("Done")

Done


In [None]:
import matplotlib
# Select the non-interactive "Agg" backend before pyplot is imported below;
# the training plot is written to disk with plt.savefig rather than displayed.
matplotlib.use("Agg")

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.optimizers import Adagrad
from keras.utils import np_utils
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Project-local package: supplies the CancerNet model builder and the
# TRAIN_PATH / VAL_PATH / TEST_PATH constants used below. It is not installed
# by this notebook -- NOTE(review): confirm it is importable from the working dir.
from cancernet.cancernet import CancerNet
from cancernet import config
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
# Training hyper-parameters: epoch count, initial learning rate, batch size.
NUM_EPOCHS = 40
INIT_LR = 1e-2
BS = 32

# Image counts per split. These must be integers (not the path lists
# themselves) because they are floor-divided by BS to obtain step counts.
trainPaths = list(paths.list_images(config.TRAIN_PATH))
lenTrain = len(trainPaths)
lenVal = len(list(paths.list_images(config.VAL_PATH)))
lenTest = len(list(paths.list_images(config.TEST_PATH)))

# The label of each training image is the name of its parent directory,
# parsed as an int and then one-hot encoded.
trainLabels = [int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels = np_utils.to_categorical(trainLabels)

# Per-class weight = (size of the largest class) / (size of this class), so the
# rarer class counts proportionally more in the loss. Keras expects
# `class_weight` as a {class_index: weight} dict; a bare array is not accepted
# as a weighting (it is ignored or rejected depending on the Keras version).
classTotals = trainLabels.sum(axis = 0)
classWeight = {
    classIdx: float(weight)
    for classIdx, weight in enumerate(classTotals.max() / classTotals)
}

In [None]:
# Training-time augmentation: rescale pixels to [0, 1] and apply random
# rotations, zooms, shifts, shears and flips.
trainAug = ImageDataGenerator(
    rescale = 1/255.0,
    rotation_range = 20,
    zoom_range = 0.05,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.5,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest"
)

# Validation and test images are only rescaled, never augmented.
valAug = ImageDataGenerator(rescale = 1 / 255.0)

# Training batches are shuffled each epoch.
trainGen = trainAug.flow_from_directory(
    config.TRAIN_PATH,
    class_mode = "categorical",
    target_size = (48,48),
    color_mode = 'rgb',
    shuffle = True,
    batch_size = BS)

# shuffle = False keeps the file order fixed for validation.
valGen = valAug.flow_from_directory(
    config.VAL_PATH,
    class_mode = "categorical",
    target_size = (48,48),
    color_mode = 'rgb',
    shuffle = False,
    batch_size = BS)

# The test split reuses the rescale-only generator (there is no separate
# `testAug`). shuffle = False is required so that predictions line up with
# testGen.classes during evaluation.
testGen = valAug.flow_from_directory(
    config.TEST_PATH,
    class_mode = "categorical",
    target_size = (48,48),
    color_mode = 'rgb',
    shuffle = False,
    batch_size = BS)

In [None]:
# Instantiate CancerNet for 48x48 three-channel inputs and two output classes.
model = CancerNet.build(
    width=48,
    height=48,
    depth=3,
    classes=2,
)

# Adagrad optimizer starting at INIT_LR, with decay set to INIT_LR / NUM_EPOCHS.
opt = Adagrad(
    lr=INIT_LR,
    decay=INIT_LR / NUM_EPOCHS,
)

model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

In [None]:
# Train the model; M.history holds the per-epoch loss/accuracy curves.
# Step counts are taken from the generators' own sample counts so they always
# match what the generators will actually yield (floor division drops the
# final partial batch of each epoch).
M = model.fit_generator(
    trainGen,
    steps_per_epoch = trainGen.samples // BS,
    validation_data = valGen,
    validation_steps = valGen.samples // BS,
    class_weight = classWeight,
    epochs = NUM_EPOCHS)

In [None]:
print("Now evaluating the model")
testGen.reset()

# Ceiling division: exactly enough batches to see every test image once.
# A flat "+ 1" runs one batch too many when the image count is an exact
# multiple of BS; the generator then wraps around and the extra predictions no
# longer line up with testGen.classes.
testSteps = (testGen.samples + BS - 1) // BS
pred_probs = model.predict_generator(testGen, steps = testSteps)

# Predicted class = index of the largest score in each row.
pred_indicies = np.argmax(pred_probs, axis = 1)

print(classification_report(
    testGen.classes,
    pred_indicies,
    target_names = list(testGen.class_indices.keys())))

# Rows of the confusion matrix are true classes, columns are predicted
# classes. labels = [0, 1] guarantees a 2x2 matrix even if one class never
# occurs in the predictions.
cm = confusion_matrix(testGen.classes, pred_indicies, labels = [0, 1])
total = cm.sum()
accuracy = (cm[0,0]+cm[1,1]) / total
# NOTE(review): assumes class index 1 is the positive (cancer) class and
# index 0 the negative class -- confirm against the dataset directory layout.
# sensitivity = recall of the positive class, specificity = recall of the
# negative class.
sensitivity = cm[1,1]/(cm[1,0] + cm[1,1])
specificity = cm[0,0]/(cm[0,0] + cm[0,1])
print(cm)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')

In [None]:
# Plot training/validation loss and accuracy per epoch and save the figure.
history = M.history

# Use the number of epochs actually recorded so the x axis always matches the
# curve lengths, even if training stopped before NUM_EPOCHS.
N = len(history["loss"])
epochs = np.arange(0, N)

# Older Keras releases log accuracy as "acc"/"val_acc"; newer ones use
# "accuracy"/"val_accuracy". Pick whichever this run produced.
accKey = "acc" if "acc" in history else "accuracy"

plt.style.use("ggplot")
plt.figure()
plt.plot(epochs, history["loss"], label = "train_loss")
plt.plot(epochs, history["val_loss"], label = "val_loss")
plt.plot(epochs, history[accKey], label = "train_acc")
plt.plot(epochs, history["val_" + accKey], label = "val_acc")
plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc = "lower left")
plt.savefig('plot.png')