# **Breast cancer HER2 positive/negative classification using Deep Learning**


## **Mount drive**

In [1]:
# Mount Google Drive inside the Colab VM at /content/drive/ so the notebook can
# read slides/patches and write models there. force_remount=True asks Colab to
# mount again even when a mount already exists.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


## **Imports, global variables and utility functions**

In [2]:
%tensorflow_version 2.x

import cv2
from google.colab.patches import cv2_imshow
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import glob
import ntpath
import numpy as np
import tensorflow as tf
# import tf.keras as keras
import pandas as pd
import random as r
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D, Flatten, MaxPooling1D, MaxPooling2D, Conv2D, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.framework.ops import disable_eager_execution
import tensorflow.keras.regularizers

# Turn off TensorFlow eager execution for everything that runs after this cell.
disable_eager_execution()


# Patch size in pixels: X_PIXEL is the extent along image axis 0 and Y_PIXEL the
# extent along axis 1. The same values are used as the model input shape.
X_PIXEL = 296
Y_PIXEL = 296

# `path` is a module-level slide path (later cells reassign it);
# LABEL_PATH is the file that get_labels() reads with pandas.read_csv.
path = "/content/drive/My Drive/Colab Notebooks/PCS-slides/500.tif"
LABEL_PATH = "/content/drive/My Drive/Colab Notebooks/her2_labels"

def get_paths():
  """Return the paths of all .tif files found in the PCS-slides Drive folder."""
  slide_pattern = "/content/drive/My Drive/Colab Notebooks/PCS-slides/*.tif"
  slide_paths = glob.glob(slide_pattern)
  return slide_paths

def get_bags_paths():
  """Return the paths of all bag .csv files found in the Patches/Bags Drive folder."""
  bag_pattern = "/content/drive/My Drive/Colab Notebooks/Patches/Bags/*.csv"
  bag_paths = glob.glob(bag_pattern)
  return bag_paths

def get_labels():
  """Read the file at LABEL_PATH with pandas.read_csv and return the DataFrame."""
  labels_df = pd.read_csv(LABEL_PATH)
  return labels_df

def import_slide(path):
  """Read a slide image and compute its Otsu-thresholded grayscale mask.

  Args:
    path: File path of the slide image.

  Returns:
    A tuple (img, gray_img, ret, th) where
      img      is the colour image as returned by cv2.imread,
      gray_img is its grayscale conversion,
      ret      is the binary image produced by Otsu thresholding,
      th       is the threshold value Otsu selected.

  Raises:
    IOError: If OpenCV could not read an image from `path`.
  """
  img = cv2.imread(path, cv2.IMREAD_COLOR)
  if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError("Could not read slide image: %s" % path)
  # cv2.imread delivers channels in BGR order, so convert from BGR; an RGB(A)
  # conversion code would apply the red and blue luminance weights swapped.
  gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  # cv2.threshold returns (chosen threshold value, thresholded image).
  th, ret = cv2.threshold(gray_img, 0, 255, cv2.THRESH_OTSU)
  return img, gray_img, ret, th

def import_bag(csv_path):
  """Load every patch image listed in a bag CSV.

  The CSV is expected to have a "Path" column (image paths relative to the
  Patches Drive folder) and a "Label" column; the label of the first row is
  used for the whole bag. Each path and the shape of the loaded image are
  printed while loading.

  Args:
    csv_path: Path of the bag CSV file.

  Returns:
    (patches, label): `patches` is a list of tuples
    (image, 0, (index * X_PIXEL, 0)) in CSV order, `label` is the first
    value of the "Label" column.
  """
  patch_root = "/content/drive/My Drive/Colab Notebooks/Patches/"
  bag_df = pd.read_csv(csv_path)
  relative_paths = bag_df["Path"].tolist()
  bag_label = bag_df["Label"].iloc[0]

  bag = []
  for index in range(len(relative_paths)):
    relative_path = relative_paths[index]
    print(relative_path)
    pixels = cv2.imread(patch_root + relative_path)
    print(pixels.shape)
    # Patches are laid out side by side along axis 0, one patch width apart.
    bag.append((pixels, 0, (index * X_PIXEL, 0)))
  return bag, bag_label

def patch_image(img, ret, th, patch_size=None):
  """Tile an image into non-overlapping patches and keep the ones passing the mask test.

  A tile is kept when the mean of the corresponding region of `ret` is
  strictly below `th * 0.2`.

  Args:
    img: Image array; tiled along axis 0 and axis 1.
    ret: Mask array indexed like `img` along its first two axes
      (import_slide returns the Otsu-thresholded image under this name).
    th: Threshold value; 20 % of it is the upper bound for the tile's mask mean.
    patch_size: Optional (height, width) of a tile. Defaults to
      (X_PIXEL, Y_PIXEL).

  Returns:
    List of tuples (patch, 0, (x, y)) where (x, y) is the tile's top-left
    offset along axis 0 / axis 1, in row-major order.
  """
  patch_h, patch_w = (X_PIXEL, Y_PIXEL) if patch_size is None else patch_size
  max_mask_mean = th * 0.2
  patch_area = patch_h * patch_w

  patches = []
  # Iterate only over offsets that leave room for a full tile. The upper bound
  # is inclusive of tiles ending exactly on the image edge (the previous strict
  # `<` comparison silently discarded those).
  for x in range(0, img.shape[0] - patch_h + 1, patch_h):
    for y in range(0, img.shape[1] - patch_w + 1, patch_w):
      mask_mean = np.sum(ret[x:x+patch_h, y:y+patch_w]) / patch_area
      if mask_mean < max_mask_mean:
        patches.append((img[x:x+patch_h, y:y+patch_w], 0, (x, y)))
  return patches


def create_prob_map(coords, pred, max_x, max_y, patch_size=None):
  """Paint per-patch predictions into a 2-D map of size (max_x, max_y).

  Each patch region is filled with round(pred[i] * 255); regions not covered
  by any patch stay 0. Patches that would extend past the map are skipped.

  Args:
    coords: Sequence of (x, y) top-left offsets, one per patch.
    pred: Sequence of predictions aligned with `coords`.
    max_x: Size of the map along axis 0.
    max_y: Size of the map along axis 1.
    patch_size: Optional (height, width) of a patch. Defaults to
      (X_PIXEL, Y_PIXEL).

  Returns:
    Integer numpy array of shape (max_x, max_y).
  """
  patch_h, patch_w = (X_PIXEL, Y_PIXEL) if patch_size is None else patch_size
  img = np.zeros([max_x, max_y], dtype=int)

  print("Creating Probability Map")
  patch = np.ones((patch_h, patch_w))
  for i, coord in enumerate(coords):
    row, col = coord[0], coord[1]
    # `<=` so that a patch ending exactly on the map border is still drawn
    # (the slice assignment below is valid in that case).
    if row + patch_h <= max_x and col + patch_w <= max_y:
      img[row:row+patch_h, col:col+patch_w] = np.round(patch * pred[i] * 255)
  return img

def count_threshold_pass(patches, th):
  """Return how many entries of `patches` have their element [1] strictly below `th`."""
  passing = [entry for entry in patches if entry[1] < th]
  return len(passing)


def create_model_arrays(patches, label, th, patch_shape=None):
  """Build model input/target arrays from a list of patch tuples.

  Only patches whose element [1] is strictly below `th` are used.

  Args:
    patches: Iterable of tuples (image, value, coords), e.g. as produced by
      patch_image.
    label: Slide label; "Positive" gives targets of 1, anything else 0.
    th: Upper bound (exclusive) on the tuple's element [1].
    patch_shape: Optional shape every image is reshaped to. Defaults to
      (X_PIXEL, Y_PIXEL, 3), the patch size used in the rest of the notebook
      (the former hard-coded 299x299x3 did not match it and made reshape raise).

  Returns:
    (x, y, coords): `x` is np.array of the reshaped images, `y` a float array
    with one target per kept patch, `coords` the list of kept coordinates.
  """
  if patch_shape is None:
    patch_shape = (X_PIXEL, Y_PIXEL, 3)

  x_train = []
  x_train_coords = []
  for p in patches:
    if p[1] < th:
      x_train.append(p[0].reshape(patch_shape))
      x_train_coords.append(p[2])

  # Every patch inherits the label of the whole slide.
  target = 1.0 if label == "Positive" else 0.0
  y_train = np.full(len(x_train), target)
  return np.array(x_train), y_train, x_train_coords

def new_training(patches, gaussian, coords, label):
  """Select the patches whose map value exceeds a fixed threshold.

  Args:
    patches: Sequence of patches aligned with `coords`.
    gaussian: 2-D array indexed as gaussian[x, y]; sampled at each patch's
      coordinate.
    coords: Sequence of (x, y) coordinates, one per patch.
    label: Slide label; "Positive" gives labels of 1, anything else 0.

  Returns:
    (new_patches, new_labels, new_coords): np.array of the selected patches,
    a float array with one label per selected patch, and the list of their
    coordinates.
  """
  threshold = 0.6 #arbitrary until a method for a threshold is decided/implemented
  new_patches = []
  new_coords = []

  for i, coord in enumerate(coords):
    if gaussian[coord[0], coord[1]] > threshold:
      new_patches.append(patches[i])
      new_coords.append(coord)

  # Fill the labels array that is actually returned (the previous code wrote
  # to an undefined/global `y_train` instead of `new_labels`).
  target = 1.0 if label == "Positive" else 0.0
  new_labels = np.full(len(new_patches), target)
  return np.array(new_patches), new_labels, new_coords

def patch_cleaner(input_patch):
  """Black out every pixel of a patch whose HSV value is outside a fixed range.

  The patch is converted with cv2.COLOR_RGB2HSV, a mask of the pixels lying
  between [30,150,50] and [255,255,180] (H, S, V) is built, and the patch is
  AND-ed with itself under that mask, so only in-range pixels survive.

  NOTE(review): the conversion treats the input as RGB, while cv2.imread
  returns BGR - confirm the channel order the callers pass in. The bounds were
  originally named "red"; confirm they select the intended colours.

  Args:
    input_patch: Colour image array.

  Returns:
    Image of the same shape with out-of-range pixels set to zero.
  """
  hsv_low = np.array([30,150,50])
  hsv_high = np.array([255,255,180])

  hsv_patch = cv2.cvtColor(input_patch, cv2.COLOR_RGB2HSV)
  in_range_mask = cv2.inRange(hsv_patch, hsv_low, hsv_high)
  return cv2.bitwise_and(input_patch, input_patch, mask=in_range_mask)


# Record the TensorFlow version of this runtime in the cell output.
print(tf.__version__)


TensorFlow 2.x selected.
2.1.0-rc1


## **Download and modify Xception**


In [20]:
# Patch level: Xception backbone without its classification top, initialised
# with the ImageNet weights and sized for the notebook's patches.
base_model = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=(X_PIXEL, Y_PIXEL, 3),
)
# Mark every backbone layer as trainable (set trainable=False here instead to
# freeze the backbone).
for layer in base_model.layers:
    layer.trainable = True
base_model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "xception"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 296, 296, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 147, 147, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 147, 147, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 147,

## **Model specification**

In [17]:

from keras import regularizers
from keras import metrics

# Uniform initialiser in [-0.05, 0.05] shared by the dense layers.
ru = tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)

# Small CNN patch classifier. (An earlier variant stacked dense layers with
# L1/L2 regularisation on top of `base_model`; it is no longer used here.)
model = Sequential()

# NOTE(review): the Conv2D layers are created without an `activation`
# argument, so they are linear - confirm this is intended.
model.add(Conv2D(16, (3, 3), input_shape=(X_PIXEL, Y_PIXEL, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(32, (3, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(32, activation="relu", kernel_initializer=ru))
model.add(Dropout(0.5))
model.add(Dense(16, activation="relu", kernel_initializer=ru))
# Single-unit binary output: sigmoid yields a probability in (0, 1).
# Softmax over one unit is always exactly 1.0, which makes the network
# untrainable with binary_crossentropy.
model.add(Dense(1, activation="sigmoid"))


adam = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)

model.summary()
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['acc'])


Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_24 (Conv2D)           (None, 294, 294, 16)      448       
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 147, 147, 16)      0         
_________________________________________________________________
dropout_32 (Dropout)         (None, 147, 147, 16)      0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 145, 145, 32)      4640      
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 72, 72, 32)        0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 165888)            0         
_________________________________________________________________
dropout_33 (Dropout)         (None, 165888)           

## **Training discriminative network**
Discriminative = A patch has the same label as the whole slide

**Put into console to run for 12h (CTRL+SHIFT+I -> console)**

function ClickConnect(){
console.log("Working"); 
document.querySelector("colab-toolbar-button#connect").click() 
}
setInterval(ClickConnect,60000)

In [0]:

def blur(img):
  """Return `img` smoothed by cv2.GaussianBlur with a 5x5 kernel and sigmaX=0."""
  kernel_size = (5, 5)
  return cv2.GaussianBlur(img, kernel_size, 0)

# aug = ImageDataGenerator(featurewise_std_normalization=True, rotation_range=90)

# paths = glob.glob('/content/drive/My Drive/Colab Notebooks/Patches/Train/Negative/*.tif')
# fit_data = []
# for i in range(0, 1000):
#   path = r.choice(paths)
#   #print(path)
#   fit_data.append(cv2.imread(path).reshape(X_PIXEL,Y_PIXEL,3))

# fit_data = np.array(fit_data)

# aug.fit(fit_data)

In [19]:
# Training-time augmentation: random flips and rotations, then Gaussian blur.
aug = ImageDataGenerator(horizontal_flip=True, 
                        vertical_flip=True,
                        rotation_range=90,
                        preprocessing_function=blur)
# Validation images get the same deterministic preprocessing (blur) but no
# random transforms, so validation metrics are comparable between epochs.
val_aug = ImageDataGenerator(preprocessing_function=blur)

EPOCHS = 1
BATCH_SIZE = 64
STEPS_EPOCH = 64

# class_mode='binary' yields one 0/1 label per image, matching a single-unit
# output trained with binary_crossentropy.
train_generator = aug.flow_from_directory('/content/drive/My Drive/Colab Notebooks/Patches/Data/Train',
                                          target_size=(X_PIXEL, Y_PIXEL), 
                                          batch_size=BATCH_SIZE, 
                                          class_mode='binary')
validation_generator = val_aug.flow_from_directory('/content/drive/My Drive/Colab Notebooks/Patches/Data/Validation',
    target_size=(X_PIXEL, Y_PIXEL),
    batch_size=BATCH_SIZE,
    class_mode='binary')

# Deliberately endless: train one round, checkpoint to Drive, repeat until the
# cell is interrupted manually. The saved file is overwritten every round.
while True:
  model.fit_generator(train_generator, steps_per_epoch=STEPS_EPOCH, validation_data = validation_generator, validation_steps = 32, epochs=EPOCHS)
  model.save("/content/drive/My Drive/Colab Notebooks/Anton-200114-v4.h5") ###### <------- CHANGE NAME HERE#########

Found 2970 images belonging to 2 classes.
Found 738 images belonging to 2 classes.

KeyboardInterrupt: ignored

19*,487*,9*,488*,489*,493*,494*,495*,500*,82*,71*,124*,125*,142*,167*,169*,191*,194*,195*,207*,213*,214*,257*,377*,398*,411*,305*,439* -> Positive Test

33.*,43.*,60.*,122.*,131.*,140.*,158.*,168.*,176.*,184.*,204.*,206.*,216.*,250.* -> Positive Validation

1.*,11.*,13.*,21.*,38.*,55.*,56.*,59.*,61.*,67.*,74.*,190.*,222.*,226.*,324.*,346.*,353.*,360.*,379.*,381.*,387.*,393.*,396.*,400.*,405.*,413.*,420.*,472.* -> Negative Test

2.*,10.*,16.*,22.*,46.*,58.*,64.*,148.*,181.*,228.*,427.*,477.*,479.*,482.* -> Negative Validation

# Load Saved Model

In [0]:
from tensorflow.keras.models import load_model

model = load_model('/content/drive/My Drive/Colab Notebooks/19-12-01.h5') ############ <------- Choose which model to train ##############

# Training-time augmentation: random flips and rotations.
aug = ImageDataGenerator(horizontal_flip=True, 
                        vertical_flip=True,
                        rotation_range=90)
# Validation images are fed without random transforms so validation metrics
# are comparable between epochs.
val_aug = ImageDataGenerator()

EPOCHS = 1
BATCH_SIZE = 32
STEPS_EPOCH = 32

# NOTE(review): class_mode='categorical' yields one-hot labels, so the loaded
# model must have one output unit per class - confirm for the chosen .h5 file.
train_generator = aug.flow_from_directory('/content/drive/My Drive/Colab Notebooks/Patches/Data/Train',
                                          target_size=(X_PIXEL, Y_PIXEL), 
                                          batch_size=BATCH_SIZE, 
                                          class_mode='categorical')
validation_generator = val_aug.flow_from_directory('/content/drive/My Drive/Colab Notebooks/Patches/Data/Validation',
    target_size=(X_PIXEL, Y_PIXEL),
    batch_size=BATCH_SIZE,
    class_mode='categorical')

# Deliberately endless: train one round, checkpoint to Drive, repeat until the
# cell is interrupted manually. The saved file is overwritten every round.
while True:
  model.fit_generator(train_generator, steps_per_epoch=STEPS_EPOCH, validation_data = validation_generator, validation_steps = 32, epochs=EPOCHS)
  model.save("/content/drive/My Drive/Colab Notebooks/Anton-191222.h5") ###### <------- CHANGE NAME HERE#########

# **Image-level classification**


In [0]:
# Load patch-model
# Load slide
# Load labels
# Path slide
# Run predict on patches
# Use predictions in SVM
# Output classification for whole slide
from tensorflow.keras.models import load_model

# MODEL_PATH = "/content/drive/My Drive/Colab Notebooks/19-11-30v2.h5"

paths = get_paths()
# Index the label table by case number so a slide's label can be looked up
# from its file name.
labels = get_labels().set_index("Case")
# model = load_model(MODEL_PATH)

path = "/content/drive/My Drive/Colab Notebooks/PCS-slides/146.tif"

# for path in paths:
img, gray_img, ret, th = import_slide(path)
file_name = ntpath.basename(path)
# The file name without its extension is the case number.
label_i = ntpath.splitext(file_name)[0]
label = labels.loc[int(label_i), "HER2 Status"]
print(label)
# patch_image needs the Otsu mask and threshold as well as the image.
patches = patch_image(img, ret, th)
#filter patches
x_train, y_train, coords = create_model_arrays(patches, label, th)
print(file_name + ": " + str(y_train.shape))
# Predict all patches in a single call and keep one prediction per patch
# (previously the whole batch was re-predicted once per patch and only the
# first patch's result was stored each time).
predictions = list(model.predict(x_train)) if len(x_train) else []
# choose patches at random/min/max ?
# classification = svm(predictions)

# Illustrative sketch of the per-patch prediction layout; the trailing `...`
# is a literal Ellipsis, not real data.
prediction = [[0,1], [0,1],...]

#Train SVM:
#Import sklearn
# Run model.predict() on patches from val and test
# and save N number of predictions
# Save label for slide

# X = [predictions]
# y = [labels]
# clf = svm.SVC()
# clf.fit(X, y)
# 




# **Filter**

In [0]:
#!bash -c 'ls /content/drive/My\ Drive/Colab\ Notebooks/Patches/Filtered'
# One-off manual step: move a single training patch into the Filtered folder.
# Not idempotent - once the file has been moved, re-running this cell makes mv fail.
!mv '/content/drive/My Drive/Colab Notebooks/Patches/Train/Positive/57.tif_79.tif' '/content/drive/My Drive/Colab Notebooks/Patches/Filtered'

In [0]:
# Train 80%
# Val 20%

def _sample_slides(pool, target, exclude=()):
  """Randomly pick slides until their patch counts add up to at least `target`.

  Args:
    pool: Sequence of slide entries whose element [1] is the number of
      patches of that slide.
    target: Number of patches to collect.
    exclude: Entries that must not be picked.

  Returns:
    List of the chosen entries. A draw that would overshoot `target` is
    rejected, and every rejection raises the allowed overshoot by one patch,
    so the loop always terminates; it also stops when the pool is exhausted.
  """
  candidates = [slide for slide in pool if slide not in exclude]
  chosen = []
  count = 0
  slack = 0
  while count < target and candidates:
    slide = r.choice(candidates)
    if count + slide[1] <= target + slack:
      chosen.append(slide)
      count += slide[1]
      candidates = [c for c in candidates if c != slide]
    else:
      slack += 1
  return chosen


# NOTE(review): 1861 is presumably the total number of patches per class and
# 1489 roughly 80 % of it - confirm against the data set.
threshold = 1489
test_th = 1861 - threshold

# NOTE(review): pos_dict / neg_dict are not defined in this notebook; they are
# expected to be sequences of slide entries with the patch count at index 1.
train_pos = _sample_slides(pos_dict, threshold)
train_neg = _sample_slides(neg_dict, threshold)

# Every negative slide that was not chosen for training goes to the negative
# test set; positives are sampled up to the remaining 20 % of the patches.
test_neg = [slide for slide in neg_dict if slide not in train_neg]
test_pos = _sample_slides(pos_dict, test_th, exclude=train_pos)





