<a href="https://colab.research.google.com/github/khal-drog0/Breast_Cancer_Histopathology_Classification/blob/main/notebook7588f8763a.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip3 install -q kaggle

In [2]:
from google.colab import files

# Bind the result instead of leaving the call as the cell's last expression:
# files.upload() returns the uploaded contents keyed by file name, so echoing
# it would write the Kaggle API key into the notebook's saved output.
uploaded = files.upload()
print('Uploaded:', list(uploaded))

KeyboardInterrupt: ignored

In [None]:
!mkdir ~/.kaggle

In [None]:
# Copy kaggle.json from the working directory into /root/.kaggle.
!cp kaggle.json /root/.kaggle

In [None]:
# Restrict the token file to owner read/write (mode 600).
! chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Download the paultimothymooney/breast-histopathology-images dataset with the kaggle CLI.
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

In [None]:
! unzip breast-histopathology-images.zip

In [3]:
import pandas as pd
import numpy as np
from glob import glob
import random

In [4]:
# glob returns paths in filesystem order, which is not stable across machines;
# sorting gives the seeded sampling further down a deterministic input.
images = sorted(glob('IDC_regular_ps50_idx5/**/*.png', recursive=True))

In [5]:
# Partition the patch paths by the class suffix of the file name:
# paths ending in "class0.png" are class 0 (no cancer); every other path is
# treated as class 1 (cancer).
class0 = [path for path in images if path.endswith("class0.png")]
class1 = [path for path in images if not path.endswith("class0.png")]

In [6]:
# Report how many patch paths ended up in each class.
for caption, paths in (('class0 length:', class0), ('class1 length:', class1)):
    print(caption, len(paths))

class0 length: 198738
class1 length: 78786


In [7]:
# Balance the classes by undersampling both to the size of the smaller one.
# The size is derived from the data instead of being hard-coded, and the RNG is
# seeded so the same patches are drawn on every run.
random.seed(42)
n_per_class = min(len(class0), len(class1))
sampled_class0 = random.sample(class0, n_per_class)
sampled_class1 = random.sample(class1, n_per_class)
len(sampled_class0)

78786

In [8]:
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label, size=(70, 70)):
    """Load PNG files, resize them and pair each image with a label.

    Args:
        data: iterable of file paths; entries not ending in '.png' are ignored.
        label: value stored next to every loaded image.
        size: (width, height) passed to cv2.resize; defaults to (70, 70).

    Returns:
        A list of [resized_image, label] pairs, in input order. Files that
        cv2 cannot read are skipped (and reported) instead of crashing.
    """
    img_arrays = []
    for path in data:
        if not path.endswith('.png'):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None for a missing or corrupt file rather than
            # raising; passing that None to cv2.resize would abort the whole load.
            print('Skipping unreadable image:', path)
            continue
        img_sized = cv2.resize(img, size, interpolation=cv2.INTER_LINEAR)
        img_arrays.append([img_sized, label])
    return img_arrays

In [9]:
# Load the sampled patches of each class as [image, label] pairs
# (label 0 for sampled_class0, label 1 for sampled_class1).
class0_array = get_image_arrays(sampled_class0, 0)
class1_array = get_image_arrays(sampled_class1, 1)

KeyboardInterrupt: ignored

In [None]:
# Sanity-check that a single patch can be read. The path is relative to the
# working directory, like the glob pattern used to list the dataset.
test = cv2.imread('IDC_regular_ps50_idx5/13689/1/13689_idx5_x801_y1501_class1.png', cv2.IMREAD_COLOR)
# Show only the shape (None when the file could not be read) rather than
# dumping the full pixel array into the cell output.
None if test is None else test.shape

In [None]:
# Keep the [image, label] pairs in a plain Python list. Converting them with
# np.concatenate builds a ragged array (an error on current NumPy), and
# random.shuffle on a 2-D ndarray swaps row *views*, which duplicates samples.
combined_data = class0_array + class1_array
random.seed(41)
random.shuffle(combined_data)

In [None]:
# Separate the shuffled (image, label) pairs into parallel lists.
X = [features for features, _ in combined_data]
y = [label for _, label in combined_data]

In [None]:
# Stack the images into a single (N, 70, 70, 3) array.
X = np.asarray(X).reshape((-1, 70, 70, 3))

In [None]:
# Display the shape of the stacked image array.
X.shape

In [None]:
from sklearn.model_selection import train_test_split
# keras.utils.np_utils is an internal module that newer Keras releases no
# longer expose; the public keras.utils path provides the same function.
from keras.utils import to_categorical

# 70/30 split with a fixed seed, then one-hot encode the integer labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Show the first 25 training patches in a 5x5 grid, labelled with y_train.
plt.figure(figsize=(10, 10))

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # The patches were loaded with cv2.imread, i.e. in BGR channel order, while
    # imshow interprets 3-channel data as RGB: reverse the channel axis for
    # display. (A colormap has no effect on colour images, so none is passed.)
    plt.imshow(X_train[i][..., ::-1])
    plt.xlabel(y_train[i])
plt.show()

In [None]:
# Convert to float32 and scale pixel values by 1/255.
# NOTE: re-running this cell divides by 255 again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [None]:
from tensorflow import keras
from keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape
from keras.models import Model
from keras import backend as K

# Variational Autoencoder

## Encoder
4 conv2d, 1 flatten and 1 dense layer

In [None]:
latent_dim = 5 # size of the latent vector: width of the z_mu / z_sigma layers and of the decoder input

In [None]:
# Image geometry shared by the encoder input and the visualisation cells.
img_width, img_height, num_channels = 70, 70, 3
input_shape = (img_width, img_height, num_channels)

In [None]:
input_img = Input(shape=input_shape, name='encoder_input')

# Four 3x3, same-padded ReLU convolutions; only the second one uses stride 2.
x = Conv2D(filters=32, kernel_size=3, activation='relu', padding='same')(input_img)
x = Conv2D(filters=64, kernel_size=3, strides=(2, 2), activation='relu', padding='same')(x)
x = Conv2D(filters=64, kernel_size=3, activation='relu', padding='same')(x)
x = Conv2D(filters=64, kernel_size=3, activation='relu', padding='same')(x)

In [None]:
conv_shape = K.int_shape(x) # Static shape of the last conv output; the decoder uses entries 1-3 to size its Dense and Reshape layers

# Flatten the conv feature map and feed it through a 32-unit ReLU dense layer
x = Flatten()(x)
x = Dense(32, activation='relu')(x)

In [None]:
# Two outputs: latent mean and latent log-variance.
# NOTE: despite its name, z_sigma is consumed as a log-variance downstream
# (sample_z uses K.exp(z_sigma / 2); the KL term uses K.exp(z_sigma)).

z_mu = Dense(latent_dim, name='latent_mu')(x)   # Mean of the encoded input
z_sigma = Dense(latent_dim, name='latent_sigma')(x)  # Log-variance of the encoded input

In [None]:
# REPARAMETERIZATION TRICK
# Sampling is written as a deterministic function of the encoder outputs plus
# independent standard-normal noise, so gradients can flow back through the
# mean and log-variance:  z = mu + exp(log_var / 2) * eps

def sample_z(args):
  """Draw a latent sample from the encoder outputs.

  args is a pair (mean, log-variance). Returns mean + exp(log_var / 2) * eps,
  where eps is standard-normal noise with one row per batch element and one
  column per latent dimension.
  """
  mean, log_var = args
  batch_size = K.shape(mean)[0]
  n_latent = K.int_shape(mean)[1]
  noise = K.random_normal(shape=(batch_size, n_latent))
  return mean + K.exp(log_var / 2) * noise

In [None]:
# Sample a vector from the latent distribution.
# z is a Lambda layer wrapping sample_z, so the sampling step is part of the model graph;
  # it takes z_mu and z_sigma as inputs and outputs a vector of length latent_dim.
  
z = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([z_mu, z_sigma])

In [None]:
# z (the Lambda layer) is the last layer in the encoder.
# Define and summarize the encoder model; it outputs [z_mu, z_sigma, z].

encoder = Model(input_img, [z_mu, z_sigma, z], name='encoder')
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
encoder.summary()

## Decoder
It takes in latent dim as input.

In [None]:
# The decoder consumes one latent vector of length latent_dim per sample.
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')

We need to start with a shape that can be mapped back to the original image shape, because the final output must have the same shape as the original input.

In [None]:
# Dense layer with as many units as there are elements in the encoder's last
# conv feature map (conv_shape[1] * conv_shape[2] * conv_shape[3]), so it can be reshaped to that map
x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation='relu')(decoder_input)

In [None]:
# Reshape to the shape of the last conv layer in the encoder, so the
# transposed convolutions below can upsample from it
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)

In [None]:
# Upscale with a stride-2 Conv2DTranspose, mirroring the single stride-2
# convolution in the encoder
x = Conv2DTranspose(32, 3, padding='same', activation='relu',strides=(2, 2))(x)

In [None]:
# Final layer: num_channels output channels with a sigmoid, so every output
# value lies in (0, 1)
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

In [None]:
# Define and summarize the decoder model (latent vector in, image out)
decoder = Model(decoder_input, x, name='decoder')
decoder.summary()

In [None]:
# Apply the decoder to the latent sample z produced by the encoder graph
z_decoded = decoder(z)

The VAE is trained with a loss made of two terms: the reconstruction loss and the KL divergence.

In [None]:
# Custom loss
# Layer that attaches the VAE loss to the model.
# Loss = reconstruction loss + weighted KL divergence

class CustomLayer(keras.layers.Layer):
    """Pass-through layer that registers the VAE loss via add_loss.

    Call it with [x, z_decoded] or with [x, z_decoded, mu, log_var]. In the
    two-input form the KL term falls back to the module-level tensors
    ``z_mu`` and ``z_sigma``; the four-input form makes that dependency an
    explicit input of the layer. The layer returns ``x`` unchanged.
    """

    def vae_loss(self, x, z_decoded, mu=None, log_var=None):
        """Return the scalar VAE loss for a batch.

        x and z_decoded are the input images and their reconstructions.
        mu and log_var are the latent mean and log-variance; when either is
        omitted, the module-level ``z_mu`` / ``z_sigma`` are used.
        """
        if mu is None or log_var is None:
            # Backward-compatible path for callers that pass only the images.
            mu, log_var = z_mu, z_sigma

        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)

        # Reconstruction loss (the decoder ends in a sigmoid, so binary
        # cross-entropy applies)
        recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)

        # KL divergence term, weighted by 5e-4
        kl_loss = -5e-4 * K.mean(1 + log_var - K.square(mu) - K.exp(log_var), axis=-1)
        return K.mean(recon_loss + kl_loss)

    def call(self, inputs):
        """Register the loss for `inputs` and return the input images."""
        x = inputs[0]
        z_decoded = inputs[1]
        # Optional third and fourth entries carry the latent mean / log-variance.
        mu, log_var = (inputs[2], inputs[3]) if len(inputs) >= 4 else (None, None)
        loss = self.vae_loss(x, z_decoded, mu, log_var)
        self.add_loss(loss, inputs=inputs)
        return x

In [None]:
# Apply the custom loss layer to the input images and their reconstruction.
# The layer returns its first input, so y is input_img with the VAE loss attached.
# NOTE: this rebinds `y`, which earlier in the notebook held the label list.
y = CustomLayer()([input_img, z_decoded])

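y is the output of the custom loss layer: the layer returns the original input image unchanged, after the image has been encoded to mu, sigma and z, the sampled z has been decoded, and the VAE loss has been registered on the result.
This will be used as the output of the VAE model.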

# VAE

In [None]:
# End-to-end model: input image in, output of the loss layer out.
vae = Model(input_img, y, name='vae')

In [None]:
# Compile VAE
# loss=None because the loss is registered inside the model by CustomLayer (add_loss).
vae.compile(optimizer='adam', loss=None)
vae.summary()

In [None]:
# Train autoencoder
# No target array is passed (None): the model is compiled with loss=None and
# gets its loss from the custom layer. 20% of X_train is held out for validation.
vae.fit(X_train, None, epochs = 10, batch_size = 1000, validation_split = 0.2)

# Visualize Results
Visualize the inputs mapped to the latent space.
We have encoded the inputs into a latent space of dimension 5; the scatter plot below shows only its first two dimensions.
Extract z_mu --> the first output of the encoder prediction, representing the mean.

In [None]:
mu, _, _ = encoder.predict(X_test)

# scatter needs one colour value per point, but y_test is one-hot encoded
# (one column per class); collapse it to class indices first.
class_ids = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else y_test

# Plot dim 1 and dim 2 of mu, coloured by class
plt.figure(figsize=(10, 10))
plt.scatter(mu[:, 0], mu[:, 1], c=class_ids, cmap='brg')
plt.xlabel('dim 1')
plt.ylabel('dim 2')
plt.colorbar()
plt.show()

Visualize images
Decode a single image from a chosen input latent vector. Note that the decoder expects a vector of size 1 x latent_dim (1x5 here), not 1x2.
The latent space range is about -5 to 5, so pick values within this range.
Try starting with -1, 1 and slowly go up to -1.5, 1.5 to see how the output morphs from one image to the other.

In [None]:
# The decoder expects latent_dim values per sample. Set the first two latent
# coordinates to (1, -1) and leave the remaining ones at 0.
sample_vector = np.zeros((1, latent_dim), dtype='float32')
sample_vector[0, :2] = [1, -1]
decoded_example = decoder.predict(sample_vector)
# The decoder output has num_channels channels, so keep the channel axis.
decoded_example_reshaped = decoded_example.reshape(img_width, img_height, num_channels)
# Training images came from cv2 (BGR order); reverse the channels for imshow (RGB).
plt.imshow(decoded_example_reshaped[..., ::-1])

In [None]:
n = 20  # the canvas holds an n x n (20x20) grid of decoded image patches
figure = np.zeros((img_width * n, img_height * n, num_channels))

Let us automate this process by generating multiple images and plotting

Use decoder to generate images by tweaking latent variables from the latent space

Create a grid of defined size with zeros. 

Take samples from a defined linear space; in this example the range is [-5, 5].

Feed it to the decoder and update zeros in the figure with output.

In [None]:
# Create a Grid of latent variables, to be provided as inputs to decoder.predict
# Creating vectors within range -5 to 5 as that seems to be the range in latent space
grid_x = np.linspace(-5, 5, n)
grid_y = np.linspace(-5, 5, n)[::-1]

In [None]:
# decoder for each square in the grid
for i, yi in enumerate(grid_y):
    for j, xi in enumerate(grid_x):
        z_sample = np.array([[xi, yi]])
        x_decoded = decoder.predict(z_sample)
        digit = x_decoded[0].reshape(img_width, img_height, num_channels)
        figure[i * img_width: (i + 1) * img_width,
               j * img_height: (j + 1) * img_height] = digit

In [None]:
plt.figure(figsize=(10, 10))

# Reshape for visualization: drop the channel axis only when there is a single
# channel. A multi-channel canvas cannot be reshaped to (height, width) and is
# left as it is.
fig_shape = np.shape(figure)
if len(fig_shape) == 3 and fig_shape[2] == 1:
    figure = figure.reshape((fig_shape[0], fig_shape[1]))

In [None]:
# Create the figure in the same cell that draws on it, so the size applies.
plt.figure(figsize=(10, 10))
# A 3-channel canvas is in the BGR order of the cv2-loaded training data;
# reverse it for imshow (RGB). The colormap only affects single-channel images.
grid_image = figure[..., ::-1] if np.ndim(figure) == 3 and np.shape(figure)[-1] == 3 else figure
plt.imshow(grid_image, cmap='gnuplot2')
plt.show()