<a href="https://colab.research.google.com/github/allnes/pcam_train/blob/master/pcam_cnn_maxpooling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive into the Colab runtime under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Install keras-metrics into the kernel's environment; it is imported further
# down in this notebook as `ksm`.
# NOTE(review): the version is not pinned, so a re-run may pick up a different release.
%pip install keras-metrics

In [0]:
cd  '/content/drive/My Drive/DL_PCAM'

In [0]:
# List the current working directory (presumably to check that the dataset
# files referenced below are present).
%ls

In [0]:
import gzip
import shutil
def uzip_file(file_name, origin_name):
  """Decompress a gzip archive to a regular file.

  Args:
    file_name: path of the gzip-compressed input file.
    origin_name: path the decompressed bytes are written to (overwritten
      if it already exists, since it is opened in 'wb' mode).
  """
  # Stream from the archive to the target so the payload is never held
  # in memory as a whole.
  with gzip.open(file_name, 'rb') as compressed, \
       open(origin_name, 'wb') as restored:
    shutil.copyfileobj(compressed, restored)

In [0]:
# Names of the dataset files, relative to the current working directory.
_H5_TEMPLATE = 'camelyonpatch_level_2_split_{split}_{part}.h5'
_META_TEMPLATE = 'camelyonpatch_level_2_split_{split}_meta.csv'

# HDF5 files: the "x" file of a split is read with dataset key 'x' and the
# "y" file with dataset key 'y' by the loading cell.
train_x_name, train_y_name = (
    _H5_TEMPLATE.format(split='train', part=part) for part in ('x', 'y'))
test_x_name, test_y_name = (
    _H5_TEMPLATE.format(split='test', part=part) for part in ('x', 'y'))
valid_x_name, valid_y_name = (
    _H5_TEMPLATE.format(split='valid', part=part) for part in ('x', 'y'))

# CSV metadata files, one per split.
meta_train_name, meta_valid_name, meta_test_name = (
    _META_TEMPLATE.format(split=split) for split in ('train', 'valid', 'test'))

# One-off decompression of the '.gz' archives; intentionally left disabled.
# Uncomment to (re)create the .h5 files next to their archives:
# for h5_name in (train_x_name, train_y_name,
#                 test_x_name, test_y_name,
#                 valid_x_name, valid_y_name):
#     uzip_file(h5_name + '.gz', h5_name)


In [0]:
from keras.utils import HDF5Matrix, normalize
import pandas as pd

# Open each split through HDF5Matrix: dataset key 'x' in the *_x file and
# dataset key 'y' in the matching *_y file.
x_train, y_train = HDF5Matrix(train_x_name, 'x'), HDF5Matrix(train_y_name, 'y')
x_valid, y_valid = HDF5Matrix(valid_x_name, 'x'), HDF5Matrix(valid_y_name, 'y')
x_test, y_test = HDF5Matrix(test_x_name, 'x'), HDF5Matrix(test_y_name, 'y')

# Per-split metadata tables, read from the CSV files.
meta_train, meta_valid, meta_test = (
    pd.read_csv(csv_name)
    for csv_name in (meta_train_name, meta_valid_name, meta_test_name)
)

In [0]:
from matplotlib import pyplot

# Basic facts about the training inputs: sample count, container type,
# type of a single item and the shape of a single item (one per line).
print(len(x_train), type(x_train), type(x_train[0]), x_train[0].shape, sep='\n')

# Show the first nine training items in a 3x3 grid.
# NOTE(review): if the items are RGB arrays (the model below uses an input
# shape with 3 channels), matplotlib ignores the 'gray' colormap — confirm.
for grid_slot in range(1, 10):
    pyplot.subplot(3, 3, grid_slot)
    pyplot.imshow(x_train[grid_slot - 1], cmap=pyplot.get_cmap('gray'))
pyplot.show()


In [0]:
# Edge length used by the (currently disabled) resize step in this cell.
current_core = 32
# (width, height) pair and the matching 3-channel shape derived from it.
new_size = (current_core,) * 2
new_shape = new_size + (3,)
# Number of scalar values in one resized 3-channel item.
full_size = current_core ** 2 * 3
# Shape passed to the first Conv2D layer as `input_shape`.
original_shape = (96, 96, 3)

import numpy as np
# import cv2 as cv
# def resize_for_train(x_array, x_shape):
#   return np.array([cv.resize(elem, dsize=x_shape) for elem in x_array])
# x_train = resize_for_train(x_train, new_size)
# x_valid = resize_for_train(x_valid, new_size)
# x_test  = resize_for_train(x_test,  new_size)

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout, Activation
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
import keras_metrics as ksm
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.utils import np_utils

def plot_metrics(code_metrics, name_metrics):
    """Plot the training and validation curves of one metric.

    Reads the module-level `history` object (the value returned by the
    training call), so it must be called after training has finished.

    Args:
        code_metrics: key of the metric in `history.history`, e.g. 'loss';
            the validation curve is looked up under 'val_' + code_metrics.
        name_metrics: human-readable metric name used for the title and
            the y-axis label.

    Raises:
        KeyError: if either key is missing from `history.history`.
    """
    plt.plot(history.history[code_metrics])
    plt.plot(history.history['val_' + code_metrics])
    plt.title('Model ' + name_metrics)
    plt.ylabel(name_metrics)
    plt.xlabel('Epoch')
    # The second curve comes from the 'val_' entries, i.e. the validation
    # split, not the test split, so it is labelled accordingly.
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

def show_metrics(list_metrics, name_stage, name_model):
    """Print four evaluation metrics as percentages, one per line.

    Args:
        list_metrics: indexable sequence whose elements 1..4 are, in order,
            accuracy, precision, recall and f1-score; element 0 is skipped
            (in this notebook the sequence comes from `model.evaluate`).
        name_stage: label of the data split, e.g. 'train'.
        name_model: label of the model.

    Raises:
        IndexError: if `list_metrics` has fewer than five elements.
    """
    labels = ('accuracy', 'precision', 'recall', 'f1-score')
    # Start at 1 because position 0 of `list_metrics` is not reported.
    for position, label in enumerate(labels, start=1):
        line_head = ' '.join((name_model, name_stage, label))
        percentage = list_metrics[position] * 100
        print(line_head + ' : ' + str(percentage))

In [0]:
name_model = 'CNN with MaxPooling'


def _conv_relu_bn(filters, **conv_kwargs):
    """Return the layers CONV (3x3, 'same' padding) => RELU => BatchNorm.

    Extra keyword arguments (e.g. `input_shape`) are forwarded to Conv2D.
    """
    return [
        Conv2D(filters, (3, 3), padding="same", **conv_kwargs),
        Activation("relu"),
        BatchNormalization(),
    ]


# The sub-lists are concatenated left to right, so the layers are created
# and stacked in exactly the order in which they are written here.
model = Sequential(
    # Stage 1: CONV => RELU => POOL
    _conv_relu_bn(32, input_shape=original_shape)
    + [MaxPooling2D(pool_size=(3, 3)), Dropout(0.25)]
    # Stage 2: (CONV => RELU) * 2 => POOL
    + _conv_relu_bn(64)
    + _conv_relu_bn(64)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.25)]
    # Stage 3: (CONV => RELU) * 2 => POOL
    + _conv_relu_bn(128)
    + _conv_relu_bn(128)
    + [MaxPooling2D(pool_size=(2, 2))]
    # Classifier head: one hidden dense layer, then a single sigmoid unit.
    + [
        Flatten(),
        Dense(1024),
        Activation("relu"),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1),
        Activation("sigmoid"),
    ]
)

model.summary()

# Binary classification: cross-entropy loss, plain SGD, and accuracy plus
# the keras-metrics precision / recall / F1 metrics (in this order, which
# is the order `show_metrics` expects after the loss).
model.compile(
    loss='binary_crossentropy',
    optimizer='sgd',
    metrics=[
        'binary_accuracy',
        ksm.binary_precision(),
        ksm.binary_recall(),
        ksm.binary_f1_score(),
    ],
)

# --- Labels ---------------------------------------------------------------
# Load each label set into memory and reshape it to a column of shape
# (n_samples, 1), matching the single output unit of the model.
y_train = np.array(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))

y_valid = np.array(y_valid)
y_valid = y_valid.reshape((y_valid.shape[0], 1))

y_test = np.array(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))


# --- Input pipelines ------------------------------------------------------
def scale_pixels(image):
    """Divide the pixel values by 255 (the scaling the model is trained on)."""
    return image / 255.


# Training pipeline: scaling plus random augmentation.
datagen = ImageDataGenerator(
              preprocessing_function=scale_pixels,
              width_shift_range=2,   # randomly shift images horizontally
              height_shift_range=2,  # randomly shift images vertically
              horizontal_flip=True,  # randomly flip images
              vertical_flip=True)    # randomly flip images

# Validation / evaluation pipeline: the same scaling, but no augmentation,
# so the reported numbers describe the unmodified images.
eval_datagen = ImageDataGenerator(preprocessing_function=scale_pixels)

# A batch of one sample makes the BatchNormalization layer that follows the
# dense layer normalise every activation to zero during training (the batch
# variance is 0), and it makes an epoch needlessly slow; use real mini-batches.
batch_size = 32


def evaluate_scaled(x, y):
    """Evaluate `model` on (x, y) with the same pixel scaling as in training.

    Returns the list produced by `model.evaluate_generator`: the loss
    followed by the compiled metrics.
    """
    return model.evaluate_generator(
        eval_datagen.flow(x, y, batch_size=batch_size, shuffle=False))


# --- Training -------------------------------------------------------------
history = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=len(x_train) // batch_size,
    epochs=10,
    shuffle=True,  # boolean flag; the previous value 'batch' was merely truthy
    use_multiprocessing=True,
    validation_data=eval_datagen.flow(x_valid, y_valid,
                                      batch_size=batch_size, shuffle=False)
    )

plot_metrics('loss', 'Loss')
plot_metrics('binary_accuracy',  'Accuracy')

# --- Evaluation -----------------------------------------------------------
# Calling model.evaluate on the raw arrays would feed unscaled pixel values
# to a network trained on values divided by 255, so every split goes through
# the scaling-only generator instead.
show_metrics(evaluate_scaled(x_train, y_train), 'train', name_model)
show_metrics(evaluate_scaled(x_valid, y_valid), 'valid', name_model)
show_metrics(evaluate_scaled(x_test,  y_test),  'test',  name_model)