In [None]:
# IGNORE THIS CELL WHICH CUSTOMIZES LAYOUT AND STYLING OF THE NOTEBOOK !
from numpy.random import seed

seed(42)
import tensorflow as tf

tf.random.set_seed(42)
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="darkgrid")
mpl.rcParams["lines.linewidth"] = 3
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%config IPCompleter.greedy=True
import warnings

warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=FutureWarning)
from IPython.core.display import HTML

HTML(open("custom.html", "r").read())

# Chapter 8d: Introduction to Neural Networks
## Using pre-defined models in TensorFlow

In [None]:
# `applications` is the Keras module whose pre-defined models are used below.
from tensorflow.keras import applications

# Show the built-in help text of the module.
help(applications)

### ImageNet 
[ImageNet](http://image-net.org/) is a very large (more than 14 million images!) and easily accessible image database. More than 14 million of its images are annotated with the object they show, and more than 1 million images additionally come with bounding box information.

Summary and statistics: http://image-net.org/about-stats


In [None]:
from tensorflow.keras.applications import VGG16

In [None]:
# IPython help syntax: display the signature and docstring of VGG16.
?VGG16

In [None]:
# Instantiate VGG16 with the weights that were pre-trained on ImageNet.
# NOTE(review): presumably the weights are downloaded on first use, which
# would require network access - confirm for the target environment.
model = VGG16(weights="imagenet")

In [None]:
# Print an overview of the layers of the pre-trained network.
model.summary()

In [None]:
from IPython.display import Image as Img
from IPython.display import display

# Show the example picture that is classified in the next cell.
display(Img(filename="./images/mr_panda_free.jpg", width=600))
# Image credit. A plain print is enough here; there is no need to wrap the
# string in a display object only to read it back out again.
print("source: GEORGE LU/FLICKR (CC BY 2.0)")

In [None]:
from tensorflow.keras.applications.vgg16 import decode_predictions, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Load the picture at the 224x224 resolution used for this model and turn it
# into a numpy array of pixel values.
image = img_to_array(load_img("./images/mr_panda_free.jpg", target_size=(224, 224)))
# The model works on batches: prepend a batch axis of size one, then apply the
# VGG16-specific input preprocessing.
image = preprocess_input(image.reshape((1, *image.shape)))
# Probabilities of belonging to each of the output classes
prediction = model.predict(image)
# Translate the probabilities into class labels and keep the entry of our
# single image, which holds its top 5 classes.
label = decode_predictions(prediction)[0]
for class_id, class_name, probability in label:
    # print the classification
    print("It is: {} with probability {:.4f}%".format(class_name, probability * 100))

## Transferring knowledge

Recap: Convolutional Neural Networks can be seen as consisting of 2 parts:
**a feature extractor (convolution and max-pooling layers) and a classifier (dense layers)**

There are different possibilities to work with pre-existing models that were pre-trained on very large datasets such as ImageNet:

* Freezing the convolution part and throwing away the classifier part. Adding your own dense layers and training them.
* Freezing only some layers in the convolution part and throwing away the classifier part. Adding your own dense layers and training the unfrozen layers together with the new dense layers.
* Only using the architecture and training the whole network again.

## Realistic example

### Histopathological Cancer Detection

https://www.kaggle.com/c/histopathologic-cancer-detection/overview

**Download data**: https://www.kaggle.com/competitions/histopathologic-cancer-detection/data

Identification of metastatic cancer in small image patches taken from larger digital pathology scans.

In [None]:
%matplotlib inline
# Plotting a few images from this dataset
import os

import matplotlib.pyplot as plt
import numpy as np
from numpy import random
from PIL import Image

# Re-seed NumPy's global random generator; plot_data below draws its example
# images with random.choice.
random.seed(42)
import tensorflow as tf

# Re-seed TensorFlow's global random generator as well.
tf.random.set_seed(42)


def plot_data(samples, top_dir):
    """Show randomly picked example images of both classes in a grid.

    The figure has one row per class sub-directory ("benign", "malign") and
    ``samples`` columns. Every panel is titled with the class name and the
    row index of the class, shown as its label.

    Args:
        samples: Number of images to show per class (number of columns).
        top_dir: Directory that contains the "benign" and "malign"
            sub-directories with the image files.

    Raises:
        FileNotFoundError: If a class sub-directory does not exist (raised by
            ``os.listdir``).
    """
    sub_directories = ["benign", "malign"]
    # squeeze=False keeps `ax` two-dimensional even for samples == 1, so the
    # ax[row, column] indexing below works for every number of samples.
    fig, ax = plt.subplots(
        len(sub_directories),
        samples,
        sharex=True,
        sharey=True,
        figsize=(3 * samples, 3 * len(sub_directories)),
        squeeze=False,
    )
    # List every class directory only once instead of once per panel. The
    # listing is sorted because os.listdir returns entries in arbitrary order,
    # which would make the seeded random.choice pick different files on
    # different machines.
    files_per_class = {
        class_name: sorted(os.listdir(os.path.join(top_dir, class_name)))
        for class_name in sub_directories
    }
    for column in range(samples):
        for row, class_name in enumerate(sub_directories):
            file_name = random.choice(files_per_class[class_name])
            # Copy the pixels into an array and close the image file right away.
            with Image.open(os.path.join(top_dir, class_name, file_name)) as tmp_img:
                pixels = np.asarray(tmp_img)
            ax[row, column].imshow(pixels)
            ax[row, column].set_title("{}: label={}".format(class_name, row))
            ax[row, column].grid(False)


# Location of the downloaded histopathologic_cancer_detection folder.
# Set the environment variable HISTOPATHOLOGY_DATA_DIR to point to your own
# copy of the data; without it the shared workshop folder on the cluster is used.
data_dir = os.environ.get(
    "HISTOPATHOLOGY_DATA_DIR",
    "/cluster/project/workshops/machine_learning/machinelearning-introduction-workshop/data/histopathologic_cancer_detection/",
)
plot_data(4, os.path.join(data_dir, "train"))

In [None]:
# Data preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_data = ImageDataGenerator(rescale=1 / 255.0)

train_directory = os.path.join(data_dir, "train")
train_data_generator = train_data.flow_from_directory(
    train_directory, target_size=(96, 96), batch_size=256, class_mode="binary"
)

validation_data = ImageDataGenerator(rescale=1 / 255.0)
validation_directory = os.path.join(data_dir, "validation")
validation_data_generator = validation_data.flow_from_directory(
    validation_directory, target_size=(96, 96), batch_size=256, class_mode="binary"
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

In [None]:
from tensorflow.keras.applications import VGG16

In [None]:
# VGG16 architecture for 96x96 RGB inputs, used as the feature extractor of the
# model assembled in the next cell. `weights=None` means no pre-trained weights
# are loaded and `include_top=False` leaves out the original classifier layers.
feature_extractor = VGG16(weights=None, include_top=False, input_shape=(96, 96, 3))
# Alternative feature extractor (MobileNetV2 is not imported in the cells shown here):
# feature_extractor = MobileNetV2(weights=None, include_top=False, input_shape=(96,96,3))
feature_extractor.summary()

In [None]:
# Classifier on top of the feature extractor: flatten the feature maps, apply
# dropout, then a hidden dense layer followed by a single sigmoid output unit.
model = models.Sequential(
    [
        feature_extractor,
        layers.Flatten(),
        layers.Dropout(0.2),
        layers.Dense(512, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)

In [None]:
# Print an overview of the assembled model (feature extractor plus classifier).
model.summary()

In [None]:
# Training set-up for the two-class problem: RMSprop with a small learning
# rate, binary cross-entropy as loss, and accuracy as the reported metric.
rmsprop = optimizers.RMSprop(learning_rate=1e-4)
model.compile(loss="binary_crossentropy", optimizer=rmsprop, metrics=["accuracy"])

In [None]:
# Number of passes over the training data.
num_epochs = 10
# Multiply the learning rate by 0.2 whenever the validation loss has not
# improved for 2 epochs, but never go below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=2, min_lr=0.000001
)
# Save the model to "./test/" after every epoch.
# NOTE(review): newer Keras releases appear to require a checkpoint path ending
# in ".keras" or ".h5" - confirm against the installed version.
mcp_save = ModelCheckpoint("./test/", save_freq="epoch")

In [None]:
# Timing of an earlier run of this cell (without dropout):
# CPU times: user 1h 21min 11s, sys: 17min 41s, total: 1h 38min 53s
# Wall time: 1h 58min 20s
# Train on the training iterator and evaluate on the validation iterator after
# every epoch; one epoch runs through all batches of the respective iterator.
model_run = model.fit(
    train_data_generator,
    steps_per_epoch=len(train_data_generator),
    epochs=num_epochs,
    validation_data=validation_data_generator,
    validation_steps=len(validation_data_generator),
    callbacks=[reduce_lr, mcp_save],
)

In [None]:
import pickle

# with open("./data/histopathology_run_history", "wb") as filehandler:
#    pickle.dump(model_run.history, filehandler)

In [None]:
# Load a stored training history so that the curves can be plotted without
# repeating the training (about 2 hours, see the timing note on the fit cell).
# NOTE: pickle.load can execute arbitrary code - only open files you trust.
with open("./data/histopathology_run_history", "rb") as history_file:
    history = pickle.load(history_file)
# The history holds one value per epoch, so take the number of epochs from the
# data instead of hard-coding it.
num_epochs = len(history["accuracy"])
epochs = np.arange(0, num_epochs)
plt.plot(epochs, history["val_accuracy"], label="Validation accuracy")
plt.plot(epochs, history["accuracy"], label="Train accuracy")
plt.xlabel("epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.ylim([0.6, 1])
# NOTE(review): without arguments plt.grid() toggles the grid; with the seaborn
# "darkgrid" style this presumably switches it off - confirm this is intended.
plt.grid()

In [None]:
# Data Augmentation
# NOTE: only `train_data` is redefined here; `train_data_generator` from the
# preprocessing cell is not rebuilt, so within this cell the augmentation is
# only used for the preview below.
train_data = ImageDataGenerator(
    rescale=1 / 255.0,
    rotation_range=90,
    width_shift_range=0.0,
    height_shift_range=0.0,
    shear_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",
)
# Visualizing what our data generator is doing
# Choosing an image randomly
from numpy import random

malign_directory = os.path.join(train_directory, "malign")
# os.listdir returns entries in arbitrary order; sorting makes the random pick
# depend only on the state of the random generator.
malign_files = sorted(os.listdir(malign_directory))
# Copy the pixels into an array and close the image file right away.
with Image.open(
    os.path.join(malign_directory, random.choice(malign_files))
) as picked_image:
    pic_malignant = np.asarray(picked_image)

fig, ax = plt.subplots(1, 8, sharex=True, sharey=True, figsize=(3 * 8, 3))
ax = ax.flatten()
# First panel: the original, unmodified image.
ax[0].imshow(pic_malignant)
ax[0].grid(False)
# flow() works on batches, so add a leading batch axis of size one.
pic_malignant = pic_malignant[np.newaxis, :]
# Remaining 7 panels: one augmented version each. zip stops as soon as the
# panels are used up, so exactly 7 augmented batches are drawn.
for axis, augmented in zip(ax[1:], train_data.flow(pic_malignant)):
    axis.imshow(augmented[0])
    axis.grid(False)

## TensorFlow Hub

A great repository of trained machine learning models!

The models can be downloaded and used with just a few lines of code.

Find models here: https://tfhub.dev/

In [None]:
import tensorflow_hub as hub

In [None]:
# Wrap the ResNet V2 50 ImageNet classification model from TensorFlow Hub as a
# Keras layer. trainable=True marks its weights as trainable.
# NOTE(review): presumably the model is downloaded from tfhub.dev on first use,
# which would require network access - confirm for the target environment.
layer = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4", trainable=True
)

In [None]:
from tensorflow.keras.models import Sequential

# A model consisting only of the TensorFlow Hub layer.
model = Sequential([layer])
# Fix the input shape: batches of any size (None) of 224x224 images with 3 channels.
model.build([None, 224, 224, 3])
model.summary()