In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd


from keras.datasets import mnist, fashion_mnist
import tensorflow.keras as kb
from tensorflow.keras import backend
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer

import matplotlib.pyplot as plt


# Transfer Learning
Transfer Learning is the typical way MOST people will use Deep Learning. 

Transfer Learning involves using models that someone else **already trained** and either:

- building on top of the model
OR
- fine tuining some or all of the weights of the model

Transfer learning is so important because state of the art deep learning models are very **expensive** to train (both computationally expensive, time expensive, and concomitantly money expensive because that compute time costs money! Some of these models cost $50,000 to train).

You will likely not be training complex models yourself, you'll use open source models and tweak them to fit whatever needs you have. 

## Load Data From Kaggle


In [3]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [4]:
!kaggle competitions download -c dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
100% 811M/812M [00:42<00:00, 21.4MB/s]
100% 812M/812M [00:42<00:00, 20.0MB/s]


In [5]:
!unzip -qq dogs-vs-cats.zip

In [6]:
!unzip -qq train.zip

In [7]:
# load data for next part 
import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("cats_vs_dogs_small")

def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)


In [8]:
from tensorflow.keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    new_base_dir / "train",
    image_size=(180, 180),
    batch_size=32)
validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation",
    image_size=(180, 180),
    batch_size=32)
test_dataset = image_dataset_from_directory(
    new_base_dir / "test",
    image_size=(180, 180),
    batch_size=32)

random_numbers = np.random.normal(size=(1000, 16))
dataset = tf.data.Dataset.from_tensor_slices(random_numbers)

for i, element in enumerate(dataset):
    print(element.shape)
    if i >= 2:
        break

batched_dataset = dataset.batch(32)
for i, element in enumerate(batched_dataset):
    print(element.shape)
    if i >= 2:
        break

reshaped_dataset = dataset.map(lambda x: tf.reshape(x, (4, 4)))
for i, element in enumerate(reshaped_dataset):
    print(element.shape)
    if i >= 2:
        break

for data_batch, labels_batch in train_dataset:
    print("data batch shape:", data_batch.shape)
    print("labels batch shape:", labels_batch.shape)
    break

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


(16,)
(16,)
(16,)
(32, 16)
(32, 16)
(32, 16)
(4, 4)
(4, 4)
(4, 4)
data batch shape: (32, 180, 180, 3)
labels batch shape: (32,)



## Building FF Layers on Top of a Pre-Trained CNN

There are many pre-trained CNNs that are trained on very large repositories of images that include every-day objects and animals. Remember that the purpose of Convolutional and Pooling Layers is to *learn features* in the image (like a wheel, an eye, a petal...). If the pre-trained CNN learns enough useful features, we can use these features for a *bunch* of different applications. 

Some common pre-trained CNNs you might hear about are **VGG**, **ResNet** (the one that popularized residual connections), **Inception**, and **Xception**.

We're going to use **VGG16** and build our own classifier on top of its convolutional layers.  

We'll take the full model and remove the "head" (the FeedForward layers), and create our own FeedForward layers. 



In [9]:
vgg_conv = kb.applications.vgg16.VGG16(
    weights = "imagenet",
    include_top = False,
    input_shape = (180,180,3)
)

vgg_conv.trainable = False
vgg_conv.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 180, 180, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 180, 180, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 90, 90, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 90, 90, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 90, 90, 128)      

LOOK HOW MANY parameters there are?! Do you want to train that model? I sure don't.

Now that we have the base, we can add FF layers to the model. 

The function `kb.applications.vgg16.preprocess_input()` applies some pre-processing steps to the data before feeding it into the VGG model base.

In [11]:
inputs = kb.Input(shape=(180, 180, 3))
x = kb.applications.vgg16.preprocess_input(inputs)
x = vgg_conv(x)
x = kb.layers.Flatten()(x)
x = kb.layers.Dense(256)(x)
x = kb.layers.Dropout(0.5)(x)
outputs = kb.layers.Dense(1, activation="sigmoid")(x)

custom_ff = kb.Model(inputs, outputs)
custom_ff.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

In [12]:
custom_ff.fit(train_dataset, epochs = 5, validation_data = validation_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fb8fbc82a30>

In [17]:
custom_ff.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 tf.__operators__.getitem (S  (None, 180, 180, 3)      0         
 licingOpLambda)                                                 
                                                                 
 tf.nn.bias_add (TFOpLambda)  (None, 180, 180, 3)      0         
                                                                 
 vgg16 (Functional)          (None, 5, 5, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 12800)             0         
                                                                 
 dense (Dense)               (None, 256)               3277056   
                                                             

## Fine Tuning a Network

While adding a new FF portion to a pre-trained CNN is simple and often works well, we *sometimes* want to tweak the weights from the pre-trained model. In order to do this we **Freeze** and **Unfreeze** various layers of the network. 

Layers that are **Frozen** will not be updated, their weights will stay as is. 

Layers that are **NOT Frozen** are trainable and their weights will be updated using backprop + gradient descent while the FF layers we added are trained. This allows the network to learn *slightly* different features that are better suited to our purposes. Typically we will unfreeze the last few convolutional/pooling layers from the pre-trained network. 

NOTE: it would be a bad idea to unfreeze middle layers without unfreezing the layers after it.


Now let's use the same base model **VGG** but we'll unfreeze the last 4 layers, and train them along with the feed forward layers that we added.

<img src="https://drive.google.com/uc?export=view&id=1BPCDzYXeN-ERqhEBKUml48fh8uEJljvW"/>

(image from Deep Learning with Python by Chollet)



(activity modified from [this example](https://towardsdatascience.com/fine-tuning-pre-trained-model-vgg-16-1277268c537f))

In [13]:
vgg_conv = kb.applications.vgg16.VGG16(
    weights = "imagenet",
    include_top = False,
    input_shape = (180,180,3)
)

# Freeze four convolution blocks
for layer in vgg_conv.layers[:15]:
    layer.trainable = False
# Make sure you have frozen the correct layers
for i, layer in enumerate(vgg_conv.layers):
    print(i, layer.name, layer.trainable)

0 input_3 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True


In [14]:
inputs = kb.Input(shape=(180, 180, 3))
x = kb.applications.vgg16.preprocess_input(inputs)
x = vgg_conv(x)
x = kb.layers.Flatten()(x)
x = kb.layers.Dense(256)(x)
x = kb.layers.Dropout(0.5)(x)
outputs = kb.layers.Dense(1, activation="sigmoid")(x)

transfer_model = kb.Model(inputs, outputs)
transfer_model.compile(loss="binary_crossentropy",
              optimizer=kb.optimizers.RMSprop(learning_rate=1e-5),
              metrics=["accuracy"])

In [15]:
transfer_model.fit(train_dataset, epochs=5, validation_data=(test_dataset))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fb8fbc82d60>

In [16]:
transfer_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 tf.__operators__.getitem_1   (None, 180, 180, 3)      0         
 (SlicingOpLambda)                                               
                                                                 
 tf.nn.bias_add_1 (TFOpLambd  (None, 180, 180, 3)      0         
 a)                                                              
                                                                 
 vgg16 (Functional)          (None, 5, 5, 512)         14714688  
                                                                 
 flatten_1 (Flatten)         (None, 12800)             0         
                                                                 
 dense_2 (Dense)             (None, 256)               3277

Compare the number of `Non-trainable params:` in this model compared to the last one we trained. 