In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Select the TensorFlow version

In [None]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Deliberate best effort: any exception raised by the magic is swallowed
  # so the rest of the notebook still runs.
  # NOTE(review): outside Colab this presumably relies on the installed
  # TensorFlow already being a 2.x release -- confirm.
  pass

## Download the necessary data into the Colab instance
We will split our dataset into three subsets: a training set, a validation set and a test set.

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow_hub as hub

# Number of images per batch for the training and validation pipelines.
BATCH_SIZE = 32
# Side length, in pixels, of the square images produced by format_image.
IMAGE_SIZE = 224

def format_image(image, label):
    """Resize an image to IMAGE_SIZE x IMAGE_SIZE and divide its values by 255.

    Args:
        image: image tensor as yielded by the dataset
            (presumably 8-bit pixel data, so the result lies in [0, 1] -- confirm).
        label: label paired with the image; returned untouched.

    Returns:
        A ``(image, label)`` tuple holding the resized, rescaled image and
        the original label.
    """
    resized = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    rescaled = resized / 255.0
    return rescaled, label

# Load cats_vs_dogs as (image, label) pairs and carve its single 'train'
# split into 80% training, 10% validation and 10% test subsets.
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    data_dir='/content/dataset/',
    as_supervised=True,
    shuffle_files=True,
    with_info=True,
)

# Both figures come from the dataset metadata. Note that num_examples counts
# the whole 'train' split, i.e. before it is divided 80/10/10 above.
num_classes = metadata.features['label'].num_classes
num_examples = metadata.splits['train'].num_examples
print("Numbers of images: ", num_examples)
print("Numbers of classes: ", num_classes)

# Training pipeline: shuffle with a buffer of a quarter of num_examples,
# preprocess, batch, and keep one batch prefetched.
train_batches = (
    raw_train
    .shuffle(num_examples // 4)
    .map(format_image)
    .batch(BATCH_SIZE)
    .prefetch(1)
)

# Validation pipeline: same preprocessing and batching, without shuffling.
validation_batches = (
    raw_validation
    .map(format_image)
    .batch(BATCH_SIZE)
    .prefetch(1)
)

# Test pipeline: one image per batch.
test_batches = raw_test.map(format_image).batch(1)

[1mDownloading and preparing dataset cats_vs_dogs/4.0.0 (download: 786.68 MiB, generated: Unknown size, total: 786.68 MiB) to /content/dataset/cats_vs_dogs/4.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



Shuffling and writing examples to /content/dataset/cats_vs_dogs/4.0.0.incompleteSKFAYO/cats_vs_dogs-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=23262.0), HTML(value='')))

[1mDataset cats_vs_dogs downloaded and prepared to /content/dataset/cats_vs_dogs/4.0.0. Subsequent calls will reuse this data.[0m
Numbers of images:  23262
Numbers of classes:  2


## Transfer learning
We download a pre-trained ResNet model from TensorFlow Hub and use it as a frozen feature extractor. On top of it we define our own last layer for cat vs. dog classification; the optimizer for the network is chosen when the model is compiled.

ResNet is a family of network architectures for image classification, originally published by

*Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: "Deep Residual Learning for Image Recognition", 2015.*


In [None]:
# TensorFlow Hub handle of the pre-trained feature extractor.
MODULE_HANDLE ="https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4"

# Length of the feature vector emitted by MODULE_HANDLE. For ResNet V2 50
# this is 2048, which is what model.summary() reports for the hub layer.
# Update it together with MODULE_HANDLE when trying another extractor.
FV_SIZE = 2048

# Input shape expected by the extractor: height, width, RGB channels.
# It is derived from the scalar IMAGE_SIZE instead of rebinding IMAGE_SIZE to
# a tuple, so format_image (which builds (IMAGE_SIZE, IMAGE_SIZE)) keeps
# working if the preprocessing is applied again later in the session.
INPUT_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# trainable=False freezes the pre-trained weights; only the Dense head below
# is trained.
feature_extractor = hub.KerasLayer(MODULE_HANDLE,
                                   input_shape=INPUT_SHAPE,
                                   output_shape=[FV_SIZE],
                                   trainable=False)

# Classification head: one softmax unit per class on top of the features.
model = tf.keras.Sequential([
        feature_extractor,
        tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_1 (KerasLayer)   (None, 2048)              23564800  
_________________________________________________________________
dense (Dense)                (None, 2)                 4098      
Total params: 23,568,898
Trainable params: 4,098
Non-trainable params: 23,564,800
_________________________________________________________________


Then, we train our network for only 5 epochs.

In [None]:
# Number of passes over the training batches.
EPOCHS = 5

# Adam optimizer with sparse categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# `history` keeps the per-epoch metrics for the training and validation sets.
history = model.fit(
    train_batches,
    validation_data=validation_batches,
    epochs=EPOCHS,
)

## Plotting the training process
We plot the loss and accuracy of the training process for both the training set and the validation set.

In [None]:
import matplotlib.pyplot as plt
# Show which metrics were recorded during training.
print(history.history.keys())

# One figure per metric: the training curve and the validation curve
# ('val_' + metric), plotted against the epoch index.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='upper left')
    plt.show()

## Run your Model

Let's now take a look at actually running predictions using the model. This code will test 100 images from the test set.

In [None]:
from tqdm import tqdm
import numpy as np

# Number of test images to evaluate.
NUM_TEST_IMAGES = 100

predictions = []
test_labels, test_imgs = [], []

# tqdm reports iterations per second. test_batches is built with batch(1),
# so each iteration is a single prediction on a single image.
for img, label in tqdm(test_batches.take(NUM_TEST_IMAGES)):
    predictions.append(model.predict(img))

    # Each batch holds exactly one label; unwrap it to a plain value.
    test_labels.append(label.numpy()[0])
    test_imgs.append(img)

# Count the predictions whose highest-probability class matches the label.
score = sum(
    int(np.argmax(prediction) == label)
    for prediction, label in zip(predictions, test_labels)
)

# Report the number of images actually evaluated rather than assuming 100,
# in case the test set yields fewer than NUM_TEST_IMAGES images.
print("Out of " + str(len(predictions)) + " predictions I got " + str(score) + " correct")

## Save the model

In [None]:
# Directory that receives the exported SavedModel. The path is relative, so
# it is resolved against the notebook's current working directory.
CATS_VS_DOGS_SAVED_MODEL = "./content/exp_saved_model"
tf.saved_model.save(obj=model, export_dir=CATS_VS_DOGS_SAVED_MODEL)

In [None]:
import subprocess

def du(path):
    """Return the disk usage of ``path`` as printed by ``du -sh``.

    Args:
        path: file or directory to measure.

    Returns:
        The first whitespace-separated field of the ``du -sh`` output,
        decoded as UTF-8 (a human-readable size such as '2,1GB').

    Raises:
        subprocess.CalledProcessError: if ``du`` exits with a non-zero status.
    """
    completed = subprocess.run(['du', '-sh', path],
                               stdout=subprocess.PIPE,
                               check=True)
    # The output is "<size>\t<path>"; only the size field is wanted.
    size_field = completed.stdout.split()[0]
    return size_field.decode('utf-8')
# Print the on-disk size of the exported SavedModel directory. `du -sh`
# picks the unit itself and appends it to the number (K, M, G, ...), so the
# label must not promise a fixed unit.
print("Model size on disk:")
print(du(CATS_VS_DOGS_SAVED_MODEL))

## Exercise 1
What do you notice in terms of:

* Model size 
* Training time
* Accuracy

compared to the network of the last notebook.

What is the size of the model in bytes?


## Exercise 2
Now, we explore other feature extraction models to classify cats and dogs (by replacing MODULE_HANDLE =...) :

* InceptionNet : https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4

* MobileNetV2 : https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4 

Inception V3 is a neural network architecture for image classification, originally published by

> *Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna: "Rethinking the Inception Architecture for Computer Vision", 2015.*

MobileNet V2 is a family of neural network architectures for efficient on-device image classification and related tasks, originally published by:

> *Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen: "Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation", 2018.*

There are many models that we can use, see here https://tfhub.dev/



## Clean Up

Before running the next exercise, run the following cell to terminate the kernel and free memory resources:

In [None]:
import os, signal
# Send SIGKILL to the process running this cell (the notebook kernel itself).
# The process is terminated immediately, so every variable held in memory is
# lost and the kernel has to be started again before running anything else.
os.kill(os.getpid(), signal.SIGKILL)