## TensorFlow Hub

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pylab as plt

In [2]:
data_dir = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [3]:
data_dir

'/root/.keras/datasets/flower_photos'

In [4]:
!ls -lrt /root/.keras/datasets/flower_photos

total 612
-rw-r----- 1 270850 5000 418049 Feb  9  2016 LICENSE.txt
drwx------ 2 270850 5000  49152 Feb 10  2016 tulips
drwx------ 2 270850 5000  36864 Feb 10  2016 sunflowers
drwx------ 2 270850 5000  36864 Feb 10  2016 roses
drwx------ 2 270850 5000  49152 Feb 10  2016 dandelion
drwx------ 2 270850 5000  32768 Feb 10  2016 daisy


Now we are going to use a generator object and invoke its `flow_from_directory` to handle the reshaping process.

In [5]:
pixels =224
BATCH_SIZE = 32 
IMAGE_SIZE = (pixels, pixels)
NUM_CLASSES = 5

In [6]:
datagen_kwargs = dict(rescale=1./255, validation_split=.20)
dataflow_kwargs = dict(target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
                   interpolation="bilinear")

valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    **datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    data_dir, subset="validation", shuffle=False, **dataflow_kwargs)

train_datagen = valid_datagen
train_generator = train_datagen.flow_from_directory(
    data_dir, subset="training", shuffle=True, **dataflow_kwargs)

Found 731 images belonging to 5 classes.
Found 2939 images belonging to 5 classes.


The interpolation argument indicates that the generator needs to resample the image data to target_size, which is 224 × 224 pixels.

Find label index and order of classes

In [7]:
labels_idx = (train_generator.class_indices)
idx_labels = dict((v,k) for k,v in labels_idx.items())

In [8]:
idx_labels

{0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)),
    hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v1_101/feature_vector/4",
                   trainable=False),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name = 'flower_class')
])
model.build([None, 224, 224, 3])

- **trainable** - False. This indicates we want to re-use the current values from the pretrained model

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 2048)              42605504  
_________________________________________________________________
flower_class (Dense)         (None, 5)                 10245     
Total params: 42,615,749
Trainable params: 10,245
Non-trainable params: 42,605,504
_________________________________________________________________


In [11]:
model.compile(
  optimizer=tf.keras.optimizers.SGD(lr=0.005, momentum=0.9), 
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy'])

  "The `lr` argument is deprecated, use `learning_rate` instead.")


label_smoothing is a regularization technique to help prevent over-fitting.

In [12]:
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size
hist = model.fit(
    train_generator,
    epochs=5, steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps).history

Epoch 1/5


  '"`categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
sample_test_images, ground_truth_labels = next(valid_generator)

In [14]:
type(sample_test_images)

numpy.ndarray

In [15]:
len(ground_truth_labels)

32

In [16]:
prediction = model.predict(valid_generator)

In [17]:
len(prediction)

731

In [18]:
prediction

array([[0.7880899 , 0.01445665, 0.01071287, 0.14030205, 0.04643861],
       [0.9106453 , 0.00757415, 0.00268174, 0.0500952 , 0.02900355],
       [0.79362607, 0.0648425 , 0.02009246, 0.09089768, 0.03054124],
       ...,
       [0.04388575, 0.01358517, 0.04266402, 0.02076738, 0.87909764],
       [0.00564117, 0.0105213 , 0.00117559, 0.0023402 , 0.9803217 ],
       [0.05844356, 0.1931759 , 0.09691954, 0.0413634 , 0.61009765]],
      dtype=float32)

In [19]:
labelings = tf.math.argmax(prediction, axis = -1)

In [20]:
predicted_idx = tf.math.argmax(prediction, axis = -1)

In [21]:
predicted_idx

<tf.Tensor: shape=(731,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 3, 1, 1, 1, 1, 0, 3, 1, 1, 1, 3, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 3, 3, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 4, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 4, 1, 1, 1, 0, 0, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 0, 1, 3, 1, 1, 1, 1, 1, 1, 1,
     

In [22]:
idx_labels[0]

'daisy'

In [23]:
label_reference = np.asarray(list(labels_idx))
label_reference

array(['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'],
      dtype='<U10')

In [24]:
def find_label(idx):
    return idx_labels[idx]

When we want to apply a a function to each element of a NumPy array, we need to vectorize it

In [25]:
find_label_batch = np.vectorize(find_label)
result_class = find_label_batch(predicted_idx)

In [26]:
import pandas as pd
predicted_label = result_class.tolist()
file_name = valid_generator.filenames

results=pd.DataFrame({"File":file_name,
                      "Prediction":predicted_label})
results

Unnamed: 0,File,Prediction
0,daisy/100080576_f52e8ee070_n.jpg,daisy
1,daisy/10140303196_b88d3d6cec.jpg,daisy
2,daisy/10172379554_b296050f82_n.jpg,daisy
3,daisy/10172567486_2748826a8b.jpg,daisy
4,daisy/10172636503_21bededa75_n.jpg,daisy
...,...,...
726,tulips/14068200854_5c13668df9_m.jpg,tulips
727,tulips/14068295074_cd8b85bffa.jpg,tulips
728,tulips/14068348874_7b36c99f6a.jpg,tulips
729,tulips/14068378204_7b26baa30d_n.jpg,tulips


In [27]:
y_actual = pd.Series(valid_generator.classes)
y_predicted = pd.Series(predicted_idx)

In [28]:
pd.crosstab(y_actual, y_predicted, rownames = ['Actual'], colnames=['Predicted'], margins=True)

Predicted,0,1,2,3,4,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,120,2,0,3,1,126
1,13,150,0,11,5,179
2,2,2,96,8,20,128
3,2,5,3,123,6,139
4,5,2,4,9,139,159
All,142,161,103,154,171,731


- Class 0: Daisy
- Class 1: Dandelion
- Class 2: Roses
- Class 3: Sunflowers
- Class 4: Tulips

In [29]:
predicted_results = y_predicted
truth = y_actual

In [30]:
type(predicted_results[0])

numpy.int64

In [31]:
from sklearn.metrics import classification_report
report = classification_report(truth, predicted_results)


In [32]:
print(report)

              precision    recall  f1-score   support

           0       0.85      0.95      0.90       126
           1       0.93      0.84      0.88       179
           2       0.93      0.75      0.83       128
           3       0.80      0.88      0.84       139
           4       0.81      0.87      0.84       159

    accuracy                           0.86       731
   macro avg       0.86      0.86      0.86       731
weighted avg       0.87      0.86      0.86       731



## tf.keras.applications module 

In [33]:
base_model = tf.keras.applications.ResNet101V2(input_shape = (224, 224, 3), include_top = False, weights = 'imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [34]:
model2 = tf.keras.Sequential([
  base_model,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(NUM_CLASSES, activation = 'softmax', name = 'flower_class')
])

In [35]:
model2.compile(
  optimizer=tf.keras.optimizers.SGD(lr=0.005, momentum=0.9), 
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy']
)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


label

In [36]:
model2.fit(
    train_generator,
    epochs=5, steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps)

Epoch 1/5


  '"`categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f91c2612ed0>

## Fine tuning


In [37]:
base_model.trainable = True
print("Number of layers in the base model: ", len(base_model.layers))

Number of layers in the base model:  377


In [38]:
# Fine tune from this layer and onwards
fine_tune_at = 300

In [39]:
# Freeze all the layers before the 'fine_tune_at' layer
for layer in base_model.layers[: fine_tune_at]:
  layer.trainable = False

In [40]:
model3 = tf.keras.Sequential([
  base_model,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(NUM_CLASSES, activation = 'softmax', name = 'flower_class')
])

In [41]:
model3.compile(
  optimizer=tf.keras.optimizers.SGD(lr=0.005, momentum=0.9), 
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy']
)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [42]:
fine_tune_epochs = 2
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size
model3.fit(
    train_generator,
    epochs=fine_tune_epochs, steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps)

Epoch 1/2


  '"`categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/2


<keras.callbacks.History at 0x7f91b6cf1c50>

In [43]:
model3 = tf.keras.Sequential([
  base_model,
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(NUM_CLASSES, activation = 'softmax', name = 'flower_class')
])

In [44]:
model3.compile(
  optimizer=tf.keras.optimizers.SGD(lr=0.005, momentum=0.9), 
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy']
)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [45]:
model3.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet101v2 (Functional)     (None, 7, 7, 2048)        42626560  
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
flower_class (Dense)         (None, 5)                 501765    
Total params: 43,128,325
Trainable params: 19,680,773
Non-trainable params: 23,447,552
_________________________________________________________________


In [46]:
model3.fit(
    train_generator,
    epochs=5, steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps)

Epoch 1/5


  '"`categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f91aefc5b90>