In [None]:
# Mount the user's Google Drive into the Colab VM so that the dataset archive
# stored under /content/drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive') 

In [None]:
# NOTE(review): the `tensorflow-gpu` distribution has reportedly been retired
# on PyPI in favour of plain `tensorflow`, and Colab runtimes usually ship
# TensorFlow already - confirm this install is still needed, and pin a version
# if it is.
!pip install tensorflow-gpu

In [None]:
import os

# Location of the zipped dataset on the mounted Google Drive.
drive_path = '/content/drive/My Drive/Colab Notebooks/catnocat/data.zip'
# Folder on the Colab VM that the archive is copied into and unpacked in.
local_path = '/content/catnocat'

# makedirs(exist_ok=True) is safe to re-run and also creates missing parent
# folders, which the previous isdir()/mkdir() pair did not handle.
os.makedirs(local_path, exist_ok=True)

In [None]:
# Copy the dataset archive from the mounted Google Drive into the local folder.
# Both paths are quoted because the Drive path contains spaces.
!cp '{drive_path}' '{local_path}'

In [None]:
# Make the local folder the working directory and unzip the copied archive
# there (-q keeps unzip quiet).
# Note: the chdir stays in effect, so relative paths used later in the
# notebook (the "logs" directory, the saved .hdf5 model) resolve inside
# `local_path`.
os.chdir(local_path)
!unzip -q 'data.zip'

In [None]:
# Report the GPU (if any) that is attached to this runtime.
!nvidia-smi

In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Input, Flatten, Dense, GlobalAveragePooling2D, Dropout
from keras.applications import NASNetMobile
import numpy as np
import os
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix
import itertools
import matplotlib.image as mpimg

In [None]:
# Load the TensorBoard notebook extension; it provides the %tensorboard magic
# used in the results section.
%load_ext tensorboard
# `datetime` timestamps the TensorBoard log directory; `os` is already
# imported by earlier cells, so re-importing it here is a no-op.
import datetime, os

In [None]:
# Show the TensorFlow version in use as the cell output.
tf.__version__

Constants

In [None]:
# Input resolution (in pixels) fed to the network.
IMG_WIDTH = 224
IMG_HEIGHT = 224
# Number of output units of the softmax classification layer.
NUM_CLASSES = 2

# Keras' `target_size` is documented as (height, width). The original
# (width, height) order only worked because both values are equal.
image_size = (IMG_HEIGHT, IMG_WIDTH)
batch_size = 32

# Number of passes over the training data.
num_epochs = 10

Read the Data

In [None]:
# Root folder of the dataset; every split has its own sub-folder below it.
data_path = '/content/catnocat/data'

train_data_path, valid_data_path = (
    os.path.join(data_path, split) for split in ('train', 'valid')
)
# holytest_data_path = os.path.join(data_path, 'holytest')

# Echo the resolved locations so a wrong path is spotted before training.
print("Path to training set: ", train_data_path)
print("Path to validation set: ", valid_data_path)

In [None]:
# One image generator per split. Both only multiply the pixel values by 1/255;
# no augmentation is configured.
train_datagen, valid_datagen = (
    ImageDataGenerator(rescale=1.0 / 255) for _ in range(2)
)

# Training batches are drawn in shuffled order.
train_dataset = train_datagen.flow_from_directory(
    directory=train_data_path,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=image_size,
    shuffle=True,
)

# Validation batches keep their directory order (shuffle=False), so that
# predictions line up with `valid_dataset.classes` for the confusion matrix.
valid_dataset = valid_datagen.flow_from_directory(
    directory=valid_data_path,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=image_size,
    shuffle=False,
)

# test_datagen = ImageDataGenerator(rescale=1/255)
# test_dataset = test_datagen.flow_from_directory(holytest_data_path,
#                                                 target_size=image_size,
#                                                 batch_size=batch_size,
#                                                 class_mode='categorical',
#                                                 shuffle=False)

Display the NASNet Mobile CNN Layers

In [None]:
# NASNetMobile with ImageNet weights, used here only to inspect its layers.
# include_top=False drops the final 1000-unit ImageNet classification layer.
# Channels-last inputs are (height, width, channels); the original passed
# (width, height, 3), which only worked because both sizes are equal.
base_model = NASNetMobile(weights='imagenet',
                          include_top=False,
                          input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

In [None]:
# Print every layer of the base network as "<position> <layer name>".
for position, layer_name in enumerate(entry.name for entry in base_model.layers):
    print(position, layer_name)

Helper Functions

In [None]:
def display_accuracy_plot(model_results):
    """Plot training and validation curves side by side: accuracy left, loss right.

    Args:
        model_results: Object whose ``history`` mapping holds the per-epoch
            lists 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (e.g. the value returned by ``model.fit``).
    """
    recorded = model_results.history
    # Read all four curves up front so a missing key fails before any drawing.
    curves = {key: recorded[key]
              for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')}
    epochs_range = range(len(recorded['accuracy']))

    # (history key, caption, legend position) for each panel, left to right.
    panels = (
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
    )

    plt.figure(figsize=(15, 6))
    for slot, (metric, caption, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(epochs_range, curves[metric], label='Training ' + caption)
        plt.plot(epochs_range, curves['val_' + metric], label='Validation ' + caption)
        plt.legend(loc=legend_loc)
        plt.title('Training and Validation ' + caption)
    plt.show()

Compile & Train the Model
Model Variant 0

Model Variant 1

In [None]:
# Identifier of this variant; used in the log directory and saved file name.
model_variant = 1

def model1_maker():
    """Build model variant 1: a frozen NASNetMobile base with a small dense head.

    The head is global average pooling -> Dense(64, relu) -> Dropout(0.5) ->
    Dense(NUM_CLASSES, softmax). Only the head is trainable.

    Returns:
        An uncompiled Keras ``Model`` that maps image batches of shape
        (IMG_HEIGHT, IMG_WIDTH, 3) to NUM_CLASSES class probabilities.
    """
    # Channels-last inputs are (height, width, channels); the original passed
    # (width, height, 3), which only worked because both sizes are equal.
    input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    # 'imagenet' is the documented default; spelled out so the use of
    # pretrained weights is visible.
    base_model = NASNetMobile(weights='imagenet',
                              include_top=False,
                              input_shape=input_shape)

    # Freeze every layer of the pretrained base.
    for layer in base_model.layers:
        layer.trainable = False

    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = Input(shape=input_shape)
    features = base_model(inputs)
    features = GlobalAveragePooling2D()(features)
    features = Dense(64, activation='relu')(features)
    features = Dropout(0.5)(features)
    predictions = Dense(NUM_CLASSES, activation='softmax')(features)
    return Model(inputs=inputs, outputs=predictions)

In [None]:
# Build a fresh variant-1 network and configure it for training: Adam
# optimizer, categorical cross-entropy loss, accuracy as the reported metric.
model = model1_maker()
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the compiled model.
# For reference, the model was compiled with:
#   - the Adam optimizer
#   - categorical cross-entropy as the loss function
#   - accuracy as the evaluation metric
model.summary()

In [None]:
# TensorBoard setup: each run logs into its own folder,
# logs/variant<model_variant>_<YYYYmmdd-HHMMSS>, so runs can be compared.
logdir = os.path.join(
    "logs",
    f'variant{model_variant}_{datetime.datetime.now():%Y%m%d-%H%M%S}',
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir,
                                                      histogram_freq=1)

In [None]:
# Train the model and keep the returned history for plotting.
# The floor division means only full batches count towards steps per epoch.
step_size_train = train_dataset.n // train_dataset.batch_size
epochs = num_epochs

history = model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    steps_per_epoch=step_size_train,
    callbacks=[tensorboard_callback],
)

In [None]:
# Write the trained model to an HDF5 file named after the variant
# (relative path, i.e. inside the current working directory).
model.save('cats_nasnetmobile_variant{}.hdf5'.format(model_variant))

In [None]:
# Plot training vs. validation accuracy and loss of the run above.
display_accuracy_plot(history)

Summarize Results with TensorBoard

In [None]:
# Open TensorBoard inline on the relative "logs" directory written during training.
%tensorboard --logdir logs 

Predictions

In [None]:
# Load the "best" model to visualize some predictions based on the validation set.
# NOTE(review): the file name is hard-coded to variant 1 and does not follow
# `model_variant`; adjust it by hand when another variant should be inspected.
model = tf.keras.models.load_model('cats_nasnetmobile_variant1.hdf5')

### Select model 0 or 1 here, depending on which one turned out to be better.

Prediction - Confusion Matrix

In [None]:
# PREDICTION
# Predict on the whole validation set. The original call passed
# `step_size_train + 1`, a step count derived from the *training* set, so the
# number of predictions did not have to match `valid_dataset.classes`.
# `model.predict` accepts the generator directly and by default runs over all
# of its batches; it also replaces the deprecated `predict_generator`.
valid_dataset.reset()  # start from the first batch so the order matches `.classes`
Y_pred = model.predict(valid_dataset)
y_pred = np.argmax(Y_pred, axis=1)

# The confusion matrix below needs exactly one prediction per validation image.
assert len(y_pred) == len(valid_dataset.classes)

In [None]:
# Text-only evaluation on the validation set: raw confusion matrix followed by
# the per-class classification report.
class_names = list(valid_dataset.class_indices)

print('Confusion Matrix')
print(confusion_matrix(y_true=valid_dataset.classes, y_pred=y_pred))

print('Classification Report')
print(classification_report(y_true=valid_dataset.classes,
                            y_pred=y_pred,
                            target_names=class_names))

In [None]:
# Plot the confusion matrix. Set normalize = True/False
def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a short status line and plot the confusion matrix as a heat map.

    Args:
        cm: 2-D confusion matrix of counts; rows are labelled "True label" and
            columns "Predicted label".
        classes: Class names used as tick labels on both axes.
        normalize: If True, every row is divided by its sum (rounded to two
            decimals) before plotting; rows that sum to zero become all zeros.
        title: Title of the figure.
        cmap: Matplotlib colormap for the heat map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing. The original drew the heat map from the raw
    # counts and only normalised afterwards, so the colours did not match the
    # printed cell values or the text-colour threshold.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; empty rows stay 0.0
        # instead of going through a 0/0 -> NaN -> 0 round trip.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        cm = np.around(cm, decimals=2)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text, the others black.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so that the axis labels are taken into account.
    plt.tight_layout()
    plt.show()

In [None]:
# Graphical confusion matrix for the validation set ...
print('Confusion Matrix')
cm = confusion_matrix(y_true=valid_dataset.classes, y_pred=y_pred)
plot_confusion_matrix(cm=cm, classes=class_names, title='Confusion Matrix')

# ... followed by the per-class classification report.
print('Classification Report')
print(classification_report(y_true=valid_dataset.classes,
                            y_pred=y_pred,
                            target_names=class_names))

Predictions - Explore classified images from Validation Set

In [None]:
# PREDICTIONS 2
# True class index of every validation image, in generator order.
ground_truth = valid_dataset.classes
# Peek at the first ten labels, then show how many labels there are.
print(ground_truth[:10], len(ground_truth), sep='\n')

In [None]:
# Then we get the predictions: one row of class probabilities per validation
# image, expressing how confident the model is about each category.
# This step might take several minutes.
#
# `model.predict` accepts the generator directly and runs over all of its
# batches; it replaces the deprecated `predict_generator`.
valid_dataset.reset()  # start from the first batch so rows line up with `ground_truth`
predictions = model.predict(valid_dataset)
print(predictions[:10])

In [None]:
# Per-image lookup table:
#   image index -> [highest probability, index of that class, true class index]
prediction_table = {
    index: [val[top_class], top_class, ground_truth[index]]
    for index, val in enumerate(predictions)
    for top_class in (np.argmax(val),)  # bind the arg-max once per row
}
# Every prediction must have exactly one ground-truth label and one table row.
assert len(predictions) == len(ground_truth) == len(prediction_table)

In [None]:
def reverse_dict(class_dict):
  """Return a new dict that maps every value of `class_dict` back to its key.

  If several keys share one value, the key that comes last in iteration order
  wins.
  """
  return {value: key for key, value in class_dict.items()}

def get_images_with_sorted_probabilities(prediction_table,
                                         get_highest_probability,
                                         label,
                                         number_of_items,
                                         only_false_predictions=False):
    """Select the first entries predicted as `label`, ordered by probability.

    Args:
        prediction_table: Mapping ``image index -> [probability,
            predicted class index, ground-truth class index]``.
        get_highest_probability: True sorts descending (most confident first),
            False sorts ascending.
        label: Only entries whose predicted class index equals this are kept.
        number_of_items: Maximum number of entries to return.
        only_false_predictions: When True, additionally drop entries whose
            prediction matches the ground truth.

    Returns:
        A list of ``[image index, [probability, predicted index, ground truth]]``.
    """
    # The whole value list is the sort key: probability first, with ties
    # broken by predicted index and then ground truth.
    ranked = sorted(prediction_table.items(),
                    key=lambda entry: entry[1],
                    reverse=get_highest_probability)

    selected = []
    for image_index, (probability, predicted_index, gt) in ranked:
        if predicted_index != label:
            continue
        # Compared against True (not plain truthiness) to keep the exact
        # semantics this function has always had for non-bool flags.
        if only_false_predictions == True and predicted_index == gt:
            continue
        selected.append([image_index, [probability, predicted_index, gt]])
    return selected[:number_of_items]


def plot_images(filenames, distances, classification_txt, title_txt):
    """Show the given image files in a 5-column grid, one caption per image.

    Args:
        filenames: Paths of the image files to show.
        distances: One probability per image; shown rounded to two decimals.
        classification_txt: Either one string that is used for every image, or
            a sequence holding one string per image.
        title_txt: Title of the whole figure.
    """
    columns = 5
    images = [mpimg.imread(filename) for filename in filenames]

    # A single string is still accepted and applied to every image.
    if isinstance(classification_txt, str):
        classification_txt = [classification_txt] * len(images)

    # Ceiling division: exactly as many rows as needed (at least one). The
    # previous `int(n / columns + 1)` added an empty row whenever n was a
    # multiple of `columns`.
    rows = max(1, -(-len(images) // columns))

    plt.figure(figsize=(20, 24))
    # The figure title does not depend on the image, so it is set once.
    plt.suptitle(title_txt, fontsize=20, fontweight='bold')
    for i, image in enumerate(images):
        ax = plt.subplot(rows, columns, i + 1)
        ax.set_title("\n\n" + filenames[i].split("/")[-1] +
                     "\n" + classification_txt[i] +
                     "\nprobability=" + str(float("{0:.2f}".format(distances[i])))
                     )
        ax.axis('off')
        ax.imshow(image)
    plt.show()


# File names (relative to the validation folder) in generator order, and the
# mapping from class index back to class name.
filenames = valid_dataset.filenames
class_dict = reverse_dict(valid_dataset.class_indices)

# NOTE(review): this name shadows the `display` helper that IPython normally
# provides in notebooks; kept because the cells below call it.
def display(sorted_indices, title_txt):
    """Plot the selected validation images with their own classification caption.

    Args:
        sorted_indices: Iterable of ``[image index, [probability, predicted
            class index, ground-truth class index]]`` entries, as returned by
            ``get_images_with_sorted_probabilities``.
        title_txt: Title of the whole figure.
    """
    image_paths = []
    probabilities = []
    captions = []
    for image_index, (probability, predicted_index, gt) in sorted_indices:
        verdict = "CORRECT" if predicted_index == gt else "WRONG"
        # One caption per image. Previously a single variable was overwritten
        # on every iteration, so all images were labelled with the verdict and
        # classes of the LAST entry, and an empty selection raised
        # UnboundLocalError.
        captions.append("{}\nground truth: {}\npredicted: {}".format(
            verdict,
            class_dict[gt].upper(),
            class_dict[predicted_index].upper()))
        image_paths.append(os.path.join(valid_data_path, filenames[image_index]))
        probabilities.append(probability)
    plot_images(image_paths, probabilities, captions, title_txt)

In [None]:
# Up to 20 images predicted as class index 0, most confident first.
# NOTE(review): the title assumes class index 0 is the cat class - confirm
# against `valid_dataset.class_indices`.
message = 'Images with highest probability of containing cats'
img_list = get_images_with_sorted_probabilities(
    prediction_table=prediction_table,
    label=0,
    number_of_items=20,
    get_highest_probability=True,
    only_false_predictions=False,
)
display(sorted_indices=img_list, title_txt=message)

In [None]:
# Up to 20 images predicted as class index 1, most confident first.
# NOTE(review): the title assumes class index 1 is the no-cat class - confirm
# against `valid_dataset.class_indices`.
message = 'Images with highest probability of containing no cats'
img_list = get_images_with_sorted_probabilities(
    prediction_table=prediction_table,
    label=1,
    number_of_items=20,
    get_highest_probability=True,
    only_false_predictions=False,
)
display(sorted_indices=img_list, title_txt=message)

In [None]:
# Up to 20 images that were wrongly predicted as class index 1, least
# confident first.
message = 'Wrongly classified images, with lowest probability'
img_list = get_images_with_sorted_probabilities(
    prediction_table=prediction_table,
    label=1,
    number_of_items=20,
    get_highest_probability=False,
    only_false_predictions=True,
)
display(sorted_indices=img_list, title_txt=message)

In [None]:
# Up to 20 images that were wrongly predicted as class index 0, least
# confident first.
message = 'Wrongly classified images, with lowest probability'
img_list = get_images_with_sorted_probabilities(
    prediction_table=prediction_table,
    label=0,
    number_of_items=20,
    get_highest_probability=False,
    only_false_predictions=True,
)
display(sorted_indices=img_list, title_txt=message)