<a href="https://colab.research.google.com/github/deuojen/fcc/blob/main/fcc_cat_dog_50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
try:
  # This command only in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import numpy as np
import matplotlib.pyplot as plt

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Get project files
!wget https://cdn.freecodecamp.org/project-data/cats-and-dogs/cats_and_dogs.zip

!unzip cats_and_dogs.zip

PATH = 'cats_and_dogs'

train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
test_dir = os.path.join(PATH, 'test')

# Get number of files in each directory. The train and validation directories
# each have the subdirecories "dogs" and "cats".
total_train = sum([len(files) for r, d, files in os.walk(train_dir)])
total_val = sum([len(files) for r, d, files in os.walk(validation_dir)])
total_test = len(os.listdir(test_dir))

# Variables for pre-processing and training.
batch_size = 128
epochs = 15
IMG_HEIGHT = 150
IMG_WIDTH = 150

In [31]:
# 3
# EXAMPLE: see "Example of using .flow_from_directory(directory):" in
#https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
# EXAMPLE 2: see "classifier_from_little_data_script_1.py" in
#https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d?utm_source=www.tensorflow.org&utm_medium=referral

# 3.1. Create image generators for each of the three image data sets
#(train, validation, test). Use ImageDataGenerator to read / decode
#the images and convert them into floating point tensors. Use the
#rescale argument (and no other arguments for now) to rescale the
#tensors from values between 0 and 255 to values between 0 and 1.

# One independent, rescale-only generator per data set: pixel values are
# mapped from [0, 255] to [0, 1] and no other transformation is applied.
train_image_generator, validation_image_generator, test_image_generator = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# 3.2. For the *_data_gen variables, use the flow_from_directory method.
#Pass in the batch size, directory, target size ((IMG_HEIGHT, IMG_WIDTH)),
#class mode, and anything else required. test_data_gen will be the trickiest
#one. For test_data_gen, make sure to pass in shuffle=False to the
#flow_from_directory method. This will make sure the final predictions stay
#in the order that our test expects. For test_data_gen it will also be
#helpful to observe the directory structure.

#flow_from_directory(
#    directory,
#    target_size=(256, 256),
#    color_mode='rgb',
#    classes=None,
#    class_mode='categorical',
#    batch_size=32,
#    shuffle=True,
#    seed=None,
#    save_to_dir=None,
#    save_prefix='',
#    save_format='png',
#    follow_links=False,
#    subset=None,
#    interpolation='nearest',
#    keep_aspect_ratio=False
#)

# Training batches: binary labels, reshuffled between epochs.
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)

# Validation batches use the same settings as the training batches.
val_data_gen = validation_image_generator.flow_from_directory(
    directory=validation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)

# Test batches: the generator is rooted at PATH and told that 'test' is the
# only class, so the test folder itself is read as one class of images.
# shuffle=False keeps the images in a fixed order for the final check.
test_data_gen = test_image_generator.flow_from_directory(
    directory=PATH,
    classes=['test'],
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    shuffle=False,
)

# After you run the code, the output should look like this:
#Found 2000 images belonging to 2 classes.
#Found 1000 images belonging to 2 classes.
#Found 50 images belonging to 1 class.

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Found 50 images belonging to 1 classes.


In [None]:
# 4

# The plotImages function will be used a few times to plot images. It takes an
#array of images and a probabilities list, although the probabilities list is
#optional. This code is given to you. If you created the train_data_gen variable
#correctly, then running this cell will plot five random training images

def plotImages(images_arr, probabilities = False):
    """Plot images in a single column, optionally titled with a prediction.

    Args:
        images_arr: Sequence of images accepted by ``Axes.imshow``.
        probabilities: ``False`` (default) to draw the images without titles,
            or one value per image. A value above 0.5 is titled "dog" with
            that value as a percentage; otherwise the image is titled "cat"
            with ``1 - value`` as the percentage. Nested inputs such as an
            ``(N, 1)`` prediction array are flattened first.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Nothing to draw; plt.subplots cannot build a grid with zero rows.
    if len(images_arr) == 0:
        return

    # squeeze=False keeps `axes` a 2-D array even for a single image, so the
    # column can always be iterated (the default returns a bare Axes for N=1).
    fig, axes = plt.subplots(len(images_arr), 1,
                             figsize=(5, len(images_arr) * 3),
                             squeeze=False)
    axes = axes[:, 0]

    if probabilities is False:
        for img, ax in zip(images_arr, axes):
            ax.imshow(img)
            ax.axis('off')
    else:
        # Flatten so each item is a scalar rather than a one-element array.
        for img, probability, ax in zip(images_arr, np.ravel(probabilities), axes):
            ax.imshow(img)
            ax.axis('off')
            if probability > 0.5:
                ax.set_title("%.2f" % (probability*100) + "% dog")
            else:
                ax.set_title("%.2f" % ((1-probability)*100) + "% cat")
    plt.show()

# Take the image half of the next training batch (the labels are not needed
# here) and display its first five images.
sample_training_images = next(train_data_gen)[0]
plotImages(sample_training_images[:5])


In [33]:
# 5

# Recreate the train_image_generator using ImageDataGenerator.

# Since there are a small number of training examples, there is a risk of
#overfitting (not having enough training data). One way to fix this problem
#is by creating more training data from existing training examples by using
#random transformations.

# Add 4-6 random transformations as arguments to ImageDataGenerator. Make sure
#to rescale the same as before.

# Rebuild the training generator: same rescaling as before, plus random
# flips along both axes and random rotations of up to 45 degrees.
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=45,
)
# ImageDataGenerator(
#     rescale=1/255,
#     width_shift_range=0.01,
#     height_shift_range=0.01,
#     rotation_range=30,
#     zoom_range=0.2,
#     #shear_range=20.0,
#     brightness_range=(0.40,0.80),
#     channel_shift_range=30.0,
#     horizontal_flip=True
#     #vertical_flip=True,
# )

# Arguments examples explained:
#https://towardsdatascience.com/exploring-image-data-augmentation-with-keras-and-tensorflow-a8162d89b844

In [None]:
# 6

# You don't have to do anything for this cell. train_data_gen is created just
#like before but with the new train_image_generator. Then, a single image is
#plotted five different times using different variations.

# Rebuild the training iterator on top of the augmenting generator.
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)
# PERSONAL NOTE: with 2000 training images and batch_size=128 the iterator
# yields 16 batches per epoch (the last one partial); every image is
# 150x150 (height x width) with 3 channels.

# train_data_gen[0][0][0] is the first image of the first batch. As described
# at the top of this cell, reading it five times gives five different
# variations of that image.
augmented_images = [train_data_gen[0][0][0] for _ in range(5)]
plotImages(augmented_images)

In [35]:
# print(train_data_gen[0][0].shape)
# print(train_data_gen.n)
# print(train_data_gen.labels)

# create augmented images
#augmented_train_images = [ train_data_gen[0][0][i] for i in range(train_data_gen.n) for _ in range(5) ]
# create augmented images's labels
#_, train_labels = next(train_data_gen)
#augmented_train_labels = [ train_labels[i] for i in range(train_data_gen.n) for _ in range(5) ]

#print(len(augmented_train_images))
#print(len(augmented_train_labels))

In [36]:
# 7

# 7.1. In this cell, create a model for the neural network that outputs class
#probabilities. It should use the Keras Sequential model. It will probably
#involve a stack of Conv2D and MaxPooling2D layers...

# Create Keras Sequential Model
model = Sequential()

# Convolutional base: four Conv2D + MaxPooling2D pairs, each with 32 filters
# of size 3x3 followed by 2x2 max pooling. Only the first pair declares the
# input shape (height, width, channels).
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# The remaining three pairs are identical.
for _ in range(3):
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
# PERSONAL NOTE: These two type of layers (convolutional and MaxPooling) will
#extract the features from the image.
#Going to process 32 filters of size 3x3 over the input shape
#of the data, which is (height, width, channels) = (150, 150, 3).
#Then, will perform the max pooling operation using 2x2 samples and a stride
#of 2 (if strides=None (default), it will default to pool_size).

#model.summary()
# PERSONAL OBSERVATION: After looking at the summary it's worth noting that
#the depth (number of filters) goes from 3 input channels to 32 and stays at
#32, while the spatial dimensions (height and width) shrink drastically.

In [None]:
# 7.2. ... and then [involve] a fully connected layer on top that is activated by a ReLU
#activation function.

# Flatten the feature maps from the convolutional base into one vector so
# they can be fed to densely connected layers.
model.add( Flatten() )

# Fully connected layer on top, activated by ReLU.
model.add( Dense(64, activation='relu') )

# Output layer: a single sigmoid unit, so the model emits one value in [0, 1]
# per image. ReLU must not be used here: its output is unbounded and can stick
# at 0, so it is not a probability and does not suit the binary cross-entropy
# loss used when compiling. plotImages reads values above 0.5 as "dog".
model.add( Dense(1, activation='sigmoid') )

# PERSONAL NOTE: the features extracted by the convolutional base are
#flattened and fed to the 64-node dense layer, followed by the final output
#layer of 1 neuron whose value separates the two classes ('cats' and 'dogs').

model.summary()

In [38]:
# 7.3. Compile the model passing the arguments to set the optimizer and loss. Also
#pass in metrics=['accuracy'] to view training and validation accuracy for each
#training epoch.

# Adam optimizer and binary cross-entropy loss, both with default settings;
# accuracy is reported for training and validation after every epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# model.compile(optimizer='rmsprop',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])

# More on built-in loss functions:
#https://www.tensorflow.org/api_docs/python/tf/keras/losses

In [None]:
# 8

# Use the fit method on your model to train the network. Make sure to pass in
#arguments for x, steps_per_epoch, epochs, validation_data, and validation_steps.

# Train on the augmented training batches and evaluate on the validation
# batches after each epoch; the returned History feeds the plots below.
history = model.fit(
    x=train_data_gen,
    epochs=epochs,
    validation_data=val_data_gen,
)

# model.fit(
#     x=train_data_gen,
#     steps_per_epoch=total_train // batch_size,
#     epochs=epochs,
#     validation_data=val_data_gen,
#     validation_steps=total_val // batch_size,
#     verbose=1
# )

# More on fit method:
#https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit

#verbose -> 'auto', 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar,
#2 = one line per epoch

In [None]:
# 9
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

# Two side-by-side panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()

In [None]:
# 10

# model.predict returns one row per test image; flatten the rows into a plain
# list of floats so that each entry is a scalar.
predictions = model.predict(test_data_gen)

# `probabilities` is the name the final challenge cell (# 11) reads, so it
# must be defined here for the notebook to run top to bottom.
probabilities = predictions.flatten().tolist()

# test_data_gen[0][0] is the image half of the first test batch. With
# batch_size=128 and 50 test images, that single batch holds every test image.
plotImages(test_data_gen[0][0], probabilities=probabilities)
print(probabilities)

# Now it is time to use your model to predict whether a brand new image is a cat
#or a dog.

# In this cell, get the probability that each test image (from test_data_gen) is
#a dog or a cat. probabilities should be a list of integers.

# train_images, train_labels = next(train_data_gen)
# val_images, val_labels = next(val_data_gen)
# test_images, _ = next(test_data_gen)
# plotImages(test_images[:2])
#print(train_labels)

# probabilities = model.predict(test_data_gen[0][0])
# probabilities = np.reshape( np.round(probabilities), newshape=(50) ).tolist()
# print(probabilities)
#plotImages(test_images, probabilities)

#augmented_images = [train_data_gen[0][0][0] for i in range(5)]
#plotImages(augmented_images)

# Call the plotImages function and pass in the test images and the probabilities
#corresponding to each test image.
# plotImages(test_data_gen[0][0], probabilities)

# After you run the cell, you should see all 50 test images with a label showing
#the percentage of "sure" that the image is a cat or a dog. The accuracy will
#correspond to the accuracy shown in the graph above (after running the previous
#cell). More training images could lead to a higher accuracy.

In [42]:
#print(test_data_gen.n)
#print(test_data_gen.labels)
#print(test_images.shape)
#print(probabilities)
#print(probabilities.shape)
#print(test_images[0])
#print(test_data_gen[0][0].shape)
#dir(test_data_gen)

In [None]:
# 11
# Expected label for each of the 50 test images, in generator order.
answers =  [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
            1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
            1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
            1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
            0, 0, 0, 0, 0, 0]

# Count the predictions whose rounded value matches the expected label.
correct = sum(
    1
    for probability, answer in zip(probabilities, answers)
    if round(probability) == answer
)

percentage_identified = (correct / len(answers)) * 100

# The challenge is passed at 63% or more correctly identified images.
passed_challenge = percentage_identified >= 63

print(f"Your model correctly identified {round(percentage_identified, 2)}% of the images of cats and dogs.")

if not passed_challenge:
  print("You haven't passed yet. Your model should identify at least 63% of the images. Keep trying. You will get it!")
else:
  print("You passed the challenge!")