<a href="https://colab.research.google.com/github/akiabe/coursera-dl-tf-in-practice/blob/master/C1W2_Kaggle_Dogs_vs_Cats_ImageAugmentation_Ver2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Download example data: fetch the cats-and-dogs zip archive with wget and
# save it as /tmp/cats-and-dogs.zip.
# NOTE(review): --no-check-certificate turns off TLS certificate verification
# for this download -- confirm it is still required, otherwise drop the flag.
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

In [0]:
# Access to the file system and unzip file
import os
import zipfile

# Unzip the downloaded archive into the '/tmp' directory
local_zip = '/tmp/cats-and-dogs.zip'

# The context manager closes the archive even if extraction raises,
# so the file handle is never leaked.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

In [0]:
# Report how many entries each extracted class folder contains (cats first, then dogs)
for pet_dir in ('/tmp/PetImages/Cat/', '/tmp/PetImages/Dog/'):
    print(len(os.listdir(pet_dir)))

In [0]:
# Create the cats-v-dogs directory tree: training/ and testing/ splits,
# each with a cats/ and a dogs/ class subdirectory.
# os.makedirs(..., exist_ok=True) creates missing parents and is a no-op for
# directories that already exist, so re-running this cell is safe and still
# creates any directory that is missing. Genuine failures (e.g. permission
# errors) are raised instead of being silently swallowed.
for split_name in ('training', 'testing'):
    for class_name in ('cats', 'dogs'):
        os.makedirs(os.path.join('/tmp/cats-v-dogs', split_name, class_name),
                    exist_ok=True)

In [0]:
# Define function of split file randomly 
import random
import shutil
from shutil import copyfile
from os import getcwd

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
  """
  Randomly split the non-empty files of SOURCE between TRAINING and TESTING.

  Argument:
  SOURCE -- SOURCE directory containing the files
  TRAINING -- TRAINING directory that a portion of the files will be copied to
  TESTING -- TESTING directory that the remaining files will be copied to
  SPLIT_SIZE -- fraction (0..1) of the usable files that goes to TRAINING

  Zero-length files are reported and skipped. Every usable file is copied to
  exactly one of the two destinations, so the two sets never overlap.
  Directory arguments work with or without a trailing path separator.
  """
  all_files = []

  # Collect the names of all non-empty files in SOURCE
  for file_name in os.listdir(SOURCE):
    file_path = os.path.join(SOURCE, file_name)

    # Only keep files whose size is bigger than 0
    if os.path.getsize(file_path) > 0:
      all_files.append(file_name)
    else:
      print(file_name + " is zero length, so ignoring.")

  # Split once, after the full file list is known. Doing this inside the
  # scan loop would re-shuffle and re-copy on every file and leak the same
  # files into both TRAINING and TESTING.
  n_files = len(all_files)
  split_point = int(n_files * SPLIT_SIZE)

  # random.sample(list, len(list)) returns a shuffled copy of the list
  shuffled = random.sample(all_files, n_files)

  train_set = shuffled[:split_point]
  test_set = shuffled[split_point:]

  # copyfile(source, destination) copies a file from source to destination
  for file_name in train_set:
    copyfile(os.path.join(SOURCE, file_name), os.path.join(TRAINING, file_name))

  for file_name in test_set:
    copyfile(os.path.join(SOURCE, file_name), os.path.join(TESTING, file_name))

In [0]:
# Fraction of each class that goes to the training set:
# 90% of the images are copied to the TRAINING dir, the remaining 10% to the TESTING dir
split_size = .9

# Cat directories: source images and the two split destinations
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"

# Dog directories: source images and the two split destinations
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

# Split the cats first, then the dogs, with the same ratio
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split_data(source_dir, training_dir, testing_dir, split_size)

In [0]:
# Report the number of files in each split/class directory, in the order:
# training cats, training dogs, testing cats, testing dogs
for split_dir in (
    '/tmp/cats-v-dogs/training/cats/',
    '/tmp/cats-v-dogs/training/dogs/',
    '/tmp/cats-v-dogs/testing/cats/',
    '/tmp/cats-v-dogs/testing/dogs/',
):
    print(len(os.listdir(split_dir)))

In [0]:
import tensorflow as tf

# Build the model incrementally: three Conv2D + MaxPooling2D stages
# followed by a fully connected classifier head.
model = tf.keras.models.Sequential()

# First convolution stage also declares the 150x150 RGB input
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Remaining convolution stages, with a growing number of filters
for n_filters in (64, 128):
    model.add(tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Classifier head: flatten, two hidden dense layers, one sigmoid output unit
model.add(tf.keras.layers.Flatten())
for n_units in (512, 128):
    model.add(tf.keras.layers.Dense(n_units, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [0]:
# Print a summary of the model architecture built above
model.summary()

In [0]:
# Optimizer
from tensorflow.keras.optimizers import RMSprop

# Compile for binary classification (single sigmoid output).
# - `learning_rate` is the supported argument name; `lr` is a deprecated alias
#   that newer Keras versions reject.
# - The metric is registered as 'accuracy' so the training history exposes the
#   'accuracy' / 'val_accuracy' keys that the plotting cell reads.
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# TRAINING PATH
TRAINING_DIR = '/tmp/cats-v-dogs/training'

# Training images: rescale pixel values to [0, 1] and apply random
# augmentation (rotation, shifts, shear, zoom, horizontal flip)
train_datagen = ImageDataGenerator(
    rescale=1/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

# Flow training images in batches of 10 using train_datagen
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    batch_size=10,
    class_mode='binary',
    target_size=(150, 150))

# VALIDATION PATH
VALIDATION_DIR = '/tmp/cats-v-dogs/testing'

# Validation images: rescale only. Augmentation is a training-time
# regulariser; applying random transforms to the validation data would make
# the validation metrics noisy and not comparable between epochs.
validation_datagen = ImageDataGenerator(rescale=1/255)

# Flow validation images in batches of 10 using validation_datagen
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    batch_size=10,
    class_mode='binary',
    target_size=(150, 150))

In [0]:
# Train the model for 100 epochs, validating after each epoch.
# Both generators are configured with batch_size=10, so the step counts below
# determine how many images are drawn per epoch.
history = model.fit(
      train_generator,
      steps_per_epoch=270,  # 270 steps * 10 images per batch = 2700 training images per epoch
      epochs=100,
      validation_data=validation_generator,
      validation_steps=30,  # 30 steps * 10 images per batch = 300 validation images per epoch
      verbose=2)

In [0]:
# Plot loss and accuracy
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

# Retrieve the per-epoch training and validation metrics.
# The accuracy key depends on how the metric was named at compile time
# ('accuracy' or the short alias 'acc'), so accept either spelling.
history_dict = history.history
acc = history_dict['accuracy'] if 'accuracy' in history_dict else history_dict['acc']
val_acc = history_dict['val_accuracy'] if 'val_accuracy' in history_dict else history_dict['val_acc']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(len(acc))  # Get number of epochs

# Plot training and validation accuracy per epoch.
# The series name must be passed as label=...; a fourth positional argument
# is interpreted by plt.plot as additional data, not as a label.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend()

# Start a new figure so the loss curves are drawn separately
plt.figure()

# Plot training and validation loss per epoch
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.legend()

In [0]:
# Upload an image and classify
import numpy as np
from google.colab import files
from keras.preprocessing import image

# Ask the user to upload one or more images through the Colab file picker
uploaded = files.upload()

for fn in uploaded.keys():

  # Load the uploaded image at the resolution the model was built for
  path = '/content/' + fn
  img = image.load_img(path, target_size=(150, 150))

  # Rescale pixels to [0, 1]: the model was trained on generators that use
  # rescale=1/255, so raw 0-255 inputs would be out of the trained range.
  x = image.img_to_array(img) / 255.0

  # Add the batch dimension expected by model.predict
  images = np.expand_dims(x, axis=0)
  classes = model.predict(images, batch_size=10)
  print(classes[0])

  # Single sigmoid output; flow_from_directory assigns class indices in
  # alphanumeric order of the subdirectory names (cats=0, dogs=1), so a
  # score above 0.5 means "dog".
  if classes[0][0] > 0.5:
    print(fn + " is a dog")
  else:
    print(fn + " is a cat")