In [1]:
import tensorflow as tf

In [2]:
import urllib.request
import os
import zipfile
import random
from shutil import copyfile

In [3]:
data_url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip"
data_file_name = "catsdogs.zip"
download_dir = '/Users/shankar/dev/data/competitions/ms/cats_and_dogs/'

In [4]:
# urllib.request.urlretrieve(data_url, data_file_name)
# zip_ref = zipfile.ZipFile(data_file_name, 'r')
# zip_ref.extractall(download_dir)
# zip_ref.close()

In [5]:
print("Number of cat images:",len(os.listdir(f'{download_dir}PetImages/Cat/')))
print("Number of dog images:", len(os.listdir(f'{download_dir}PetImages/Dog/')))

# Expected Output:
# Number of cat images: 12501
# Number of dog images: 12501

Number of cat images: 12501
Number of dog images: 12501


In [7]:
# try:
#os.mkdir(f'{download_dir}cats-v-dogs')
# os.mkdir(f'{download_dir}cats-v-dogs/training')
# os.mkdir(f'{download_dir}cats-v-dogs/testing')
# os.mkdir(f'{download_dir}cats-v-dogs/training/cats')
# os.mkdir(f'{download_dir}cats-v-dogs/training/dogs')
# os.mkdir(f'{download_dir}cats-v-dogs/testing/cats')
# os.mkdir(f'{download_dir}cats-v-dogs/testing/dogs')
# except OSError:
#     print('Error')
#     pass

## Split data into Training and Test sets

In [8]:
import random
from shutil import copyfile

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    files = []
    for filename in os.listdir(SOURCE):
        file = SOURCE + filename
        if(os.path.getsize(file) > 0):
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")
            
    training_length = int(len(files) * SPLIT_SIZE)
    testing_length = int(len(files) - training_length)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[0:training_length]
    testing_set = shuffled_set[training_length:]
    
    for filename in training_set:
        this_file = SOURCE + filename
        destination = TRAINING + filename
        copyfile(this_file, destination)
        
    for filename in testing_set:
        this_file = SOURCE + filename
        destination = TESTING + filename
        copyfile(this_file, destination)
        


# Expected output
# 666.jpg is zero length, so ignoring
# 11702.jpg is zero length, so ignoring

In [9]:
CAT_SOURCE_DIR = f'{download_dir}PetImages/Cat/'
TRAINING_CATS_DIR = f'{download_dir}cats-v-dogs/training/cats/'
TESTING_CATS_DIR = f'{download_dir}cats-v-dogs/testing/cats/'
DOG_SOURCE_DIR = f'{download_dir}PetImages/Dog/'
TRAINING_DOGS_DIR = f'{download_dir}cats-v-dogs/training/dogs/'
TESTING_DOGS_DIR = f'{download_dir}cats-v-dogs/testing/dogs/'

split_size = .9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.


In [10]:
print("Number of training cat images", len(os.listdir(f'{download_dir}cats-v-dogs/training/cats/')))
print("Number of training dog images", len(os.listdir(f'{download_dir}cats-v-dogs/training/dogs/')))
print("Number of testing cat images", len(os.listdir(f'{download_dir}cats-v-dogs/testing/cats/')))
print("Number of testing dog images", len(os.listdir(f'{download_dir}cats-v-dogs/testing/dogs/')))

# expected output
# Number of training cat images 11250
# Number of training dog images 11250
# Number of testing cat images 1250
# Number of testing dog images 1250

Number of training cat images 12360
Number of training dog images 12389
Number of testing cat images 2360
Number of testing dog images 2389


In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [16]:
TRAINING_DIR = f'{download_dir}cats-v-dogs/training/'

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    batch_size=100,
    class_mode='binary',
    target_size=(150, 150)
)

VALIDATION_DIR = f'{download_dir}cats-v-dogs/testing'
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    batch_size=100,
    class_mode='binary',
    target_size=(150, 150)
)

Found 24747 images belonging to 2 classes.
Found 4748 images belonging to 2 classes.


In [17]:
weights_url = "https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"
weights_file = "inception_v3.h5"
urllib.request.urlretrieve(weights_url, weights_file)

pre_trained_model = tf.keras.applications.inception_v3.InceptionV3(
    input_shape=(150, 150, 3),
    include_top=False,
    weights=None
)
pre_trained_model.load_weights(weights_file)

for layer in pre_trained_model.layers:
    layer.trainable = False
    
pre_trained_model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 150, 150, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 74, 74, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 74, 74, 32)   96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 74, 74, 32)   0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [18]:
last_layer = pre_trained_model.get_layer('mixed7')
print(f'Last layer output shape: {last_layer.output_shape}')
last_output = last_layer.output

Last layer output shape: (None, 7, 7, 768)


## Add Layers

In [21]:
# Flatten the output layer to 1 dimension
x = tf.keras.layers.Flatten()(last_output)
# Add a fully connected layer with 1024 hidden units and ReLU activation
x = tf.keras.layers.Dense(1024, activation='relu')(x)
# Add a final sigmoid layer for classification
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(pre_trained_model.input, x)

## Train the Model

In [23]:
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
    loss='binary_crossentropy',
    metrics=['acc']
)
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=2,
    verbose=1
)

In [None]:
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve a list of list results on training and test data
# sets for each training epoch
#-----------------------------------------------------------
acc=history.history['acc']
val_acc=history.history['val_acc']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # Get number of epochs

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
plt.plot(epochs, acc, 'r', "Training Accuracy")
plt.plot(epochs, val_acc, 'b', "Validation Accuracy")
plt.title('Training and validation accuracy')
plt.figure()

In [None]:
import numpy as np
from google.colab import files
from keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():
 
    # predicting images
    path = '/content/' + fn
    img = image.load_img(path, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)

    image_tensor = np.vstack([x])
    classes = model.predict(image_tensor)
    print(classes)
    print(classes[0])
    if classes[0]>0.5:
        print(fn + " is a dog")
    else:
        print(fn + " is a cat")