In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import random
import os
import numpy as np
import matplotlib.image as imageio

from scipy import ndimage
from tensorflow.keras import regularizers
from scipy.io import loadmat

# Use 32-bit floats as the Keras default float type.
tf.keras.backend.set_floatx('float32')

# Root folders of the three dataset splits (relative paths).
train_dir, val_dir, test_dir = "train", "val", "test"

# Download data

In [None]:
import shutil
from urllib.request import urlretrieve
import zipfile
import os

# Start from a clean slate. `ignore_errors=True` keeps this cell runnable on a
# fresh checkout where the split folders do not exist yet (a bare rmtree would
# raise FileNotFoundError).
# NOTE(review): removing train_dir discards the downloaded images, and the
# download calls further down are commented out -- re-enable them (or skip the
# train_dir removal) before running this cell; confirm the intended workflow.
for stale_dir in (train_dir, val_dir, test_dir):
    shutil.rmtree(stale_dir, ignore_errors=True)

def download_file(url, path):
    """Download a zip archive, unpack it and delete the archive afterwards.

    Args:
        url: Location of the zip archive to fetch.
        path: Base name for the temporary archive; it is stored as
            `path + ".zip"` while it is being unpacked.

    The archive members are extracted relative to the current working
    directory.
    """
    print("Downloading from %s" % url)
    archive_path = path + ".zip"
    urlretrieve(url, archive_path)
    archive = zipfile.ZipFile(archive_path, 'r')
    try:
        archive.extractall("")
    finally:
        archive.close()
    # The archive is only needed for extraction.
    os.remove(archive_path)
    
#download_file("https://storage.googleapis.com/kagglesdsdata/competitions/3362/31148/test1.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1586455286&Signature=oS7Fn9tB7KyNGUL3AYhy%2Bu8v37g4wLyeNEl%2BG5f%2BjX2J4JprknIdxzrnn4dKICgT8XChG%2Bglp2NvV0ZbRSsDrUQ9EjRAnpJW%2BMUGYjtDChu3lJ5zcAae9fwYLmcJ1qmhQgNL4Wl5zx8FHR%2FWf9A%2BJaHwITBSlMcOsV%2F1ISwdnKgUTSGys8XlRrykqHm41lCA1t1%2FBo0OHLc68AEph%2FCFSvKC%2BmHkmytVLKw2j3nwyU2oAvoU91MB7Vvu52PsqzKEjVWkW%2Fn8dUonQApgCBvVQwVGfWXC5uAyAx%2BArUhDptW%2By7DKp7VpqjrjQevknjhxCVMZgRP0%2Fal1MSI8Hkw0KA%3D%3D&response-content-disposition=attachment%3B+filename%3Dtest1.zip", "test")
#download_file("https://storage.googleapis.com/kagglesdsdata/competitions/3362/31148/train.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1586723425&Signature=F3w4rcUihV%2F67w7XwAS5e1PTaDmlNZLgUW8OQrDBXpr%2BR8qWi%2BYdsZgxK9Es22PQODjU%2BiwVKA1dxXpFbJYmMRLK095WotTicMGExZVUPA1PFfmDOwsAXpJfSa6clfhS9ai0Q0IfH8%2BGOmy%2F916x1Wy0KhN%2Bppx8fGc0aSCkyQ19iYOHGCk3Ok8tk2UJHviViuaHMnPzxBDAse0v9XhSOkL9ehSBsqKF9tSqocY%2FgXqZ969LHi6ROaI7sV%2B3u6RtqNw3cDwuhUcslsHT9f724nh5EZdoxp%2B0V9Y9YAJikb81k3a%2FaPQkyHKPzVg1Bx7M%2FdhIAmUrvZe%2FFJ3Maq5J8Q%3D%3D&response-content-disposition=attachment%3B+filename%3Dtrain.zip", "train")

#os.mkdir("test")
#for test_file in os.listdir("test1"):
#    os.rename("test1/" + test_file, "test/" + test_file)
#os.rmdir("test1")


# The split below moves images out of train_dir, so it must already exist.
# Fail early with a clear message instead of silently creating an empty dataset.
if not os.path.isdir(train_dir):
    raise FileNotFoundError(
        "Training directory %r not found; download and extract the data first." % train_dir)

# One sub-folder per class inside every split: flow_from_directory derives the
# labels from these folder names. makedirs(exist_ok=True) also creates the
# val/test roots and makes the cell safe to re-run.
for split_dir in (train_dir, val_dir, test_dir):
    for class_name in ("dog", "cat"):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

val_size = 1000
test_size = 2000

# Only the loose .jpg files in train_dir take part in the split; the class
# sub-folders are ignored so they cannot shift the split boundaries.
# Sorting first and shuffling with a fixed seed makes the split reproducible
# (os.listdir returns entries in arbitrary order).
image_files = sorted(name for name in os.listdir(train_dir) if name.endswith(".jpg"))
random.Random(42).shuffle(image_files)

for i, file in enumerate(image_files):
    # File names look like "<animal>.<id>.jpg"; the prefix selects the class folder.
    animal = file.split('.')[0]

    # The first val_size images go to validation, the next test_size to test,
    # and everything else stays in training (strict '<' gives exact sizes).
    if i < val_size:
        subfolder = val_dir
    elif i < val_size + test_size:
        subfolder = test_dir
    else:
        subfolder = train_dir

    os.rename(os.path.join(train_dir, file), os.path.join(subfolder, animal, file))

# Verify data

In [None]:
# Count the entries in the 'cat' and 'dog' sub-folder of every split.
num_cats_tr, num_dogs_tr = (
    len(os.listdir(os.path.join(train_dir, class_name))) for class_name in ('cat', 'dog'))

num_cats_val, num_dogs_val = (
    len(os.listdir(os.path.join(val_dir, class_name))) for class_name in ('cat', 'dog'))

num_cats_test, num_dogs_test = (
    len(os.listdir(os.path.join(test_dir, class_name))) for class_name in ('cat', 'dog'))

# Per-split totals over both classes.
total_train = sum((num_cats_tr, num_dogs_tr))
total_val = sum((num_cats_val, num_dogs_val))
total_test = sum((num_dogs_test, num_cats_test))

# Report: per-class counts for each split, then the totals.
print('total training cat images:', num_cats_tr)
print('total training dog images:', num_dogs_tr)
print("--")

print('total validation cat images:', num_cats_val)
print('total validation dog images:', num_dogs_val)
print("--")

print('total test cat images:', num_cats_test)
print('total test dog images:', num_dogs_test)
print("--")

print("Total training images:", total_train)
print("Total validation images:", total_val)
print("Total test images:", total_test)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Batch size and the size every image is resized to before entering the network.
batch_size = 32
IMG_HEIGHT = 150
IMG_WIDTH = 150

# Each split gets its own generator; all of them only rescale pixel values
# from [0, 255] to [0, 1].
train_image_generator = ImageDataGenerator(rescale=1./255)
val_image_generator = ImageDataGenerator(rescale=1./255)
test_image_generator = ImageDataGenerator(rescale=1./255)

# class_mode='binary' yields one scalar label per image, derived from the
# class sub-folder the image is stored in.
train_data_gen = train_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory=train_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

val_data_gen = val_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory=val_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

# Built from the dedicated test generator (previously the validation
# generator was reused here and test_image_generator was left unused).
test_data_gen = test_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory=test_dir,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

def plot_images(images):
    """Show up to five images side by side in a single row.

    Args:
        images: Iterable of images accepted by `Axes.imshow`; images beyond
            the fifth are ignored because the grid has five panels.
    """
    _, panels = plt.subplots(1, 5, figsize=(20,20))
    for picture, panel in zip(images, panels.flatten()):
        panel.imshow(picture)
        # Hide ticks and frame so only the picture is visible.
        panel.axis('off')
    plt.tight_layout()
    plt.show()
    
# Pull one (images, labels) batch from the training generator and preview
# the first five images; the labels are not needed here.
sample_training_images, _ = next(train_data_gen)
preview_images = sample_training_images[:5]
plot_images(preview_images)

# Simple classification model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D

# Small convolutional network, assembled layer by layer.
model = Sequential()

# Three convolution + max-pooling stages extract image features.
model.add(Conv2D(32, 3, activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head: two hidden dense layers and a single sigmoid unit that
# outputs the probability of the class labelled 1.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

model.summary()

In [None]:
# Train the simple CNN. Model.fit accepts Keras generators directly;
# fit_generator is deprecated in TF 2.x and removed in later releases.
# The generators loop forever, so the step counts define how many batches
# make up one training epoch and one validation pass.
history = model.fit(
    train_data_gen,
    steps_per_epoch=150,
    epochs=10,
    validation_data=val_data_gen,
    validation_steps=50,
    verbose=1)

In [None]:
# Per-epoch metrics recorded by the last training run.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# One x position per recorded epoch. A hard-coded range(3) does not match the
# number of epochs actually trained and makes plt.plot raise on mismatched
# x/y lengths.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
import imageio

# Collect one (image_id, predicted_label) pair per test image.
test_results = []

# os.walk visits test_dir and its class sub-folders, so this works both for a
# flat folder of images and for the test_dir/<class>/<file> layout created by
# the split (listing test_dir directly only returns the class folders).
for folder, _, file_names in os.walk(test_dir):
    for image_name in sorted(file_names):
        if not image_name.endswith(".jpg"):
            continue

        # Strip only the extension: names such as "cat.123.jpg" contain more
        # than one dot, so a two-way split('.') unpack would fail.
        image_id = os.path.splitext(image_name)[0]

        # Same preprocessing as the generators: resize, then scale to [0, 1].
        image = imageio.imread(os.path.join(folder, image_name))
        image = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH)) / 255
        prediction = model.predict(np.array([image]))[0][0]

        # The sigmoid output lies in (0, 1), so the decision threshold is 0.5
        # (comparing against 0 labelled every image as class 1).
        # NOTE(review): flow_from_directory orders class folders
        # alphanumerically, so presumably 0 = cat and 1 = dog -- confirm with
        # train_data_gen.class_indices.
        test_results.append((image_id, 1 if prediction > 0.5 else 0))

# Image augmentation

In [None]:
# Training generator with random augmentation (rotation, shifts, shear, zoom,
# horizontal flip) applied on top of the [0, 1] rescaling.
image_gen_train = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

# Uses the notebook's batch_size / IMG_HEIGHT / IMG_WIDTH settings; the
# previously referenced BATCH_SIZE and IMG_SHAPE are not defined anywhere.
train_data_gen = image_gen_train.flow_from_directory(batch_size=batch_size,
                                                     directory=train_dir,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     class_mode='binary')

# Validation images are only rescaled, never augmented.
image_gen_val = ImageDataGenerator(rescale=1./255)

# val_dir is the validation folder defined at the top of the notebook
# (validation_dir is not defined).
val_data_gen = image_gen_val.flow_from_directory(batch_size=batch_size,
                                                 directory=val_dir,
                                                 target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                 class_mode='binary')

In [None]:
# Deeper CNN for the augmented data, assembled layer by layer.
model = tf.keras.models.Sequential()

# Four convolution + max-pooling stages with a growing number of filters.
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

model.add(tf.keras.layers.Conv2D(128, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

model.add(tf.keras.layers.Conv2D(128, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Dropout on the feature maps, then a dense head ending in a two-way softmax.
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

# Sparse categorical cross-entropy takes integer class labels directly.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

In [None]:
# Train on the augmented generator. Model.fit accepts Keras generators
# directly; fit_generator is deprecated in TF 2.x and removed in later releases.
history = model.fit(train_data_gen,
                    steps_per_epoch=10,
                    epochs=50,
                    validation_data=val_data_gen,
                    validation_steps=10,
                    verbose=1)

# Transfer learning with an existing pre-trained network (InceptionV3)

In [None]:
# InceptionV3 with ImageNet weights; include_top=False drops its original
# classification layers so a new head can be attached.
inception_base = tf.keras.applications.inception_v3.InceptionV3(weights='imagenet', include_top=False)
# NOTE(review): the base layers are left trainable, so the pre-trained weights
# are updated during training -- confirm whether they should be frozen instead.

# New head: global pooling over the feature maps followed by dense layers.
x = inception_base.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
# Two output units, one per class (cat, dog); the former 120-way softmax did
# not match this two-class dataset.
preds = tf.keras.layers.Dense(2, activation='softmax')(x)

inception_model = tf.keras.Model(inputs=inception_base.input, outputs=preds)

In [None]:
# Sparse categorical cross-entropy takes integer class labels and pairs with
# the softmax output of the model.
inception_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the Inception-based model. Model.fit accepts Keras generators
# directly; fit_generator is deprecated in TF 2.x and removed in later releases.
history = inception_model.fit(train_data_gen,
                              steps_per_epoch=50,
                              epochs=20,
                              validation_data=val_data_gen,
                              validation_steps=10,
                              verbose=1)

In [None]:
# Evaluate on 20 batches drawn from the test generator. Model.evaluate accepts
# generators directly; evaluate_generator is deprecated in TF 2.x.
# The result is [loss, accuracy], matching the metrics passed to compile().
score, acc = inception_model.evaluate(test_data_gen, steps=20)
print('Test inception model accuracy:', acc)