In [None]:
import numpy as np 
import tensorflow as tf
import tensorflow.keras.layers as tfl

## Building a Human vs Horse classifier
The data set used here is downloaded from this [site](https://laurencemoroney.com/datasets.html#horses-or-humans-dataset)

In [None]:
import os
import zipfile

# Root folder holding the downloaded archives. The trailing slash is kept on
# purpose: other cells build paths by plain string concatenation onto it.
main_directory = "utility_files/human_vs_horse/"

# os.path.join does not insert a second separator after the trailing slash,
# so these paths no longer contain the doubled "//" that "/".join(...) produced.
train_zip = os.path.join(main_directory, "horse-or-human.zip")
validation_zip = os.path.join(main_directory, "validation-horse-or-human.zip")


In [None]:
# Unzip the training data set into the "train" sub-folder.
# The with-block closes the archive even if extraction fails part-way through.
with zipfile.ZipFile(train_zip, 'r') as zip_ref:
    zip_ref.extractall(main_directory + '/train')


In [None]:
# Unzip the validation data set into the "test" sub-folder.
# The with-block closes the archive even if extraction fails part-way through.
with zipfile.ZipFile(validation_zip, 'r') as zip_ref:
    zip_ref.extractall(main_directory + '/test')


In [None]:
# Per-class image folders for the training and test splits.
import os

# Sub-folder names of the two classes inside each extracted archive.
human = "humans"
horse = "horses"

# Hand the path components to os.path.join directly. The previous form wrapped
# a manual "/".join(...) in a single-argument os.path.join, which did nothing
# and left a doubled separator after main_directory's trailing slash.
train_dir_human = os.path.join(main_directory, "train", human)
train_dir_horse = os.path.join(main_directory, "train", horse)
test_dir_human = os.path.join(main_directory, "test", human)
test_dir_horse = os.path.join(main_directory, "test", horse)

In [None]:
def _preview_dir(directory, limit=10):
    """List ``directory``, print its first ``limit`` entries and return the full listing."""
    names = os.listdir(directory)
    print(names[:limit])
    return names


# Quick sanity check that both training class folders were extracted.
train_horse_names = _preview_dir(train_dir_horse)
tra = _preview_dir(train_dir_human)

# Optional cleanup once extraction has succeeded (left disabled):
# os.remove(main_directory + "horse-or-human.zip")
# os.remove(main_directory + "validation-horse-or-human.zip")

In [None]:
# Central place for the settings that later cells read.
img_side = 300  # images are resized to a square: height == width
img_size = (img_side, img_side)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Pixel values are multiplied by 1/255 as the images are loaded.
train_datagen = ImageDataGenerator(rescale=1/255)

# How the training folder is streamed to the model:
train_flow_options = dict(
    target_size=img_size,   # every image is resized to img_size
    batch_size=128,         # images per batch
    class_mode='binary',    # binary labels, to go with a binary cross-entropy loss
)

# The "train" folder under main_directory is the source of the training images.
train_generator = train_datagen.flow_from_directory(
    main_directory + "train",
    **train_flow_options,
)

In [None]:
# The architecture follows the model used in the "fashion_classification_CNN.ipynb" notebook.

def cnn_fashion_model(input_shape=(28, 28, 1), num_classes=10):
    """Build a small convolutional classifier with the Keras functional API.

    Layer order:
        conv1 -> conv2 -> batch norm -> max pool
        -> conv3 -> conv4 -> batch norm -> max pool
        -> flatten -> dense(128) -> dense(128) -> output

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        num_classes: Number of target classes. When it equals 2 the model ends
            in a single sigmoid unit; otherwise it ends in a softmax over
            ``num_classes`` units.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``inputs`` to class scores.
    """
    kernel = (3, 3)
    inputs = tf.keras.Input(shape=input_shape)

    # --- first convolutional stage: 16 then 32 filters ---
    x = tfl.Conv2D(filters=16, kernel_size=kernel, strides=(2, 2),
                   padding='same', activation='relu')(inputs)
    # conv2 consumes conv1's output; feeding `inputs` here again would
    # silently drop the first convolution from the graph.
    x = tfl.Conv2D(filters=32, kernel_size=kernel, strides=(1, 1),
                   padding='same', activation='relu')(x)
    # Normalize over the channels axis. Each stage gets its own
    # BatchNormalization instance: the layer creates per-channel weights on
    # first use, so one instance cannot serve both the 32-channel and the
    # 64-channel tensors.
    x = tfl.BatchNormalization(axis=-1)(x)
    x = tfl.MaxPool2D((2, 2))(x)

    # --- second convolutional stage: 64 then 64 filters ---
    x = tfl.Conv2D(filters=64, kernel_size=kernel, strides=(2, 2),
                   padding='same', activation='relu')(x)
    x = tfl.Conv2D(filters=64, kernel_size=kernel, strides=(1, 1),
                   padding='same', activation='relu')(x)
    x = tfl.BatchNormalization(axis=-1)(x)
    x = tfl.MaxPool2D((2, 2))(x)

    # --- classifier head ---
    x = tfl.Flatten()(x)
    x = tfl.Dense(128, activation='relu')(x)
    x = tfl.Dense(128, activation='relu')(x)
    if num_classes == 2:
        # Binary case: one probability from a single sigmoid unit.
        outputs = tfl.Dense(1, activation='sigmoid')(x)
    else:
        outputs = tfl.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
from tensorflow.keras.optimizers import RMSprop

# Two-class model on 3-channel images whose height and width come from img_size.
our_model = cnn_fashion_model(input_shape=tuple(img_size) + (3,), num_classes=2)

# Training setup: RMSprop optimizer, binary cross-entropy loss, accuracy metric.
optimizer = RMSprop(learning_rate=0.001)
loss = tf.keras.losses.BinaryCrossentropy()
metrics = ['accuracy']

our_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=metrics,
)

In [None]:
# First training run: 15 epochs, each drawing 8 batches from the training
# generator; no validation data is passed here.
history = our_model.fit(train_generator, steps_per_epoch=8, epochs=15, verbose=1)
      

In [None]:

# Test images get the same 1/255 rescaling as the training images.
test_data_gen = ImageDataGenerator(rescale=1/255)

# Stream the test images from the "test" folder under main_directory,
# in batches of 128, resized to img_size, with binary labels.
test_gen = test_data_gen.flow_from_directory(
    main_directory + "test",
    class_mode='binary',
    batch_size=128,
    target_size=img_size,
)

In [None]:
# Train with validation metrics reported after every epoch.
# NOTE: our_model keeps whatever weights it already has, so if an earlier
# fit() cell was run this continues that training rather than starting over.
history = our_model.fit(
    train_generator,  # stream of training batches
    steps_per_epoch=8,
    epochs=15,
    verbose=1,
    validation_data=test_gen,
    # Evaluate on exactly one full pass over the test generator per epoch.
    # A hard-coded step count is unrelated to how many batches the generator
    # actually holds and can ask for more batches than one pass provides.
    validation_steps=len(test_gen),
)
