## 1: Loading the datasets

#### Importing the dog dataset 

In [2]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob

#Using the load_files function from the scikit-learn library to
#populate the following variables
#train, valid and test_files are numpy arrays containing file paths to images
#train, valid and test_targets are numpy arrays containing onehot-encoded labels
#dog_names is a list of strings containing the dog breed names
def load_dataset(path, num_classes=133):
    """Load image file paths and one-hot encoded labels from a directory.

    The directory is read with scikit-learn's ``load_files``; its
    ``'filenames'`` and ``'target'`` entries are used.

    Args:
        path: Directory passed to ``load_files``.
        num_classes: Width of the one-hot encoding. Defaults to 133, the
            number of dog breeds in this dataset, so existing calls keep
            their behavior.

    Returns:
        A ``(dog_files, dog_targets)`` tuple: a numpy array of file paths and
        the matching one-hot encoded targets produced by
        ``np_utils.to_categorical``.
    """
    data = load_files(path)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return dog_files, dog_targets

# Populate the three splits (file paths + one-hot targets).
train_files, train_targets = load_dataset('dogImages/train')
test_files, test_targets = load_dataset('dogImages/test')
valid_files, valid_targets = load_dataset('dogImages/valid')

# Breed names come from the training sub-directory names. The [20:-1] slice
# drops the first 20 characters ('dogImages/train/' is 16 of them; the other 4
# are presumably a numeric 'NNN.' prefix -- verify against the folder names)
# and the trailing path separator.
breed_dirs = sorted(glob('dogImages/train/*/'))
dog_names = [breed_dir[20:-1] for breed_dir in breed_dirs]

#print some statistics
all_dog_files = np.hstack([train_files, valid_files, test_files])
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % len(all_dog_files))
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))

There are 133 total dog categories.
There are 8351 total dog images.

There are 6680 training dog images.
There are 835 validation dog images.
There are 836 test dog images.


#### Importing the humans dataset 

In [3]:
import random
# Seed Python's RNG so the shuffle below is reproducible.
random.seed(8675309)

# Collect every path matching 'lfw/*/*' and shuffle the array in place.
lfw_paths = glob('lfw/*/*')
human_files = np.array(lfw_paths)
random.shuffle(human_files)

#print some statistics
n_human_images = len(human_files)
print('There are %d total human images.' % n_human_images)

There are 13233 total human images.


## 2: Human detection

#### Face detection with opencv Haar feature-based cascade  

In [4]:
import cv2

#Load OpenCV's pre-trained frontal-face Haar cascade from its XML file.
#The classifier is created once here and reused by the face detector below.
face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_alt.xml')

#Given an image path, this function will return True if a face was
#detected in the image and False otherwise
def HaarCascade_faceDetector(img_path):
    """Return True if the Haar cascade finds at least one face in the image.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        bool: True when ``face_cascade.detectMultiScale`` reports one or more
        detections, False otherwise.
    """
    img = cv2.imread(img_path)
    # cv2.imread yields BGR channel order. The cascade is meant to run on a
    # single-channel grayscale image, so convert with COLOR_BGR2GRAY (the
    # previous COLOR_BGR2RGB only swapped channels and left a colour image).
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_img)
    return len(faces) > 0

#### Face detection with opencv deep learning approach 

#### Now it's time to test the two approaches 

In [5]:
# Evaluate the face detector on the first 100 human and first 100 dog images.
human_files_short = human_files[:100]
dog_files_short = train_files[:100]

# Count the images in which a face is reported; the detector returns a bool,
# so int() turns each result into 0 or 1.
count_humans = 0
for path in human_files_short:
    count_humans += int(HaarCascade_faceDetector(path))

count_dogs = 0
for path in dog_files_short:
    count_dogs += int(HaarCascade_faceDetector(path))

print('Testing in the first 100 images of humans and dogs')
print('%d human faces were detected in the first 100 images of humans' % count_humans)
print('%d human faces were detected in the first 100 images of dogs' % count_dogs)

Testing in the first 100 images of humans and dogs
98 human faces were detected in the first 100 images of humans
10 human faces were detected in the first 100 images of dogs


## 3: Detecting dogs 

#### Using ResNet50 trained on the ImageNet dataset to detect if there is a dog in the image

In [6]:
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from tqdm import tqdm

#Instantiate ResNet50 with the 'imagenet' weights; this single model instance
#is reused by ResNet50_predict_labels below.
ResNet50_model = ResNet50(weights='imagenet')

#Keras CNNs require a 4D tensor as input in the form
#      (nb_samples, rows, columns, channels)
#So we use the path_to_tensor function to convert one image into a (1, 224, 224, 3) tensor
#and the paths_to_tensor function to stack many images into a (nb_samples, 224, 224, 3) tensor

def path_to_tensor(img_path):
    """Load one image and return it as a 4D tensor of shape (1, 224, 224, 3).

    The image is resized to 224x224 while loading, converted to an array,
    and given a leading sample axis.
    """
    resized = image.load_img(img_path, target_size=(224,224))
    pixels = image.img_to_array(resized)
    #prepend the sample axis: (224, 224, 3) -> (1, 224, 224, 3)
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert every image path to a tensor and stack them along the first axis.

    Progress is reported through tqdm while the images are loaded.
    """
    per_image = []
    for img_path in tqdm(img_paths):
        per_image.append(path_to_tensor(img_path))
    return np.vstack(per_image)

#Getting the 4D tensor ready for ResNet50 requires some additional processing,
#like converting the RGB image to BGR and some normalization steps, which
#the preprocess_input function from keras will do for us
def ResNet50_predict_labels(img_path):
    """Return the index of the highest-scoring ResNet50 class for one image."""
    tensor = path_to_tensor(img_path)
    scores = ResNet50_model.predict(preprocess_input(tensor))
    return np.argmax(scores)

#now we can create the dog detector function
def dog_detector(img_path):
    """Return whether ResNet50's top prediction falls in the dog index range.

    The predicted class index is treated as a dog when it lies between
    151 and 268, inclusive.
    """
    label = ResNet50_predict_labels(img_path)
    at_or_above_first = label >= 151
    at_or_below_last = label <= 268
    return (at_or_below_last & at_or_above_first)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


#### testing the dog detector 

In [7]:
# Count how many of the sample images the dog detector flags as containing
# a dog; int() turns each boolean result into 0 or 1.
count_humans = 0
for path in human_files_short:
    count_humans += int(dog_detector(path))

count_dogs = 0
for path in dog_files_short:
    count_dogs += int(dog_detector(path))

print('dogs found in Human dataset:', count_humans)
print('dogs found in dogs dataset:', count_dogs)

dogs found in Human dataset: 1
dogs found in dogs dataset: 100


## Creating the CNNs to classify Dog Breeds 

#### Creating a CNN from scratch in Keras 

In [8]:
from PIL import ImageFile
# Allow PIL to load image files that are truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def _load_scaled_tensors(img_paths):
    """Load the images as one float32 tensor with pixel values divided by 255."""
    return paths_to_tensor(img_paths).astype('float32')/255

train_tensors = _load_scaled_tensors(train_files)
valid_tensors = _load_scaled_tensors(valid_files)
test_tensors = _load_scaled_tensors(test_files)

100%|██████████| 6680/6680 [02:42<00:00, 41.08it/s]
100%|██████████| 835/835 [00:20<00:00, 40.43it/s]
100%|██████████| 836/836 [00:18<00:00, 44.64it/s]


In [9]:
#defining the architecture
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Two stacked 3x3 convolutions followed by one max-pooling step, then a small
# fully connected head ending in a 133-way softmax (one output per breed).
model = Sequential([
    Conv2D(filters=32, kernel_size=3, padding='valid', activation='relu',
           input_shape=(224, 224, 3)),
    Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2,2), padding='valid'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(133, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 220, 220, 64)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 110, 110, 64)      0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 774400)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               99123328  
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 133)               17157     
Total para

In [10]:
#Compile the model: RMSprop optimizer, categorical cross-entropy loss
#(the targets are one-hot encoded), and accuracy reported as a metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [11]:
#Training
from keras.callbacks import ModelCheckpoint

import os

epochs = 5
checkpoint_path = 'saved_models/weights.best.from_scratch.keras.hdf5'

# The checkpoint callback writes into saved_models/; create the directory up
# front so training does not fail at the first save on a fresh checkout.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the weights from the best epoch seen so far,
# which is the file loaded again for evaluation afterwards.
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

model.fit(train_tensors, train_targets,
          validation_data=(valid_tensors, valid_targets),
          epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)

Train on 6680 samples, validate on 835 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f65af77feb8>

In [12]:
#Load the best model and test it
model.load_weights('saved_models/weights.best.from_scratch.keras.hdf5')

# Predict one test image at a time: each tensor gets a leading sample axis,
# and the index of the highest score is kept as the predicted breed.
dog_breed_predictions = []
for tensor in test_tensors:
    single_batch = np.expand_dims(tensor, axis=0)
    dog_breed_predictions.append(np.argmax(model.predict(single_batch)))

# Compare against the true class indices recovered from the one-hot targets.
true_breeds = np.argmax(test_targets, axis=1)
n_correct = np.sum(np.array(dog_breed_predictions)==true_breeds)
test_accuracy = 100*n_correct/len(dog_breed_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 4.7847%
