## 1: Loading the datasets

#### Importing the dog dataset 

In [1]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob

#Using the load_files function from the scikit-learn library to
#populate the following variables
#train, valid and test_files are numpy arrays containing file paths to images
#train, valid and test_targets are numpy arrays containing onehot-encoded labels
#dog_names is a list os strings containing the dog breed names
def load_dataset(path):
    data = load_files(path)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), 133) #133 breeds
    return dog_files, dog_targets

train_files, train_targets = load_dataset('dogImages/train')
test_files, test_targets = load_dataset('dogImages/test')
valid_files, valid_targets = load_dataset('dogImages/valid')

dog_names = [item[20:-1] for item in sorted(glob('dogImages/train/*/'))]

#print some statistics
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % len(np.hstack([train_files, valid_files, test_files])))
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))

Using TensorFlow backend.


There are 133 total dog categories.
There are 8351 total dog images.

There are 6680 training dog images.
There are 835 validation dog images.
There are 836 test dog images.


#### Importing the humans dataset 

In [3]:
import random
random.seed(8675309)

human_files = np.array(glob('lfw/*/*'))
random.shuffle(human_files)

#print some statistics
print('There are %d total human images.' % len(human_files))

There are 13233 total human images.


## 2: Human detection

#### Face detection with opencv Haar feature-based cascade  

In [4]:
import cv2

#load the classifier into a variable
face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_alt.xml')

#Given an image path this function whil return true if a face was
#detected in the image and false otherwise
def HaarCascade_faceDetector(img_path):
        img = cv2.imread(img_path)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = face_cascade.detectMultiScale(gray_img)
        return len(faces) > 0

#### Face detection with opencv deep learning approach 

#### Now it's time to test the two approaches 

In [5]:
human_files_short = human_files[:100]
dog_files_short = train_files[:100]

count_humans = 0
count_dogs = 0

for path in human_files_short:
    if(HaarCascade_faceDetector(path)):
        count_humans += 1

for path in dog_files_short:
    if(HaarCascade_faceDetector(path)):
        count_dogs += 1

print('Testing in the first 100 images of humans and dogs')
print('%d human faces were detected in the first 100 images of humans' % count_humans)
print('%d human faces were detected in the first 100 images of dogs' % count_dogs)

Testing in the first 100 images of humans and dogs
98 human faces were detected in the first 100 images of humans
10 human faces were detected in the first 100 images of dogs


## 3: Detecting dogs 

#### Using ResNet50 trained in the ImageNet dataset to detect if there is a dog in the image

In [24]:
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from tqdm import tqdm

ResNet50_model = ResNet50(weights='imagenet')

#Keras CNNs require a 4D tensor as input in the form
#      (nb_samples, rows, columns, channels)
#So we use the path_to_tensor function to convert the image into (1, 224, 224, 3)
#And the paths_to_tensor function to make all tensors together (nb_samples, 224, 224, 3)

def path_to_tensor(img_path):
    #read and resize the image
    img = image.load_img(img_path, target_size=(224,224))
    #convert image to a 3D tensor with shape (224, 224, 3)
    tensor_3d = image.img_to_array(img)
    #convert the 3D tensor to a 4D tensor with shape (1, 224, 224, 3)
    return np.expand_dims(tensor_3d, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

#Getting the 4D tensor ready to the ResNet50 requires some additional processing
#like converting the RGB image to BGR and some normalization steps that
#the preprecess function from keras will make for us
def ResNet50_predict_labels(img_path):
    img = preprocess_input(path_to_tensor(img_path))
    return np.argmax(ResNet50_model.predict(img))

#now we can create the dog detector function
def dog_detector(img_path):
    prediction = ResNet50_predict_labels(img_path)
    return ((prediction <= 268) & (prediction >= 151))

#### testing the dog detector 

In [7]:
count_humans = 0
count_dogs = 0

for path in human_files_short:
    if(dog_detector(path)):
        count_humans += 1

for path in dog_files_short:
    if(dog_detector(path)):
        count_dogs += 1

print('dogs found in Human dataset:', count_humans)
print('dogs found in dogs dataset:', count_dogs)

dogs found in Human dataset: 1
dogs found in dogs dataset: 100


## 4: Creating the CNNs to classify Dog Breeds 

#### 4.1 Creating a CNN from scratch in Keras 

In [8]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

train_tensors = paths_to_tensor(train_files).astype('float32')/255
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255
test_tensors = paths_to_tensor(test_files).astype('float32')/255

100%|██████████| 6680/6680 [02:42<00:00, 41.08it/s]
100%|██████████| 835/835 [00:20<00:00, 40.43it/s]
100%|██████████| 836/836 [00:18<00:00, 44.64it/s]


In [13]:
#defining the architecture
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

model = Sequential()
model.add(Conv2D(filters=32, kernel_size=3, padding='valid', activation='relu', input_shape=(224, 224, 3)))
model.add(Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2), padding='valid'))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(133, activation='softmax'))
model.summary()

Instructions for updating:
keep_dims is deprecated, use keepdims instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 220, 220, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 110, 110, 64)      0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 774400)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               99123328  
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
den

In [10]:
#compiling the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [11]:
#Training
from keras.callbacks import ModelCheckpoint

epochs = 5
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.keras.hdf5',
                              verbose=1, save_best_only=True)

model.fit(train_tensors, train_targets,
          validation_data=(valid_tensors, valid_targets),
          epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)

Train on 6680 samples, validate on 835 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f65af77feb8>

In [12]:
#Load the best model and test it
model.load_weights('saved_models/weights.best.from_scratch.keras.hdf5')

dog_breed_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in test_tensors]

test_accuracy = 100*np.sum(np.array(dog_breed_predictions)==np.argmax(test_targets, axis=1))/len(dog_breed_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 4.7847%


#### 4.2 Using transfer learning to classify dogs 

In [7]:
#Reading the bottleneck features of the Resnet to use to transfer learning
bottleneck_features = np.load('bottleneck_features/DogResnet50Data.npz')
train_RESNET50 = bottleneck_features['train']
test_RESNET50 = bottleneck_features['test']
valid_RESNET50 = bottleneck_features['valid']

In [15]:
#print(train_RESNET50.shape)
#train_RESNET50.shape = (6680, 1, 1, 2048)
#To pass to our fully conected NN we will need only the shape of the flatten
#vector and not the number of samples(6680)
model_RNT = Sequential()
model_RNT.add(GlobalAveragePooling2D(input_shape=train_RESNET50.shape[1:]))
model_RNT.add(Dense(256, activation='relu'))
model_RNT.add(Dropout(0.2))
model_RNT.add(Dense(133, activation='softmax'))
model_RNT.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_2 ( (None, 2048)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 256)               524544    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 133)               34181     
Total params: 558,725.0
Trainable params: 558,725.0
Non-trainable params: 0.0
_________________________________________________________________


In [17]:
#compiling the model
model_RNT.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.RESNET50.sgd.hdg5')
model_RNT.fit(train_RESNET50, train_targets,
         validation_data=(valid_RESNET50, valid_targets),
         epochs=40, batch_size=20, verbose=1, callbacks=[checkpointer])

Train on 6680 samples, validate on 835 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f808cf9e438>

In [22]:
#Load the best model
model_RNT.load_weights('saved_models/weights.best.RESNET50.sgd.hdg5')

In [23]:
RESNET50_predictions = [np.argmax(model_RNT.predict(np.expand_dims(feature, axis=0))) for feature in test_RESNET50]

test_accuracy = 100*np.sum(np.array(RESNET50_predictions)==np.argmax(test_targets, axis=1))/len(RESNET50_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 84.6890%


#### Defining the function to predict a breed of a dog 

In [26]:
from extract_bottleneck_features import *

def RESNET50_predict_breed(img_path):
    bottleneck_feature = extract_Resnet50(path_to_tensor(img_path))
    predicted_vector = model_RNT.predict(bottleneck_feature)
    return dog_names[np.argmax(predicted_vector)]

## 5: Writing the algorithm 

In [27]:
def Application(img_path):
    #check if there is a dog in the image
    if(dog_detector(img_path)):
        breed = RESNET50_predict_breed(img_path)
        return 'The predicted breed of this dog is:' + breed
    elif(face_detector(img_path)):
        human_breed = RESNET50_predict_breed(img_path)
        return 'This humans look like a:' + human_breed
    else:
        return 'Dog nor human detected'

## 6: Testing