# Import dog data from the Stanford Dogs dataset (built from ImageNet)

In [17]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split

# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=120):
    """Load image file paths and one-hot labels from `path` and split them.

    `path` is handed to sklearn's `load_files`, which expects one
    sub-directory per category. Only the file names and integer targets are
    used, so the file contents are not read into memory.

    The data is split twice with fixed seeds:
      * 70% training / 30% held out (random_state=42)
      * the held-out 30% is split 60% test / 40% validation (random_state=50),
        i.e. roughly 18% test and 12% validation of the whole dataset.

    Parameters
    ----------
    path : str
        Root directory of the dataset.
    num_classes : int, optional
        Width of the one-hot target vectors. Defaults to 120, the value this
        notebook was written for.

    Returns
    -------
    tuple
        (x_train, x_test, x_valid, y_train, y_test, y_valid,
         dog_files, dog_targets) where the x_* arrays hold file paths, the
        y_* arrays hold the matching one-hot targets, and
        dog_files / dog_targets are the full, unsplit arrays.
    """
    # load_content=False: skip reading every image's bytes; the 'filenames'
    # and 'target' entries used below do not depend on the file contents.
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)

    # First split: keep 70% for training, hold out 30%.
    x_train, x_left, y_train, y_left = train_test_split(
        dog_files, dog_targets, test_size=0.3, random_state=42)
    # Second split: 40% of the held-out part becomes validation, the rest test.
    x_test, x_valid, y_test, y_valid = train_test_split(
        x_left, y_left, test_size=0.4, random_state=50)

    return x_train, x_test, x_valid, y_train, y_test, y_valid, dog_files, dog_targets

# Load the dataset rooted at the relative directory 'images' and unpack the
# train / test / validation splits together with the full, unsplit arrays
# (dog_files, dog_targets) that load_dataset also returns.
x_train,x_test,x_valid, y_train,y_test,y_valid,dog_files, dog_targets = load_dataset('images')

In [24]:
# load list of dog names
# Every entry of glob("images/*") begins with "images/" (7 characters). The
# slice drops 17 characters in total, so it presumably also strips a
# 10-character per-directory prefix -- TODO confirm against the folder names.
dog_names = [item[17:] for item in sorted(glob("images/*"))]

# print statistics about the dataset
# `dog_files` is the full, unsplit array of image paths returned by
# load_dataset; the previous `all_files` name was never defined in this
# notebook and only worked through leftover kernel state.
print('-----------------------------------')
print('There are %d total dog categories.' % len(dog_names))
print('There are %d total dog images.' % len(dog_files))
print('\tThere are %d total training files.' % len(x_train))
print('\tThere are %d total testing files.' % len(x_test))
print('\tThere are %d total valid files.' % len(x_valid))
# Sanity check: the three splits must account for every image exactly once.
assert len(dog_files) == (len(x_train) + len(x_test) + len(x_valid))

-----------------------------------
There are 120 total dog categories.
There are 20580 total dog images.
	There are 14406 total training files.
	There are 3704 total testing files.
	There are 2470 total valid files.


# Import Human Dataset

In the code cell below, we import a dataset of human images, where the file paths are stored in the numpy array `human_files`.

In [26]:
import random
# Seed Python's `random` module so the in-place shuffle below is repeatable.
random.seed(86709)

# load filenames in shuffled human dataset
# Collects every path matching the relative pattern "lfw/*/*" into a numpy
# array and then shuffles that array in place.
# NOTE(review): glob does not sort its results, so the shuffled order
# presumably also depends on the directory listing order of the machine --
# confirm before relying on identical subsets across environments.
human_files = np.array(glob("lfw/*/*"))
random.shuffle(human_files)

# print statistics about the dataset
print('There are %d total human images.' % len(human_files))

There are 13233 total human images.


## Step 1: Detect Humans

In [30]:
import cv2                
import matplotlib.pyplot as plt

# Build the OpenCV cascade classifier used by face_detector from the XML file
# at the relative path below; the notebook therefore has to be run from a
# directory that contains the `haarcascades/` folder.
face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_alt.xml')

# returns "True" if face is detected in image stored at img_path
def face_detector(img_path):
    """Tell whether the cascade classifier finds at least one face.

    The image at `img_path` is read with OpenCV, converted from BGR to
    grayscale and passed to the module-level `face_cascade`.

    NOTE(review): if `img_path` cannot be read, cv2.imread presumably yields
    no image and the colour conversion fails -- confirm and handle upstream
    if unreadable files are possible.

    Returns
    -------
    bool
        True when one or more detections are reported, False otherwise.
    """
    bgr_image = cv2.imread(img_path)
    grayscale = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(grayscale)
    return len(detections) != 0

### Assess the Human Face Detector

In [32]:
human_files_short = human_files[:100]
dog_files_short = x_train[:100]
# Do NOT modify the code above this line.

## TODO: Test the performance of the face_detector algorithm 
## on the images in human_files_short and dog_files_short.
# Keep the paths in each sample for which face_detector reports a face.
results_human = [path for path in human_files_short if face_detector(path)]
results_dog = [path for path in dog_files_short if face_detector(path)]

# Convert the hit counts into real percentages. Printing the raw count is only
# correct when a sample holds exactly 100 images; max(..., 1) avoids a division
# by zero for an empty sample. '{:g}' prints whole numbers without a trailing
# ".0", so the output is unchanged for 100-image samples.
pct_human = 100.0 * len(results_human) / max(len(human_files_short), 1)
pct_dog = 100.0 * len(results_dog) / max(len(dog_files_short), 1)
print("human faces that contain human_face : {:g}%".format(pct_human))
print("dog faces that contain human_face : {:g}%".format(pct_dog))

human faces that contain human_face : 96%
dog faces that contain human_face : 7%


## Step 2: Detect Dogs

In [33]:
from keras.applications.resnet50 import ResNet50

# Instantiate the Keras ResNet50 model with weights='imagenet'; this
# module-level model is the one ResNet50_predict_labels calls.
ResNet50_model = ResNet50(weights='imagenet')

### Pre-process the Data

In [34]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path):
    """Load one image and return it as a single-item batch tensor.

    The image is loaded through keras' `image.load_img` resized to 224x224,
    turned into a 3D array of shape (224, 224, 3), and given a leading batch
    axis so the result has shape (1, 224, 224, 3).
    """
    pil_image = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Prepend the batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert several image paths into one stacked batch tensor.

    Each path goes through `path_to_tensor` (with a tqdm progress bar over
    the paths) and the resulting single-item batches are stacked along the
    first axis.
    """
    batches = []
    for single_path in tqdm(img_paths):
        batches.append(path_to_tensor(single_path))
    return np.vstack(batches)

### Making Predictions with ResNet-50

In [35]:
from keras.applications.resnet50 import preprocess_input, decode_predictions

def ResNet50_predict_labels(img_path):
    """Return the index of the top-scoring ResNet50 class for an image.

    The image at `img_path` is converted with `path_to_tensor`, run through
    `preprocess_input`, and scored by the module-level `ResNet50_model`.
    The result is the argmax of the model output, i.e. a class index rather
    than the full prediction vector.
    """
    tensor = path_to_tensor(img_path)
    scores = ResNet50_model.predict(preprocess_input(tensor))
    return np.argmax(scores)

### Write a Dog Detector

In [36]:
### returns "True" if a dog is detected in the image stored at img_path
def dog_detector(img_path):
    """Tell whether ResNet50's top prediction for an image is in the dog range.

    Returns a truthy value when the class index from
    `ResNet50_predict_labels` lies in the inclusive range 151..268.
    NOTE(review): 151-268 are presumably the ImageNet class indices that
    correspond to dog breeds -- confirm against the ImageNet label map.
    """
    class_index = ResNet50_predict_labels(img_path)
    return 151 <= class_index <= 268

### Assess the Dog Detector

In [37]:
### Test the performance of the dog_detector function
### on the images in human_files_short and dog_files_short.
# Keep the paths in each sample for which dog_detector reports a dog.
results_human = [path for path in human_files_short if dog_detector(path)]
results_dog = [path for path in dog_files_short if dog_detector(path)]

# Convert the hit counts into real percentages. Printing the raw count is only
# correct when a sample holds exactly 100 images; max(..., 1) avoids a division
# by zero for an empty sample. '{:g}' prints whole numbers without a trailing
# ".0", so the output is unchanged for 100-image samples.
pct_human = 100.0 * len(results_human) / max(len(human_files_short), 1)
pct_dog = 100.0 * len(results_dog) / max(len(dog_files_short), 1)
print("human faces that contain dog_face : {:g}%".format(pct_human))
print("dog faces that contain dog_face : {:g}%".format(pct_dog))

human faces that contain dog_face : 0%
dog faces that contain dog_face : 98%
