# Import dog data from the Stanford Dogs dataset (a subset of ImageNet)

In [9]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split

# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=120):
    """Load image paths and one-hot labels from `path` and split them three ways.

    The directory is scanned with scikit-learn's ``load_files``; only its
    'filenames' and 'target' entries are used, so the file contents are not
    read into memory (``load_content=False``).

    The data is split in two steps, each with a fixed ``random_state``:
      1. 70% of the samples become the training set, 30% are held out.
      2. Of the held-out samples, 60% become the test set and 40% the
         validation set (about 18% and 12% of the whole dataset).

    Args:
        path: Root directory passed to ``load_files``.
        num_classes: Width of the one-hot label vectors. Defaults to 120,
            the value that was previously hard-coded.

    Returns:
        A tuple ``(x_train, x_test, x_valid, y_train, y_test, y_valid,
        dog_files, dog_targets)`` where the ``x_*`` arrays hold file paths,
        the ``y_*`` arrays hold the matching one-hot labels, and
        ``dog_files`` / ``dog_targets`` are the complete, unsplit arrays.
    """
    # Only paths and labels are needed here; skip reading every image's bytes.
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)

    # Step 1: hold out 30% of the samples.
    x_train, x_left, y_train, y_left = train_test_split(
        dog_files, dog_targets, test_size=0.3, random_state=42)

    # Step 2: the "train" side of this split is the test set; the 40%
    # "test" side is the validation set.
    x_test, x_valid, y_test, y_valid = train_test_split(
        x_left, y_left, test_size=0.4, random_state=50)

    return x_train, x_test, x_valid, y_train, y_test, y_valid, dog_files, dog_targets

# Build the train / test / validation splits, keeping the full path and
# label arrays alongside them.
(x_train, x_test, x_valid,
 y_train, y_test, y_valid,
 dog_files, dog_targets) = load_dataset('dog-images')

In [10]:
# Breed names: one entry per item directly under dog-images/, in sorted order.
# NOTE(review): 17 is a fixed character offset into each matched path —
# confirm it lines up with the folder naming actually used on disk.
dog_names = [
    breed_dir[17:]
    for breed_dir in sorted(glob("dog-images/*"))
]

# Summarise the dataset and the three-way split, one line per figure.
print(
    '-----------------------------------',
    'There are %d total dog categories.' % len(dog_names),
    'There are %d total dog images.' % len(dog_files),
    '\tThere are %d total training files.' % len(x_train),
    '\tThere are %d total testing files.' % len(x_test),
    '\tThere are %d total valid files.' % len(x_valid),
    sep='\n',
)

# The three splits must account for every image exactly once.
assert len(dog_files) == len(x_train) + len(x_test) + len(x_valid)

-----------------------------------
There are 120 total dog categories.
There are 20580 total dog images.
	There are 14406 total training files.
	There are 3704 total testing files.
	There are 2470 total valid files.


## Step 1: Detect Humans

### Assess the Human Face Detector

In [11]:
from human import human_face_detector
import random

# Seed Python's `random` module so the shuffle below produces the same
# permutation each time it is given the same file listing.
random.seed(86709)

# load filenames in shuffled human dataset
# (every path matching lfw/*/* is collected, then shuffled in place)
human_files = np.array(glob("lfw/*/*"))
random.shuffle(human_files)

# print statistics about the dataset
print('There are %d total human images.' % len(human_files))

# Evaluation samples: the first 100 shuffled human paths and the first 100
# entries of x_train.
human_files_short = human_files[:100]
dog_files_short = x_train[:100]
# Do NOT modify the code above this line.

## TODO: Test the performance of the face_detector algorithm 
## on the images in human_files_short and dog_files_short.
# Keep the paths for which the detector returns a truthy result.
results_human = [path for path in human_files_short if human_face_detector(path)]
results_dog = [path for path in dog_files_short if human_face_detector(path)]

# Report a real percentage of each sample instead of the raw hit count, so the
# figures stay correct if a sample does not contain exactly 100 images.
# round() yields an int, which keeps the output in the "96%" form;
# max(..., 1) avoids a ZeroDivisionError when a sample is empty.
print("human faces that contain human_face : {}%".format(
    round(100 * len(results_human) / max(len(human_files_short), 1))))
print("dog faces that contain human_face : {}%".format(
    round(100 * len(results_dog) / max(len(dog_files_short), 1))))

There are 13233 total human images.
human faces that contain human_face : 96%
dog faces that contain human_face : 7%


## Step 2: Detect Dogs

### Assess the Dog Detector

In [8]:
%matplotlib inline 
from dog import dog_detector

### Test the performance of the dog_detector function
### on the images in human_files_short and dog_files_short.
# Keep the paths for which the detector returns a truthy result.
results_human = [path for path in human_files_short if dog_detector(path)]
results_dog = [path for path in dog_files_short if dog_detector(path)]

# Report a real percentage of each sample instead of the raw hit count, so the
# figures stay correct if a sample does not contain exactly 100 images.
# round() yields an int, which keeps the output in the "98%" form;
# max(..., 1) avoids a ZeroDivisionError when a sample is empty.
print("human faces that contain dog_face : {}%".format(
    round(100 * len(results_human) / max(len(human_files_short), 1))))
print("dog faces that contain dog_face : {}%".format(
    round(100 * len(results_dog) / max(len(dog_files_short), 1))))

human faces that contain dog_face : 0%
dog faces that contain dog_face : 98%
