<h1>Find hard examples and update the model</h1>

In [None]:
# IMPORT
import cv2
import copy
import math
import os
import numpy as np
from matplotlib import pyplot as plt
from keras.models import load_model
from keras.models import Sequential
from os import listdir
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

In [None]:
# %load create_model_with_hard_example.py
import os, cv2, random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns


from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Conv2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.models import load_model


# Directories holding the class-1 and class-0 training images, and the
# directory that prepar_train_images reads the hard examples from.
TRAIN_1_DIR = './data/train/1/'
TRAIN_0_DIR = './data/train/0/'
HARD_EXAMPLE_DIR = 'data_save_difficult_no_faces/'


# Size every image is resized to by read_image, and the channel count.
ROWS = 36
COLS = 36
CHANNELS = 1


# Full paths of the training images of each class; any path containing
# '.DS' (e.g. macOS .DS_Store entries) is left out.
TRIAN_1_PATH = [
    path
    for path in (TRAIN_1_DIR + name for name in os.listdir(TRAIN_1_DIR))
    if '.DS' not in path
]
TRIAN_0_PATH = [
    path
    for path in (TRAIN_0_DIR + name for name in os.listdir(TRAIN_0_DIR))
    if '.DS' not in path
]


# Number of images of each class that shuffle_and_get_new_train_set
# sets aside as test images.
NB_TEST_BY_CLASS = 3000

# Training settings passed to model.fit in get_model.
NB_EPOCH = 5
BATCH_SIZE = 32

##
# Read the image at the indicated path and return it as a resized grayscale array
##
def read_image(file_path):
    """Load ``file_path`` in grayscale and resize it to COLS x ROWS pixels.

    Args:
        file_path: path of the image file to load.

    Returns:
        The array produced by ``cv2.resize`` (COLS wide, ROWS high).

    Raises:
        IOError: if ``cv2.imread`` could not read the file.
    """
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise IOError("Unable to read image: " + str(file_path))
    # cv2.resize takes the target size as (width, height), i.e. (COLS, ROWS).
    return cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)

##
# Take a list of image paths and return the loaded images stacked in one array
##
def prep_data(images):
    """Load every path of ``images`` with read_image into a single array.

    Args:
        images: sequence of image file paths.

    Returns:
        A uint8 array of shape (len(images), ROWS, COLS); entry ``i`` holds
        the image read from ``images[i]``.
    """
    stacked = np.empty((len(images), ROWS, COLS), dtype=np.uint8)

    # Iterating over the array yields one (ROWS, COLS) view per image slot;
    # writing through the view fills the corresponding row of ``stacked``.
    for slot, path in zip(stacked, images):
        slot[...] = read_image(path)

    return stacked

# Eagerly load every training image of each class into one uint8 array
# (see prep_data). shuffle_and_get_new_train_set shuffles these arrays
# in place each time it is called.
TRIAN_1_IMAGES = prep_data(TRIAN_1_PATH)
TRIAN_0_IMAGES = prep_data(TRIAN_0_PATH)

##
# Shuffle the training images and split them into train and test subsets
##
def shuffle_and_get_new_train_set():
    """Shuffle both image arrays in place and split off the test images.

    The first NB_TEST_BY_CLASS images of each shuffled array become the test
    images; the remaining ones are the training images.

    Returns:
        A tuple ``(train_1, train_0, test_images_1, test_images_0)`` of
        slices of TRIAN_1_IMAGES / TRIAN_0_IMAGES.
    """
    # random.shuffle swaps with ``x[i], x[j] = x[j], x[i]``. On a
    # multi-dimensional NumPy array x[i] is a view, so that swap overwrites
    # rows and leaves duplicated images. np.random.shuffle permutes the
    # first axis without losing any row.
    np.random.shuffle(TRIAN_1_IMAGES)
    np.random.shuffle(TRIAN_0_IMAGES)
    test_images_1 = TRIAN_1_IMAGES[:NB_TEST_BY_CLASS]
    test_images_0 = TRIAN_0_IMAGES[:NB_TEST_BY_CLASS]
    train_1 = TRIAN_1_IMAGES[NB_TEST_BY_CLASS:]
    train_0 = TRIAN_0_IMAGES[NB_TEST_BY_CLASS:]
    return train_1, train_0, test_images_1, test_images_0

##
# Return the shuffled train images, their labels, and the test images
##
def prepar_train_images():
    """Build the training set (including the hard examples) and the test images.

    Negatives are the class-0 training images plus every image found in
    HARD_EXAMPLE_DIR; positives are the class-1 training images, capped at
    the number of negatives.

    Returns:
        A tuple ``(train_images, train_labels, test_images)``:
        * train_images: array of shape (n, ROWS, COLS, CHANNELS), shuffled.
        * train_labels: array of n labels in the same order, 1 for a
          class-1 image and 0 for a class-0 image or a hard example.
        * test_images: array of shape (m, ROWS, COLS, CHANNELS) holding the
          held-out class-1 images followed by the held-out class-0 images.
          No labels are returned for the test images.
    """
    hard_example_paths = [
        path
        for path in (HARD_EXAMPLE_DIR + name for name in os.listdir(HARD_EXAMPLE_DIR))
        if '.DS' not in path
    ]
    hard_example_set = prep_data(hard_example_paths)
    train_1, train_0, test_images_1, test_images_0 = shuffle_and_get_new_train_set()
    print("inside function : " + str(len(hard_example_set)))

    negatives = list(train_0) + list(hard_example_set)
    positives = list(train_1[:len(negatives)])
    # Derive the labels from the real number of images of each kind. Assuming
    # an even half/half split mislabels images (or drops one) as soon as the
    # two groups differ in size or the total is odd.
    labels = [1] * len(positives) + [0] * len(negatives)

    train_images = np.array(positives + negatives, dtype=np.uint8)
    # reshape (unlike an in-place resize) fails loudly on a size mismatch
    # instead of silently padding or truncating the data.
    train_images = train_images.reshape((len(labels), ROWS, COLS, CHANNELS))

    # Apply one permutation to both arrays so images and labels stay aligned.
    order = np.random.permutation(len(labels))
    train_images = train_images[order]
    train_labels = np.array(labels, dtype=int)[order]

    held_out = list(test_images_1) + list(test_images_0)
    test_imagies = np.array(held_out, dtype=np.uint8)
    test_imagies = test_imagies.reshape((len(held_out), ROWS, COLS, CHANNELS))
    return train_images, train_labels, test_imagies

##
# Create the untrained model
##
def faceRecognition():
    """Build and compile the untrained binary classifier.

    Layers, in order: a 4-filter 5x5 convolution and a 14-filter 3x3
    convolution (both ReLU, 'same' padding, each followed by 2x2 max
    pooling), a 14-unit ReLU dense layer, and a single sigmoid output unit.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        RMSprop optimizer, accuracy metric).
    """
    model = Sequential()

    # `padding` is the Keras 2 name of the Keras 1 `border_mode` argument;
    # the other arguments used here (strides, data_format) are already
    # Keras 2 names.
    model.add(Conv2D(4, 5, strides=(1, 1), padding='same',
                     input_shape=(ROWS, COLS, CHANNELS), data_format="channels_last",
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(14, 3, strides=(1, 1), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(14, activation='relu'))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=RMSprop(lr=1e-4), metrics=['accuracy'])
    return model

##
# Train the model on the train set with the hard examples and return it
##
def get_model(verbose_train=0):
    """Train a fresh model on the current training set and return it.

    Args:
        verbose_train: forwarded to ``model.fit`` as its ``verbose`` argument.

    Returns:
        The model built by faceRecognition, after fitting.
    """
    # The test images returned by prepar_train_images are not used here.
    train_images, train_labels, _ = prepar_train_images()
    print("LEN train : " + str(len(train_images)))

    model = faceRecognition()
    # Stop once the validation loss has not improved for 3 epochs.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='auto')
    # `epochs` is the Keras 2 name of the Keras 1 `nb_epoch` argument.
    model.fit(train_images, train_labels, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
              validation_split=0.25, verbose=verbose_train, shuffle=True,
              callbacks=[early_stopping])
    return model



In [None]:
# Constants
# NOTE(review): update_model assigns its own local `path_repo`, so this
# module-level value does not appear to be read by the code below - confirm.
path_repo = "data_difficult_faces/0/"
# Directory the hard examples are written to; same value as HARD_EXAMPLE_DIR,
# which prepar_train_images reads from.
path_save = "data_save_difficult_no_faces/"
# Factor the detection threshold is multiplied by after each iteration of
# update_model.
THRESHOLD = 0.7

In [None]:
def pyramid_search_difficult (path_image, size_filter_X, size_filter_Y, nb_iterations, model, threshold):
    """Run search_visage on progressively smaller versions of an image.

    The first level is the image at its original size. Each intermediate
    level shrinks the previous one by a fixed number of pixels, and the last
    level is the image resized to exactly the filter size.

    Args:
        path_image: path of the image file to scan.
        size_filter_X: window width, in pixels.
        size_filter_Y: window height, in pixels.
        nb_iterations: number of pyramid levels to scan.
        model: object passed through to search_visage.
        threshold: value passed through to search_visage.

    Returns:
        The list of every crop returned by search_visage, over all levels.

    Raises:
        IOError: if ``cv2.imread`` could not read ``path_image``.
    """
    img_search = cv2.imread(path_image)
    if img_search is None:
        # cv2.imread returns None on failure; report the offending path.
        raise IOError("Unable to read image: " + str(path_image))
    gray_image = cv2.cvtColor(img_search, cv2.COLOR_BGR2GRAY)

    # Pixels removed per intermediate level. With a single level the value is
    # never used, so clamp the divisor to avoid dividing by zero.
    height, width = gray_image.shape
    nb_steps = max(nb_iterations - 1, 1)
    nbPixelToReduceX = int((width - size_filter_X) / nb_steps)
    nbPixelToReduceY = int((height - size_filter_Y) / nb_steps)

    list_reponse = []
    for ite in range(nb_iterations):
        if ite == nb_iterations - 1:
            # Last level: the image is exactly one window.
            gray_image = cv2.resize(gray_image, (size_filter_X, size_filter_Y))
        elif ite > 0:
            height, width = gray_image.shape
            gray_image = cv2.resize(gray_image, (width - nbPixelToReduceX, height - nbPixelToReduceY))
        # ite == 0 (and not last): scan the image at its original size.

        list_reponse.extend(search_visage(gray_image, size_filter_X, size_filter_Y, model, threshold))

    return list_reponse

In [None]:
def search_visage(gray_image, size_filter_X, size_filter_Y, model, threshold):
    """Slide a window over ``gray_image`` and return the crops the model accepts.

    Args:
        gray_image: 2-D array indexed as [row, column].
        size_filter_X: window width, in pixels (must be even).
        size_filter_Y: window height, in pixels (must be even).
        model: object whose ``predict(batch, verbose=0)`` returns one score
            for a batch of shape (1, size_filter_Y, size_filter_X, 1).
        threshold: a crop is kept when ``int(score + threshold) == 1``, i.e.
            when ``score >= 1 - threshold`` for scores and thresholds below 1.

    Returns:
        A list of copies of the accepted crops, each of shape
        (size_filter_Y, size_filter_X).

    Raises:
        Exception: if the image is smaller than the window in either
            dimension, or if a window dimension is odd.
    """
    # shape is (rows, columns), i.e. (height, width).
    height, width = gray_image.shape[0], gray_image.shape[1]

    # The window must fit in BOTH dimensions.
    if height < size_filter_Y or width < size_filter_X:
        raise Exception ("impossible to crop properly")

    if size_filter_X % 2 != 0 or size_filter_Y % 2 != 0:
        raise Exception ("All dimension of the filter should be pair")

    half_x = size_filter_X // 2
    half_y = size_filter_Y // 2
    # Move by a third of the half-window; never by 0, which range() rejects.
    step_x = max(1, half_x // 3)
    step_y = max(1, half_y // 3)

    listImgPos = []
    # (x, y) is the window centre. The "+ 1" lets the window touch the
    # bottom/right border, so an image exactly the size of the window still
    # yields one crop.
    for y in range(half_y, height - half_y + 1, step_y):
        for x in range(half_x, width - half_x + 1, step_x):
            crop_img = gray_image[y - half_y: y + half_y, x - half_x: x + half_x]
            batch = np.array(crop_img).reshape((1, size_filter_Y, size_filter_X, 1))

            score = float(np.ravel(model.predict(batch, verbose=0))[0])
            if int(score + threshold) == 1:
                # Copy so the result does not alias the scanned image.
                listImgPos.append(crop_img.copy())

    return listImgPos

In [None]:
def find_and_save_difficult_images(path_repo, path_save, threshold, model, indice):
    """Scan every image of ``path_repo`` and save the accepted 36x36 crops.

    Args:
        path_repo: directory containing the images to scan.
        path_save: directory the crops are written to, as ``<number>.jpg``.
        threshold: value passed through to pyramid_search_difficult.
        model: object passed through to pyramid_search_difficult.
        indice: number used for the first file written by this call.

    Returns:
        The number of files written. Files are numbered consecutively from
        ``indice``, so adding the returned value to ``indice`` gives the
        first free number for the next call.
    """
    list_image = []
    for index, filename in enumerate(os.listdir(path_repo)):
        if filename == ".DS_Store":
            continue
        # NOTE(review): SIZE_FILTER_X, SIZE_FILTER_Y and NB_ITERATION are not
        # defined in the visible part of this notebook - confirm they are
        # defined before this function runs.
        list_difficult_faces = pyramid_search_difficult(
            os.path.join(path_repo, filename),
            SIZE_FILTER_X, SIZE_FILTER_Y, NB_ITERATION, model, threshold)
        print("Traite l'image : "+ str(index) + " find : "+ str(len (list_difficult_faces)))
        list_image.extend(list_difficult_faces)

    nb_saved = 0
    for image in list_image:
        height, width = image.shape[0], image.shape[1]
        # Only crops of exactly 36x36 pixels are saved.
        if width == 36 and height == 36:
            cv2.imwrite(os.path.join(path_save, str(indice + nb_saved) + ".jpg"), image)
            nb_saved += 1

    # Return a count rather than the last absolute index: the caller
    # accumulates this value, and an absolute index would make the next
    # batch overwrite a file or leave gaps in the numbering.
    return nb_saved

In [None]:
def update_model (path_save, threshold):
    """Alternate between training a model and mining hard examples with it.

    For each numbered sub-directory ("0/", "1/", ...) of
    "data_difficult_faces/", a model is trained with get_model, the crops it
    accepts in that sub-directory are saved into ``path_save`` by
    find_and_save_difficult_images, the threshold is multiplied by THRESHOLD,
    and the model is saved as ``model_it<i>.h5``. The last model is also
    saved as ``my_model.h5``.

    Args:
        path_save: directory the hard examples are written to.
        threshold: initial threshold passed to find_and_save_difficult_images.
    """
    path_repo = "data_difficult_faces/"
    indice = 0
    model = None

    # One iteration per sub-directory. Counting directories only ignores
    # stray files such as .DS_Store without an unconditional "- 1" that would
    # skip the last sub-directory.
    lenght_repo = sum(
        1 for name in os.listdir(path_repo)
        if os.path.isdir(os.path.join(path_repo, name))
    )

    for ite in range(lenght_repo):
        print ("iteration : " + str(ite) + " threshold : "+  str(1-threshold))
        cur_path_rep = path_repo + str(ite) + "/"
        print(cur_path_rep)
        # get_model is defined in this notebook (its source was pasted in with
        # %load); no module named create_model_with_hard_example is imported.
        model = get_model(1)
        # The helper's return value is added to the running file number.
        indice += find_and_save_difficult_images(cur_path_rep, path_save, threshold, model, indice)
        threshold = threshold * THRESHOLD
        model.save('model_it'+ str(ite) +'.h5')

    # Nothing to save when there was no sub-directory to process.
    if model is not None:
        model.save('my_model.h5')

In [None]:
# Run the train / mine-hard-examples loop, saving the crops into `path_save`
# and starting from a threshold of 0.2.
update_model(path_save, 0.2)