In [1]:
%matplotlib inline
import cv2
import sys
import numpy as np
import os
from skimage import io
from scipy import misc
import matplotlib.pyplot as plt
from skimage.transform import resize

In [2]:
# Side length in pixels of the square that every accepted face crop is resized to.
img_size = 350
# Maximum number of accepted face crops per photo; extract_faces returns an
# empty list for any photo that yields more than this.
faces_in_image_limit = 1

In [3]:
# Haar cascade definitions, resolved relative to the current working directory.
_FACE_CASCADE_PATH = 'utils/haarcascade_frontalface_default.xml'
_EYE_CASCADE_PATH = 'utils/haarcascade_eye.xml'
# xml path -> loaded cv2.CascadeClassifier, so each file is parsed only once.
_cascade_cache = {}


def _load_cascade(xml_path):
    """Return the cv2.CascadeClassifier for `xml_path`, loading it on first use.

    Raises:
        IOError: if OpenCV could not load a classifier from `xml_path`.
    """
    cascade = _cascade_cache.get(xml_path)
    if cascade is None:
        cascade = cv2.CascadeClassifier(xml_path)
        # OpenCV does not raise on a missing/corrupt file; it hands back an
        # empty classifier that only fails later inside detectMultiScale.
        if cascade.empty():
            raise IOError('Could not load Haar cascade from {!r}'.format(xml_path))
        _cascade_cache[xml_path] = cascade
    return cascade


def extract_faces(img):
    """Crop, validate and resize the faces found in one image.

    A detected face is accepted only if the eye cascade finds at least one
    eye inside it. Accepted crops are resized to (img_size, img_size) with
    skimage.transform.resize and keep the channel order of `img`.

    Args:
        img: colour image in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY), or None for an image that failed to load.

    Returns:
        A list of resized face crops. The list is empty when `img` is None,
        when no face is accepted, or when more than `faces_in_image_limit`
        faces are accepted.
    """
    # cv2.imread signals an unreadable file by returning None; treat that as
    # "no faces" instead of crashing inside cv2.cvtColor.
    if img is None:
        return []

    face_cascade = _load_cascade(_FACE_CASCADE_PATH)
    eye_cascade = _load_cascade(_EYE_CASCADE_PATH)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    accepted = []

    for (x, y, w, h) in face_cascade.detectMultiScale(gray):
        roi_gray = gray[y:y+h, x:x+w]

        # Requiring an eye inside the candidate region filters out many
        # false-positive face detections.
        if len(eye_cascade.detectMultiScale(roi_gray)) < 1:
            continue

        roi_color = img[y:y+h, x:x+w]
        accepted.append(resize(roi_color, (img_size, img_size)))

        # The photo is rejected as soon as the limit is exceeded, so there is
        # no point in resizing any further crops.
        if len(accepted) > faces_in_image_limit:
            return []

    return accepted

In [4]:
def print_progress(total, current):
    """Rewrite the current console line with a percentage progress readout.

    Args:
        total: number of work items overall.
        current: number of work items finished so far.

    The leading carriage return moves the cursor back to the start of the
    line so successive calls overwrite each other instead of scrolling.
    """
    # With nothing to process there is nothing left to do: report completion
    # rather than raising ZeroDivisionError.
    percent = 100.0 if not total else (current / total) * 100
    sys.stdout.write('\rProgress: %.1f%%' % percent)
    sys.stdout.flush()
    
def folder_count(path):
    """Count the entries whose name does not start with a dot.

    Args:
        path: despite the name, an iterable of file-name strings (for example
            the result of os.listdir), not a filesystem path.

    Returns:
        The number of names that are not hidden (dot-prefixed).
    """
    # startswith() matches the hidden-file test used by process_folder and,
    # unlike name[0], does not raise IndexError on an empty name.
    return sum(1 for name in path if not name.startswith('.'))

In [7]:
# Accumulators filled by process_folder: one face crop per entry in `images`
# and its class (1 = like, 0 = dislike) at the same index in `labels`.
images = []
labels = []
# Root folder containing the 'likes' and 'dislikes' sub-folders. Set the
# TINDER_BOT_DATA environment variable to point somewhere else without
# editing the notebook; the original location remains the fallback.
data_path = os.environ.get('TINDER_BOT_DATA', '/home/zedmor/Development/tinder-bot/data')

# NOTE: despite the *_path suffix these hold lists of file names, not paths.
# They are sorted because os.listdir returns entries in arbitrary order,
# which would make the order of the saved samples vary between runs.
dislikes_folder_path = sorted(os.listdir(os.path.join(data_path, 'dislikes')))
likes_folder_path = sorted(os.listdir(os.path.join(data_path, 'likes')))

def process_folder(path, like_type):
    """Extract the faces from every visible image of one class folder.

    Each face returned by extract_faces is appended to the module-level
    `images` list and its class to `labels` at the same index.

    Args:
        path: despite the name, an iterable of file names located in
            <data_path>/<like_type> (for example os.listdir output).
        like_type: sub-folder name. 'likes' is labelled 1, anything else 0.
    """
    label = 1 if like_type == 'likes' else 0
    # Hidden entries (.DS_Store and friends) are neither processed nor counted.
    visible_files = [name for name in path if not name.startswith('.')]
    total = len(visible_files)
    unreadable = 0

    if total:
        # Show something immediately; the first image can take a while.
        print_progress(total, 0)

    for done, file_name in enumerate(visible_files, start=1):
        image = cv2.imread(os.path.join(data_path, like_type, file_name))
        if image is None:
            # imread returns None for files it cannot decode; skip them
            # instead of aborting the whole run.
            unreadable += 1
        else:
            for face in extract_faces(image):
                images.append(face)
                labels.append(label)
        # Report after the work is done so the last update reads 100%.
        print_progress(total, done)

    print("\nProcessing of {} images complete".format(like_type))
    if unreadable:
        print("Skipped {} unreadable file(s) in {}".format(unreadable, like_type))

# Run both class folders through the pipeline, dislikes first.
for _banner, _listing, _like_type in (
    ("Processing disliked images", dislikes_folder_path, "dislikes"),
    ("Processing liked images", likes_folder_path, "likes"),
):
    print(_banner)
    process_folder(_listing, _like_type)

# Freeze the accumulated Python lists into numpy arrays.
images, labels = np.array(images), np.array(labels)
print("Image processing complete! Hurray!")

Processing disliked images
Progress: 0.0%

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Progress: 99.9%
Processing of dislikes images complete
Processing liked images
Progress: 99.7%
Processing of likes images complete
Image processing complete! Hurray!


In [8]:
# Sanity check: both arrays should have the same length along the first axis.
for _array in (images, labels):
    print(_array.shape)

def save_file(data, file_path_name):
    """Announce and write `data` to disk in numpy's .npy format.

    Args:
        data: array (or array-like) handed to np.save.
        file_path_name: destination file name, with or without the '.npy'
            extension; np.save appends the extension when it is missing.
    """
    # Mirror np.save's naming rule so the message shows the file actually
    # written ("x.npy", not "x.npy.npy") when the extension is already there.
    shown_name = str(file_path_name)
    if not shown_name.endswith('.npy'):
        shown_name += '.npy'
    print("Saving {}".format(shown_name))
    np.save(file_path_name, data)
    
# Persist both arrays, images first, under names relative to the working directory.
for _data, _stem in ((images, "processed_images"), (labels, "processed_labels")):
    save_file(_data, _stem)

(561, 350, 350, 3)
(561,)
Saving processed_images.npy
Saving processed_labels.npy


In [8]:
# Eyeball the processed pixel data. The recorded output below shows floating
# point values between 0 and 1, and numpy abbreviates the repr of an array
# this large with "...".
images

array([[[[0.42400449, 0.50635743, 0.62792606],
         [0.41960784, 0.51372549, 0.63137255],
         [0.42352941, 0.51764706, 0.63529412],
         ...,
         [0.06214909, 0.14140969, 0.22376263],
         [0.08175693, 0.16506003, 0.25133454],
         [0.11081164, 0.19352455, 0.2812243 ]],

        [[0.43137255, 0.51325041, 0.63481904],
         [0.42352941, 0.51764706, 0.63529412],
         [0.42745098, 0.52156863, 0.63921569],
         ...,
         [0.05096169, 0.12350302, 0.20585596],
         [0.04008782, 0.12244076, 0.20871527],
         [0.06923238, 0.15111024, 0.23880999]],

        [[0.42739643, 0.51217932, 0.63510681],
         [0.42194917, 0.51452064, 0.63526004],
         [0.42457826, 0.51714974, 0.63788913],
         ...,
         [0.07054396, 0.14472682, 0.21995853],
         [0.05816127, 0.13421724, 0.21193168],
         [0.07554859, 0.14990516, 0.23264455]],

        ...,

        [[0.22810154, 0.30700799, 0.44283765],
         [0.25820208, 0.33663345, 0.47388836]