## Boilerplate

In [47]:
## Load (or reload) the autoreload extension and use mode 2, so that edits to
## imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
## Render matplotlib figures inside the notebook output.
%matplotlib inline

In [48]:
import pandas as pd
import numpy as np
import cv2
import imutils
import glob
import os
import shutil
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
## Show up to 500 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 500)
## Hook tqdm into pandas; the label-preparation cell relies on this for `progress_apply`
tqdm_notebook().pandas()

## Step 1: Image face finder

In [52]:
## Importing image using open cv (flag 1 = 3-channel colour image, BGR channel order)
img = cv2.imread('Aayush Agrawal1.jpg',1)
if img is None:
    ## cv2.imread reports a missing/unreadable file by returning None, not by raising
    raise FileNotFoundError("Could not read image 'Aayush Agrawal1.jpg'")

## Resizing to constant width
img = imutils.resize(img, width=300)

## Converting BGR to grayscale (the Haar cascade runs on a single-channel image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

## Taken from https://github.com/opencv/opencv/tree/master/data/haarcascades
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    ## A cascade that failed to load only errors later, inside detectMultiScale
    raise IOError("Could not load 'haarcascade_frontalface_default.xml'")

## scaleFactor=1.2, minNeighbors=10, ignore detections smaller than 30x30 px
face_coord = face_cascade.detectMultiScale(gray,1.2,10,minSize=(30,30))

## Draw on a copy so that `img` stays untouched for the crop in the next cell
img_cp = img.copy()
for coords in face_coord:
    ## X, Y, w, h are left in the notebook namespace; the next cell reuses them
    X, Y, w, h = coords
    ## Box is widened by 10% of the width on each side, extended 35% of the
    ## height above the detection and 10% below it
    cv2.rectangle(
        img = img_cp,
        pt1 = (X - int(w*0.1), Y - int(0.35*h)),
        pt2 = (X + int(1.1*w), Y + int(1.1*h)),
        color=(0, 0, 255),
        thickness=2
    )

## Opens a native OpenCV window and blocks until a key is pressed
cv2.imshow('image',img_cp)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [67]:
## Shape of the padded face crop, using X, Y, w, h left over from the loop in
## the previous cell. The start indices are clamped at 0: a negative start
## would be interpreted as counting from the end of the axis.
img[
    max(0, Y - int(0.35*h)):Y + int(1.1*h),
    max(0, X - int(w*0.1)):X + int(1.1*w)
].shape

(250, 207, 3)

## Step 2: Video face finder

In [6]:
## Taken from https://github.com/opencv/opencv/tree/master/data/haarcascades
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    ## A cascade that failed to load only errors later, inside detectMultiScale
    raise IOError("Could not load 'haarcascade_frontalface_default.xml'")

In [7]:
## Open the default camera (device index 0)
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open camera device 0')

    while True:
        # Capture frame-by-frame; `ret` is False when no frame could be read
        ret, frame = cap.read()
        if not ret:
            break

        # Detection runs on the full-size grayscale frame. (The original cell also
        # computed a width-300 resized copy here, but never used it.)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Find faces using Haar cascade
        face_coord = face_cascade.detectMultiScale(gray,1.1,5,minSize=(30,30))

        for coords in face_coord:
            X, Y, w, h = coords
            cv2.rectangle(img=frame, pt1=(X, Y), pt2=(X + w, Y + h), color=(0, 0, 255), thickness=2)

        # Display the resulting frame; press 'q' in the window to stop
        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

## Downloading the dataset

In [4]:
## !kaggle datasets download -d jessicali9530/celeba-dataset

## Extracting frontal face from images

In [None]:
## Loading Haar Cascade
## Taken from https://github.com/opencv/opencv/tree/master/data/haarcascades
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    ## A cascade that failed to load only errors later, inside detectMultiScale
    raise IOError("Could not load 'haarcascade_frontalface_default.xml'")

In [21]:
def face_extractor(origin, destination, fc):
    """Crop the largest detected face out of an image and save it.

    The image at `origin` is read, resized to a width of 200 px and searched
    for faces with the cascade `fc`. If at least one face is found, the one
    with the largest area (w*h) is cropped with a 35% margin on every side,
    clipped to the image borders, and written to `destination`.

    Parameters
    ----------
    origin : str
        Path of the image to read.
    destination : str
        Path the cropped face is written to.
    fc : object with a `detectMultiScale` method
        Face detector, e.g. a loaded `cv2.CascadeClassifier`.

    Returns
    -------
    None
        In every case. Nothing is written when the image cannot be read or
        when no face is detected.
    """
    ## Importing image using open cv
    img = cv2.imread(origin,1)
    if img is None:
        ## Unreadable / missing file: skip it like an image without a face
        return None

    ## Resizing to constant width
    img = imutils.resize(img, width=200)
    H,W,_ = img.shape
    ## Converting BGR to grayscale for the detector
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    ## Detecting faces on the image
    face_coord = fc.detectMultiScale(gray,1.2,10,minSize=(50,50))

    if len(face_coord) == 0:
        return None

    ## Keep the detection with the largest area; on ties max() returns the
    ## first one, the same choice the strict '>' comparison used to make
    X, Y, w, h = max(face_coord, key=lambda box: box[2]*box[3])

    ## 35% margin around the detection, clipped to the image bounds
    img_cp = img[
            max(0,Y - int(0.35*h)): min(Y + int(1.35*h), H),
            max(0,X - int(w*0.35)): min(X + int(1.35*w), W)
        ].copy()

    cv2.imwrite(destination, img_cp)

In [26]:
## Folder the cropped faces are written to. It is created up front because
## cv2.imwrite does not create missing directories and does not raise either.
path = '../data/celeba-dataset/faces/'
os.makedirs(path, exist_ok=True)

## All aligned CelebA images; sorted so the processing order is reproducible
item_list = sorted(glob.glob('../data/celeba-dataset/img_align_celeba/*.jpg'))
print(len(item_list))

202599


In [25]:
## Crop the face out of every source image and store it under the same file name.
## os.path.basename handles both '/' and '\\' separators on the platform the
## notebook runs on, instead of assuming Windows-style paths from glob.
for org in tqdm_notebook(item_list):
    face_extractor(origin = org, destination = path+os.path.basename(org), fc=face_cascade)




`list_eval_partition.csv`: Recommended partitioning of images into training, validation and testing sets. Images 1-162770 are training, 162771-182637 are validation, and 182638-202599 are testing.

In [44]:
## Extracted face crops. The move loop in the next cell does not read this list,
## and `item_list` is rebuilt again in the label-preparation section.
item_list = glob.glob('../data/celeba-dataset/faces/*.jpg')

In [53]:
## Sort the face crops into training/ and validation/ sub-folders by image number.
## NOTE(review): ids up to 182637 cover the recommended training AND validation
## partitions, and the recommended testing partition is used as validation here.
## Looks deliberate - confirm.
train_dir = path+'training/'
valid_dir = path+'validation/'
for folder in (train_dir, valid_dir):
    ## Without the target folders every move would fail
    os.makedirs(folder, exist_ok=True)

for idx in tqdm_notebook(range(1,202600)):
    if idx <= 182637:
        destination = train_dir
    else:
        destination = valid_dir

    file_name = str(idx).zfill(6)+'.jpg'
    source = path+file_name
    ## A crop may be missing (e.g. no face was detected, or it was already
    ## moved); skip it explicitly instead of swallowing every exception
    if not os.path.exists(source):
        continue
    shutil.move(source, destination+file_name)




## Label prep

In [165]:
label_df = pd.read_csv('../data/celeba-dataset/list_attr_celeba.csv')
## Every column after the first one is treated as an attribute flag
## (presumably the first column is `image_id` - it is the one kept below)
column_list = pd.Series(list(label_df.columns)[1:])

def label_generator(row):
    """Return the names of all columns in `column_list` whose value in `row` equals 1, joined by single spaces."""
    return ' '.join(name for name in column_list if row[name] == 1)

## progress_apply is provided by the tqdm/pandas hook set up in the import cell
label_df['label'] = label_df.progress_apply(label_generator, axis=1)
label_df = label_df.loc[:,['image_id','label']]
## index=False keeps a meaningless row-number column out of the file,
## matching how faces/labels.csv is written later in the notebook
label_df.to_csv('../data/celeba-dataset/labels.csv', index=False)


In [206]:
## Face crops after the split, one sub-folder per set
faces_root = '../data/celeba-dataset/faces'
item_list = glob.glob(faces_root + '/*/*.jpg')

## image_name: path relative to the faces folder, always with '/' separators
## (e.g. 'training/000001.jpg'). Derived with os.path so it works whichever
## separator glob returns, instead of assuming '\\'.
item_df = pd.DataFrame({
    'image_name': pd.Series(item_list).apply(
        lambda p: os.path.relpath(p, faces_root).replace(os.sep, '/')
    )
})
## image_id: bare file name, used as the join key against the label table
item_df['image_id'] = item_df.image_name.apply(lambda name: name.rsplit('/', 1)[-1])

In [216]:
## Attach the relative image path to every label row (inner join: only images
## that actually have a face crop survive) and expose the labels as `tags`.
label_df = (
    pd.read_csv('../data/celeba-dataset/labels.csv')
    .merge(item_df, on='image_id', how='inner')
    .rename(columns={'label':'tags'})
)

## Only the two columns the data bunch needs are written out
label_df[['image_name','tags']].to_csv('../data/celeba-dataset/faces/labels.csv', index=False)

## Creating data bunch

In [219]:
## The star import supplies the fastai names used below (Path, get_transforms,
## ImageItemList, imagenet_stats).
from fastai.vision import *
## From here on `path` is a Path object, no longer the plain string used in the
## extraction cells above.
path = Path('../data/celeba-dataset/faces/')

In [220]:
## Label table written by the label-preparation section (columns image_name, tags).
## The data bunch below reads the same file itself via csv_name.
df = pd.read_csv('../data/celeba-dataset/faces/labels.csv')
def validation_func(x):
    """Return True when the item path `x` contains the text 'validation'.

    `x` may be a plain string or a path-like object; it is converted with
    str() first, because `in` is not supported on pathlib paths.
    """
    return 'validation' in str(x)

In [251]:
## Augmentation settings: flipping is switched off, while rotation and lighting
## changes are allowed up to the given maxima (units as defined by fastai's
## get_transforms - presumably degrees for max_rotate; confirm in the docs).
tfms = get_transforms(do_flip=False, flip_vert=False, max_rotate=30, max_lighting=0.3)

In [252]:
## Build the data bunch step by step instead of in one chained expression.
## NOTE(review): ImageItemList is the name used by the fastai version this
## notebook was written for - confirm it matches the installed version.

## 1. Items listed in labels.csv inside `path`
face_items = ImageItemList.from_csv(path, csv_name='labels.csv')
## 2. Items for which validation_func returns True go to the validation set
face_split = face_items.split_by_valid_func(validation_func)
## 3. Multi-label targets: the tags column is split on spaces
face_labelled = face_split.label_from_df(label_delim=' ')
## 4. Apply the augmentations at a size of 200
face_transformed = face_labelled.transform(tfms, size=200)
## 5. Wrap into a data bunch and normalise with the ImageNet statistics
data = face_transformed.databunch().normalize(imagenet_stats)