In [None]:
# default_exp data_unlabeled

In [None]:
from fastai import *
from fastai.vision import *
from pathlib import Path
import cv2

In [None]:
import matplotlib.pyplot as plt

def plot(im, figsize=None): # im - np.arr(h,w,3), figsize - tuple(2)
    """Display `im` in a new matplotlib figure.

    im      -- image array; anything whose squeezed shape is 2-D is drawn
               with the gray colormap, everything else is passed to imshow as is.
    figsize -- optional (width, height) forwarded to plt.figure.

    Returns whatever plt.show() returns.
    """
    plt.figure(figsize=figsize)
    squeezed = im.squeeze()
    # The grayscale test is done on the squeezed array, so the squeezed array is
    # also what must be drawn: imshow rejects shapes such as (h, w, 1).
    if len(squeezed.shape) == 2: plt.imshow(squeezed, cmap='gray')
    else: plt.imshow(im)
    return plt.show()

In [None]:
# Root folder holding every face dataset used below.
# NOTE: this is an absolute, machine-specific Windows path; adjust it before running elsewhere.
FACE_PATH = Path("C:\\Users\\domin\\Documents\\FACE DATASETS\\")
# Destination folder for the images saved by save_image.
OUT_DIR = FACE_PATH.joinpath("unlabeled", "images")

In [None]:
def save_image(image, im_name):
    """Save `image` into OUT_DIR as "<im_name>.jpg".

    image   -- any object exposing a `save(path)` method.
    im_name -- converted with str() to form the file name (without extension).
    """
    file_name = str(im_name)+".jpg"
    image.save(OUT_DIR/file_name)
    
def save_cv2_image(im, im_name):
    """Wrap the array `im` in an `Image` and store it through save_image.

    The array is turned into a tensor, its last axis is moved to the front
    (permute(2,0,1)), and the values are cast to float32 and divided by 255.
    # assumes im is an (h, w, channels) array with values in 0..255 -- TODO confirm
    """
    channels_first = tensor(im).permute(2,0,1)
    scaled = channels_first.type(torch.float32)/255.
    save_image(Image(scaled), im_name)

In [None]:
def prepare_image(image): # fastai.vision.Image
    """Convert `image` to a uint8 numpy array at least 200 pixels along its first two axes.

    The pixel data (image.data, passed through image2np) is multiplied by 255 and
    cast to uint8. While either of the first two dimensions is below 200 the
    array is doubled in size with cv2.resize.
    """
    min_side = 200
    pixels = (image2np(image.data)*255).astype(np.uint8)
    # Keep doubling until the smaller of the two leading dimensions reaches the minimum.
    while min(pixels.shape[0], pixels.shape[1]) < min_side:
        pixels = cv2.resize(pixels, dsize=None, fx=2, fy=2)
    return pixels

In [None]:
def pad_bbox(bbox=[10,10,20,20], pad=[.35,.5,.4,.5], im_shape=[100,200,3]):
    """Grow a (top, left, bottom, right) box by a fraction of its size, clipped to the image.

    bbox     -- [t, l, b, r] box coordinates.
    pad      -- fractions applied to [top, left, bottom, right]; the top/bottom
                fractions are multiplied by the box height, the left/right ones
                by the box width, and the products are cast to int.
    im_shape -- image shape; only im_shape[0] and im_shape[1] are used as the
                upper bounds for bottom and right.

    Returns the padded box as a list [t, l, b, r].
    """
    top, left, bottom, right = bbox
    height, width = bottom-top, right-left
    margins = (np.array(pad)*np.array([height, width, height, width])).astype(int)
    # Top/left move outwards by subtracting, bottom/right by adding.
    growth = np.array([-margins[0], -margins[1], margins[2], margins[3]])
    top, left, bottom, right = np.array(bbox) + growth
    return [max(0,top), max(0,left), min(im_shape[0],bottom), min(im_shape[1],right)]

In [None]:
def modify_and_save_image(im_path, i, data_name, im_func=None):
    """Load one image, optionally transform it, and save it only if a face is detected.

    im_path   -- path handed to open_image.
    i         -- index appended to `data_name` to build the saved file name.
    data_name -- prefix of the saved file name.
    im_func   -- optional callable applied to the prepared array before detection.

    Nothing is saved when detect_faces returns an empty result.
    """
    im = prepare_image(open_image(im_path))
    if im_func is not None:
        im = im_func(im)
    if len(detect_faces(im)) > 0:
        save_cv2_image(im, data_name+str(i))

### Caffe Face detection

In [None]:
# Caffe network read from the prototxt / caffemodel pair under ../models.
net = cv2.dnn.readNetFromCaffe(
    "../models/deploy.prototxt.txt",
    "../models/res10_300x300_ssd_iter_140000.caffemodel",
)

In [None]:
def detect_faces(image, min_confidence=0.5):
    """Run the module-level Caffe network `net` on `image` and return confident detections.

    The image is resized to 300x300, turned into a blob with the mean
    (104.0, 177.0, 123.0) and fed to `net`. Detections whose confidence exceeds
    `min_confidence` are scaled to the original width/height and cast to int.

    Returns a list of (confidence, [startY, startX, endY, endX]) tuples.

    NOTE(review): an RGB->BGR conversion of `image` was left commented out in
    the original code; confirm which channel order the model expects.
    """
    height, width = image.shape[:2]
    resized = cv2.resize(image, (300, 300))
    blob = cv2.dnn.blobFromImage(resized, 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    scale = np.array([width, height, width, height])
    found = []
    for idx in range(detections.shape[2]):
        score = detections[0, 0, idx, 2]
        if not (score > min_confidence):
            continue
        start_x, start_y, end_x, end_y = (detections[0, 0, idx, 3:7] * scale).astype("int")
        found.append((score, [start_y, start_x, end_y, end_x]))
    return found

### Cascade face detection

In [None]:
def cascade_detect_regions(img, cascade_classifier):
    """Detect regions in `img` with `cascade_classifier`.

    `img` is converted with COLOR_RGB2GRAY, cast to uint8 and histogram-equalised
    before detectMultiScale is called on it.

    Returns a list of (top, left, bottom, right) tuples built from the
    (left, top, width, height) regions reported by the classifier.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY).astype(np.uint8)
    equalised = cv2.equalizeHist(gray)
    boxes = []
    for left, top, width, height in cascade_classifier.detectMultiScale(equalised):
        boxes.append((top, left, top+height, left+width)) # out: (t,l,b,r)
    return boxes

# Frontal-face Haar cascade, loaded from the XML file resolved by cv2.samples.findFile.
face_cascade = cv2.CascadeClassifier()
face_cascade.load(cv2.samples.findFile('../haarcascades/haarcascade_frontalface_default.xml'))

# NOTE: this rebinds `detect_faces`. Once this cell has run, the Caffe-based
# function of the same name defined earlier in the notebook is no longer reachable
# under that name, and the return format differs (plain (t,l,b,r) tuples, no confidence).
def detect_faces(img):
    """Return the (t,l,b,r) face regions found in `img` by `face_cascade`."""
    return cascade_detect_regions(img, face_cascade)

### lfw

In [None]:
def lfw_paths():
    """Collect the image files of every sub-directory of FACE_PATH/"LFW"/"lfw".

    Returns a flat list with the results of get_image_files for each sub-directory.
    """
    root = FACE_PATH/"LFW"/"lfw"
    im_paths = []
    for cat_path in root.iterdir():
        # Only descend into directories (same guard as youtube_path); a stray
        # file in the dataset root must not be handed to get_image_files.
        if not cat_path.is_dir(): continue
        im_paths += get_image_files(cat_path)
    return im_paths
len(lfw_paths())

In [None]:
def prep_image(im):
    """Return `im` resized by a factor of 1.5 along both axes with cv2.resize."""
    factor = 1.5
    return cv2.resize(im, dsize=None, fx=factor, fy=factor)

# LFW: every image goes through prep_image before face detection; saved names start with "lfw".
# presumably `parallel` calls the function with (item, index) -- confirm against the fastai docs
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="lfw",
    im_func=prep_image,
)
parallel(_modify_and_save_images, lfw_paths(), max_workers=1)

### sof

In [None]:
def sof_paths():
    """Return what get_image_files finds in FACE_PATH/"SOF"/"original images"."""
    return get_image_files(FACE_PATH/"SOF"/"original images")
len(sof_paths())

In [None]:
# SOF: no extra transform; saved names start with "sof".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="sof",
)
parallel(_modify_and_save_images, sof_paths(), max_workers=1)

### facescrub

In [None]:
# FaceScrub dataset folder and the two index files stored inside it.
path = FACE_PATH.joinpath("FACESCRUB")
actors_path = path.joinpath("facescrub_actors.txt")
actress_path = path.joinpath("facescrub_actresses.txt")

In [None]:
def readlines(p):
    """Return every line of the text file at `p` (line endings are kept)."""
    # The context manager closes the handle as soon as the lines are read,
    # instead of leaving an open file behind for the garbage collector.
    with open(p) as f:
        return f.readlines()

def parseline(l):
    """Split the line `l` on tab characters and return the list of fields.

    A trailing newline, if present, stays attached to the last field.
    """
    return l.split('\t')

In [None]:
# Running count of failed downloads; incremented by download_image.
errors = 0
def download_image(dest, url, i, timeout=4):
    """Download `url` into the directory `dest` as "<i as 8 digits>.<extension>".

    dest    -- target directory (a Path).
    url     -- address of the image.
    i       -- integer index used for the zero-padded file name.
    timeout -- forwarded to download_url.

    The extension is taken from the path component of the URL only, so query
    strings or fragments never leak into the file name; "jpg" is used when the
    URL path carries no extension. Any exception is swallowed on purpose
    (best-effort bulk download) and counted in the module-level `errors`.
    """
    global errors
    from posixpath import splitext
    from urllib.parse import urlsplit
    try:
        # URL paths use "/" on every platform, hence posixpath.splitext.
        suffix = splitext(urlsplit(url).path)[1].lstrip('.') or "jpg"
        target = dest/f"{i:08d}.{suffix}"
        download_url(url, target, overwrite=True, show_progress=False, timeout=timeout, retries=1)
    except Exception:
        errors += 1

In [None]:
def facescrub_download(max_workers=1, timeout=1):
    """Download every image listed in the two FaceScrub index files.

    max_workers -- forwarded to parallel.
    timeout     -- per-download timeout forwarded to download_image.

    The first line of each index file is skipped; every remaining line must
    split into exactly six tab-separated fields, of which the fourth is the URL.
    Files are written to FACE_PATH/"FACESCRUB"/"images". The number of failed
    downloads is printed at the end.

    NOTE(review): the failure counter is a module global; if parallel uses
    separate worker processes for max_workers > 1, their increments would
    presumably not be visible here -- confirm.
    """
    global errors
    # Start from zero so that re-running this function reports the failures of
    # this run only, not a total accumulated over earlier runs.
    errors = 0
    root = FACE_PATH/"FACESCRUB"
    images_path = root/"images"
    urls = []
    for txt_path in [root/"facescrub_actors.txt", root/"facescrub_actresses.txt"]:
        lines = map(parseline, readlines(txt_path)[1:])
        urls += [url for name, image_id, face_id, url, bbox, sha256 in lines]
    parallel(partial(download_image, images_path, timeout=timeout), urls, max_workers=max_workers)
    print("errors:", errors)

In [None]:
# Run the FaceScrub download with the function defaults (max_workers=1, timeout=1).
facescrub_download()

In [None]:
def facesrub_verify(): # deletes broken images
    """Run verify_images with delete=True on the FaceScrub images directory.

    The directory is derived from FACE_PATH here (as in facescrub_download)
    rather than from the generic module-level name `path`, so that a later
    top-level reassignment of `path` cannot redirect a deleting operation to
    another folder.
    """
    images_path = FACE_PATH/"FACESCRUB"/"images"
    verify_images(images_path, delete=True, max_workers=1, max_size=None, recurse=False, dest=images_path)

In [None]:
# Check the downloaded FaceScrub files (verify_images is invoked with delete=True).
facesrub_verify()

### bio id

In [None]:
def bioid_paths():
    """Return what get_image_files finds in FACE_PATH/"BIOID"/"images"."""
    return get_image_files(FACE_PATH/"BIOID"/"images")
len(bioid_paths())

In [None]:
# BioID: no extra transform; saved names start with "bioid".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="bioid",
)
parallel(_modify_and_save_images, bioid_paths(), max_workers=1)

### utk

In [None]:
def utk_path():
    """Collect the image files of the part1, part2 and part3 folders under FACE_PATH/"UTK_FACE"."""
    root = FACE_PATH/"UTK_FACE"
    collected = []
    for part_dir in (root/("part"+str(number)) for number in (1, 2, 3)):
        collected.extend(get_image_files(part_dir))
    return collected
len(utk_path())

In [None]:
# UTKFace: no extra transform; saved names start with "utk".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="utk",
)
parallel(_modify_and_save_images, utk_path(), max_workers=1)

### youtube

In [None]:
def choice(arr, num_items):
    """Return a list of `num_items` elements, each drawn independently with random.choice.

    Draws are independent, so the same element of `arr` can appear more than once.
    """
    picked = []
    for _ in range(num_items):
        picked.append(random.choice(arr))
    return picked

In [None]:
def youtube_path():
    """Pick three random entries from every video folder of the YouTube frames dataset.

    The layout walked is FACE_PATH/"YOUTUBE"/"frame_images_DB"/<category>/<video>/<entry>;
    non-directory items at the category level are skipped. Entries are drawn
    with `choice`, so one entry may be picked more than once.
    """
    root = FACE_PATH/"YOUTUBE"/"frame_images_DB"
    sampled = []
    category_dirs = (entry for entry in root.iterdir() if entry.is_dir())
    for category_dir in category_dirs:
        for video_dir in category_dir.iterdir():
            entries = list(video_dir.iterdir())
            sampled.extend(choice(entries, 3))
    return sampled
len(youtube_path())

In [None]:
# YouTube frames: no extra transform; saved names start with "yt".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="yt",
)
parallel(_modify_and_save_images, youtube_path(), max_workers=1)

### imdb + wiki

In [None]:
def imdb_path():
    """Return what get_image_files finds, recursively, in FACE_PATH/"IMDB_WIKI"/"imdb_crop"."""
    return get_image_files(FACE_PATH/"IMDB_WIKI"/"imdb_crop", recurse=True)
len(imdb_path())

In [None]:
def wiki_path():
    """Return what get_image_files finds, recursively, in FACE_PATH/"IMDB_WIKI"/"wiki_crop"."""
    return get_image_files(FACE_PATH/"IMDB_WIKI"/"wiki_crop", recurse=True)
len(wiki_path())

In [None]:
# IMDB crops: no extra transform; saved names start with "imdb".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="imdb",
)
parallel(_modify_and_save_images, imdb_path(), max_workers=1)

In [None]:
# Wiki crops: no extra transform; saved names start with "wiki".
_modify_and_save_images = partial(
    modify_and_save_image,
    data_name="wiki",
)
parallel(_modify_and_save_images, wiki_path(), max_workers=1)

### create images txt

In [None]:
def create_images_txt(path=FACE_PATH/"unlabeled"/"images.txt"):
    """Write the name of every entry in OUT_DIR to the text file `path`, one per line.

    path -- output file; it is opened in "w" mode, so existing content is replaced.

    Names are taken from `Path.name`, which works with either path separator,
    and are written in sorted order so the listing is reproducible.
    """
    names = sorted(entry.name for entry in OUT_DIR.iterdir())
    with open(str(path), "w") as f:
        for name in names:
            f.write(name+"\n")

In [None]:
# Write the listing to the default location (FACE_PATH/"unlabeled"/"images.txt").
create_images_txt()