Notebook for downloading an example data set from Google Images and splitting it into train and valid folders for use with the fastai function ImageDataBunch.from_folder().

In [1]:
from google_images_download import google_images_download

import random
import os
import shutil

In [2]:
def create_directory(d):
    """Create directory ``d`` (and any missing parents) if it does not exist.

    Args:
        d: Path of the directory to create.

    Raises:
        FileExistsError: If ``d`` already exists but is not a directory.
    """
    # exist_ok=True replaces a separate os.path.exists() check, which is
    # racy: another process could create the directory between the check
    # and the makedirs() call and make it fail.
    os.makedirs(d, exist_ok=True)

def create_face_training_data(k=None, images_dir=None, n_images=30, train_ratio=0.7):
    """Download face images for keyword ``k`` and split them into train/valid.

    Images are downloaded into ``images_dir`` and then moved at random into
    ``<images_dir>/train/<k>`` and ``<images_dir>/valid/<k>``.

    Args:
        k: Search keyword; also used as the class sub-folder name.
        images_dir: Root directory that receives the downloaded images.
        n_images: Maximum number of images to request.
        train_ratio: Fraction (0..1) of the downloaded images that is moved
            to the train folder; the remainder goes to the valid folder.

    Returns:
        None. Does nothing when ``k`` or ``images_dir`` is missing or empty.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].
    """
    if not (k and images_dir):
        return
    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1')

    train_dir = os.path.join(images_dir, 'train', k)
    valid_dir = os.path.join(images_dir, 'valid', k)
    for directory in (images_dir, train_dir, valid_dir):
        create_directory(directory)

    response = google_images_download.googleimagesdownload()
    result = response.download(arguments={'keywords': k,
                                          'type': 'face',
                                          'format': 'jpg',
                                          'output_directory': images_dir,
                                          'no_directory': True,
                                          'limit': n_images})
    # NOTE(review): some google_images_download releases appear to return a
    # (paths, errors) tuple instead of only the paths dict -- accept both.
    paths_by_keyword = result[0] if isinstance(result, tuple) else result

    # Skip empty entries (presumably downloads that failed); a keyword that
    # is absent from the result simply yields nothing to move.
    files = [f for f in paths_by_keyword.get(k, []) if f]
    random.shuffle(files)

    split_point = int(train_ratio * len(files))
    batches = ((train_dir, files[:split_point]),
               (valid_dir, files[split_point:]))
    for dest_dir, batch in batches:
        for f in batch:
            # Move into the directories created above rather than deriving
            # the destination from each file's own parent directory.
            shutil.move(f, os.path.join(dest_dir, os.path.basename(f)))
            

In [None]:
# People to build a face data set for; each name is passed as the search keyword.
model_list = [
    'lily cole',
    'kate upton',
    'kate moss',
    'miranda kerr',
    'naomi campbell',
    'cindy crawford',
    'claudia schiffer',
]

# Root folder passed as the download/output directory.
images_dir = './Images'
# Number of images requested per person.
images_per_person = 50

for k in model_list:
    create_face_training_data(
        k=k,
        images_dir=images_dir,
        n_images=images_per_person,
    )