# Downloading and Generating Masks for Non-Class Examples

In [45]:
from PIL import Image
from google_images_download import google_images_download
import Augmentor
import glob
import shutil
import os

In [46]:
def create_masks(path, output_dir="no_mask/masks"):
    """
    Generate an empty (all-black) RGB mask for one downloaded image.

    The mask has the same pixel dimensions as the source image and is
    written as a JPEG named ``mask_<image name without extension>.jpg``.

    Parameters
    ----------
    path : str
        Path to the source image. A trailing newline (as found when paths
        are read from a text file) is ignored.
    output_dir : str, optional
        Directory that receives the mask; created if it does not exist.
        Defaults to ``no_mask/masks``.

    Returns
    -------
    None
    """
    image_path = path.rstrip('\n')
    # splitext removes only the real extension. The previous rstrip('.jpg')
    # stripped any trailing '.', 'j', 'p' or 'g' characters, so "pig.jpg"
    # became "pi" and the mask no longer matched its image.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    # Only the size is needed; the context manager releases the file handle.
    with Image.open(image_path) as im:
        im_size = im.size
    os.makedirs(output_dir, exist_ok=True)
    # Image.new without a colour argument yields an all-zero (black) image.
    mask = Image.new('RGB', im_size)
    mask.save(os.path.join(output_dir, "mask_" + stem + ".jpg"), "JPEG")
    return

In [49]:
def _renumber_images(directory):
    """Rename every entry of `directory` to image0.jpg, image1.jpg, ... without overwriting any file."""
    # Listing first means a missing directory fails before anything is touched.
    filenames = sorted(os.listdir(directory))
    # Renaming in place can overwrite a not-yet-processed file that already
    # carries a target name (e.g. "image1.jpg" left by an earlier run), so the
    # entries are moved into a fresh sibling directory that then replaces the
    # original one.
    staging = directory + '.renumbering'
    os.mkdir(staging)  # raises FileExistsError if an earlier run crashed mid-way
    for index, filename in enumerate(filenames):
        os.rename(os.path.join(directory, filename),
                  os.path.join(staging, 'image' + str(index) + '.jpg'))
    os.rmdir(directory)
    os.rename(staging, directory)


def _move_into(src_dir, dst_dir):
    """Move every entry of `src_dir` into `dst_dir`, prefixing names that are already taken."""
    os.makedirs(dst_dir, exist_ok=True)
    taken = set(os.listdir(dst_dir))
    for filename in sorted(os.listdir(src_dir)):
        target = filename
        serial = 0
        while target in taken:
            serial += 1
            target = 'dup' + str(serial) + '_' + filename
        taken.add(target)
        shutil.move(os.path.join(src_dir, filename), os.path.join(dst_dir, target))


def create_image_dataset(search_term, number_of_images, augmentation=0):
    """
    Download images from Google and generate the corresponding empty
    masks for them.

    Images are downloaded into ``no_mask/temp``, end up in
    ``no_mask/images`` as ``image<N>.jpg``, and one mask per image is
    written by ``create_masks``. The downloader has a limit of 100 images,
    which can be overridden by setting up Selenium and Chromedriver.

    Parameters
    ----------
    search_term : str
        Keyword(s) passed to google_images_download.
    number_of_images : int
        Maximum number of images to download.
    augmentation : int, optional
        Number of augmented samples to generate with Augmentor. When
        non-zero, only those samples are written to ``no_mask/images`` by
        this function and the downloaded originals are discarded with the
        temp directory. When 0 (default), the downloaded images themselves
        are moved into ``no_mask/images``.

    Returns
    -------
    None
    """
    response = google_images_download.googleimagesdownload()
    arguments = {"keywords": search_term, "limit": number_of_images, "print_urls": False,
                 "output_directory": "no_mask", "image_directory": "temp", "format": "jpg"}
    response.download(arguments)

    # Normalise the downloaded file names to image<N>.jpg before any further processing.
    _renumber_images('no_mask/temp')

    if augmentation != 0:
        # output_directory is relative to the source directory, i.e. no_mask/images.
        p = Augmentor.Pipeline(source_directory="no_mask/temp", output_directory="../images")
        p.rotate(probability=0.6, max_left_rotation=10, max_right_rotation=10)
        p.zoom(probability=0.5, min_factor=1.1, max_factor=1.5)
        p.crop_random(probability=0.6, percentage_area=0.7)
        p.sample(augmentation)
    else:
        # Previously nothing populated no_mask/images in this case: the
        # downloads were deleted below and the listing of the images
        # directory failed. Keep the originals instead.
        _move_into('no_mask/temp', 'no_mask/images')
    shutil.rmtree('no_mask/temp')

    _renumber_images('no_mask/images')

    # exist_ok lets the function be re-run without failing on the masks directory.
    os.makedirs('no_mask/masks', exist_ok=True)
    for file in sorted(glob.glob("no_mask/images/*.jpg")):
        create_masks(file)
    return
    

In [50]:
# Smoke test: download up to 10 images for the keyword "test" and request
# 2 augmented samples from them.
create_image_dataset("test", 10, 2)


Item no.: 1 --> Item name = test
Evaluating...
Starting Download...
Completed Image ====> 1. test.jpg
Completed Image ====> 2. test.jpg
Completed Image ====> 3. bigstock-test-word-on-white-keyboard-27134336.jpg
Completed Image ====> 4. test-intelligenza-sociale.jpg
Completed Image ====> 5. bubble-test.jpg
Completed Image ====> 6. image.jpg
URLError on an image...trying next one... Error: <urlopen error timed out>
Completed Image ====> 7. standard-exam-600x400.jpg
Completed Image ====> 8. latest?cb=20180119233937.jpg
Completed Image ====> 9. resultados-test_9.jpg


Processing <PIL.Image.Image image mode=RGB size=251x168 at 0x10C2B0F98>: 100%|██████████| 2/2 [00:00<00:00, 57.22 Samples/s]

Completed Image ====> 10. road-sign-361513_960_720.jpg

Errors: 1

Initialised with 10 image(s) found.
Output directory set to no_mask/temp/../images.




In [8]:
# Import the nomask module and force a reload so that edits made to nomask.py
# since it was first imported are picked up without restarting the kernel.
import nomask
import importlib
importlib.reload(nomask)

<module 'nomask' from '/Volumes/Work_Drive/temp/clomask/data-acquisition/nomask_generator/nomask.py'>

In [9]:
# NOTE(review): the arguments presumably mirror create_image_dataset above
# (search term, number of images) — confirm against nomask.py.
object_1 = nomask.NoMask("test", 10)

In [10]:
# Run the class-based version of the workflow; the log below is what this call printed.
object_1.create_image_dataset()


Item no.: 1 --> Item name = test
Evaluating...
Starting Download...
Completed Image ====> 1. test.jpg
Completed Image ====> 2. test.jpg
Completed Image ====> 3. bigstock-test-word-on-white-keyboard-27134336.jpg
Completed Image ====> 4. bubble-test.jpg
Completed Image ====> 5. test-intelligenza-sociale.jpg
Completed Image ====> 6. 160224172541-sat-test-bubble-super-tease.jpg
Completed Image ====> 7. test-computer-key-in-blue-showing-quiz-or-online-questionnaire_fydjnndu.jpg
Completed Image ====> 8. image.jpg
Completed Image ====> 9. resultados-test_9.jpg
Completed Image ====> 10. standard-exam-600x400.jpg

Errors: 0

