## Image Preprocessing

The current product images are raw, unformatted pixel data. Before the images can be used to train a custom model, the image data needs preprocessing to create new pixel data in a normalised, clean format, so that images are comparable within the dataset.

Images may have glare, saturation differences, exposure differences, or contrast issues, all of which can make one set of pixels not comparable to the next. Some possible preprocessing steps are: contrast enhancement, rotation, translation, and feature extraction through various transformations.

In our case, we will transform all images to a particular format: 128x128x3 pixels (that is, a square image of 128x128 pixels with 3 colour channels).

In [42]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
from PIL import ImageOps
import urllib3
import sys
import os
%matplotlib inline

In [36]:
def resize(image_name, dirPath):
    """
    Scale an image to fit a 128x128 square and pad the remainder with black.

    The aspect ratio is preserved: the longer side is scaled to 128 pixels and
    the scaled image is centred on a black 128x128 RGB canvas, so the picture
    is never stretched. The result is saved in the same directory as
    "<original name without extension>_resized.jpeg".

    Parameters
    ----------
    image_name : str
        File name of the image inside ``dirPath``.
    dirPath : str
        Directory containing the image (with or without a trailing separator).

    Returns
    -------
    str
        File name (not the full path) of the resized image.
    """
    desired_size = 128

    # Use a context manager so the source file is closed before the caller
    # tries to delete it.
    with Image.open(os.path.join(dirPath, image_name)) as im:
        old_size = im.size  # (width, height)
        ratio = float(desired_size) / max(old_size)
        # Clamp to 1 pixel so very elongated images never get a zero-sized side.
        new_size = tuple(max(1, int(x * ratio)) for x in old_size)

        # Convert to RGB first so palette/alpha/greyscale inputs can be saved
        # as JPEG. LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
        # which no longer exists in Pillow 10+.
        scaled = im.convert("RGB").resize(new_size, Image.LANCZOS)

    # Paste the scaled image in the centre of a black square canvas.
    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = ((desired_size - new_size[0]) // 2,
              (desired_size - new_size[1]) // 2)
    new_im.paste(scaled, offset)

    filename, _ = os.path.splitext(image_name)
    new_filename = filename + "_resized.jpeg"
    new_im.save(os.path.join(dirPath, new_filename), "JPEG")

    return new_filename

In [37]:
def equalize_image(image_name, dirPath):  # e.g. "imagename_resized.jpeg"
    """
    Equalise the histogram of an image so its pixel values span [0, 255].

    The image is read from ``dirPath``, passed through ``ImageOps.equalize``
    and saved in the same directory as
    "<original name without extension>_equalized.jpeg".

    Parameters
    ----------
    image_name : str
        File name of the image inside ``dirPath``.
    dirPath : str
        Directory containing the image (with or without a trailing separator).

    Returns
    -------
    str
        File name (not the full path) of the equalised image.
    """
    # Use a context manager so the source file is closed before the caller
    # tries to delete it.
    with Image.open(os.path.join(dirPath, image_name)) as im:
        # To inspect the effect, plot a histogram of np.array(im).flatten()
        # before and after this call.
        im_out = ImageOps.equalize(im)

    # Save equalized image
    filename, _ = os.path.splitext(image_name)
    new_filename = filename + "_equalized.jpeg"
    im_out.save(os.path.join(dirPath, new_filename), "JPEG")

    return new_filename
    

#### Loop through all images within the gear_images folder, and apply the resize and equalise functions

In [None]:
rootDir = "gear_images/"
directories = ['axes', 'boots', 'carabiners', 'crampons', 'gloves', 'hardshell_jackets', 'harnesses',
              'helmets', 'insulated_jackets', 'pulleys', 'rope', 'tents']

# Count the files up front so progress can be reported against a total.
number_files = 0
for subDirectory in directories:
    dirPath = os.path.join(rootDir, subDirectory)
    filelist = os.listdir(dirPath)
    number_files += len(filelist)
print(number_files)

progress = 0

for directory in directories:
    folderPath = os.path.join(rootDir, directory)
    print('Folder: {}'.format(folderPath))

    # os.listdir returns a snapshot, so files created or removed below do not
    # affect this iteration.
    for fname in os.listdir(folderPath):

        # Image processing
        try:
            resized_image_name = resize(fname, folderPath)
            equalized_image_name = equalize_image(resized_image_name, folderPath)
        except Exception as e:
            # Keep the original file when processing fails, so no data is lost.
            print('Following exception occured during the processing of {}: {}'.format(fname, str(e)))
        else:
            # Cleaning up: only the equalized image is kept. Both the original
            # and the intermediate resized file live inside folderPath.
            for obsolete_name in (fname, resized_image_name):
                try:
                    os.remove(os.path.join(folderPath, obsolete_name))
                except OSError as e:
                    print('Exception occured when removing {} : {}'.format(obsolete_name, str(e)))

        progress += 1

        if progress % 100 == 0:
            print('Current progress : {}/{}'.format(str(progress), str(number_files)))