# Multiclass_Preprocessing_Seperate_Arrays
    
1. load
2. resize
3. rotate
4. store
    
Load the image as `mode L` for 8-bit gray scale
    
Resize the image to given size using antialias filter
    
Load original image into dataframe.
    
Rotate the original image seven times in 45$^{\circ}$ increments and load each rotated copy into the df. This adds seven extra training images for every original image that is rotated.

Store each source directory as its own numpy array. This is being done in an attempt to cut down on the time it takes to process these images.

In [None]:
# All the necessary imports, no bloat here!
import os
import sys
import time
import numpy as np
from PIL import Image

In [None]:
# Build a one-row image array and a one-element target list from the first
# .jpg image found in a class directory.

def initialize_array(directory):
    """Return the first .jpg of a class directory as a flattened row vector.

    The image is converted to 8-bit grayscale, resized to the module-level
    ``(width, height)`` and flattened to shape ``(1, length)``.

    Note that the image used here is an ordinary member of the directory, so
    a caller that afterwards iterates over the whole directory will meet the
    same image again.

    Args:
        directory: Name of the class sub-directory below
            ``../../images/train/``.

    Returns:
        A tuple ``(im1_array, target)`` where ``im1_array`` is a ``(1, length)``
        array holding the pixels and ``target`` is ``[directory]``.

    Raises:
        FileNotFoundError: If the directory contains no ``.jpg`` file.
    """
    # Create the directory name
    imgdir = '../../images/train/' + directory + '/'

    # Take the first .jpg entry; other entries (hidden or non-image files)
    # cannot be assumed to be openable as images.
    file_name = next(
        (name for name in os.listdir(imgdir) if name.endswith('.jpg')),
        None,
    )
    if file_name is None:
        raise FileNotFoundError(f'No .jpg files found in {imgdir}')

    # Open inside a context manager so the file handle is released promptly.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    with Image.open(imgdir + file_name) as source:
        im1 = source.convert('L').resize((width, height), Image.LANCZOS)

    # Flatten the pixel matrix into a single row vector
    im1_array = np.array(im1).reshape((1, length))

    # Create the target list
    target = [directory]

    return im1_array, target

In [None]:
# process_images loads, converts, resizes, flattens, optionally rotates, and
# stacks a single image onto the running image array.

def process_images(filename, directory, images, targets):
    """Append one image (and rotated copies for rare classes) to the data set.

    The image is converted to 8-bit grayscale, resized to the module-level
    ``(width, height)`` and flattened to a ``(1, length)`` row. For the classes
    listed in ``augmented_classes`` seven additional copies, rotated by
    45, 90, ..., 315 degrees, are appended as well.

    Args:
        filename: Path of the image file to load.
        directory: Class name; appended to ``targets`` once per stored row.
        images: 2-D array with ``length`` columns to which rows are added.
        targets: List of class names; extended in place.

    Returns:
        A tuple ``(images, targets)`` with the new rows and labels appended.
        ``images`` is a new array, ``targets`` is the same list object.
    """
    # Only the classes with extremely low image counts are augmented.
    # Both spellings of the non-motorized class are accepted: the singular
    # form is the actual directory name, the plural is kept for compatibility.
    augmented_classes = (
        'motorcycle',
        'bicycle',
        'non-motorized_vehicle',
        'non-motorized_vehicles',
    )

    # Open inside a context manager so the file handle is released promptly.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    with Image.open(filename) as source:
        image = source.convert('L').resize((width, height), Image.LANCZOS)

    # Flatten the original image to a row vector
    rows = [np.array(image).reshape((1, length))]

    # Create seven 45-degree rotations. The resized image must stay open for
    # the whole loop, otherwise later rotations operate on a closed image.
    if directory in augmented_classes:
        for step in range(1, 8):
            rotated = image.rotate(45 * step)
            rows.append(np.array(rotated).reshape((1, length)))

    # Stack all new rows in a single call and add one label per row
    images = np.vstack([images] + rows)
    targets.extend([directory] * len(rows))

    return images, targets

In [None]:
# From the given directory, step through each file. If it's a .jpg, process it,
# then save the resulting image array and target list to disk.

def get_images(directory):
    """Process every .jpg in one class directory and save the results.

    Each ``.jpg`` below ``../../images/train/<directory>/`` is passed to
    ``process_images``. The stacked image array is written with ``np.save`` to
    ``../../data/<directory>data`` and the labels to
    ``../../data/<directory>target`` (``np.save`` appends ``.npy``).
    Progress and summary information is printed along the way.

    Args:
        directory: Name of the class sub-directory to process.

    Returns:
        None.
    """
    # Start the clock
    start_time = time.time()

    # Create the complete image directory
    imgdir = '../../images/train/' + directory + '/'

    # Output file names for the image array and the target list
    img_save_file = '../../data/' + directory + 'data'
    target_save_file = '../../data/' + directory + 'target'

    # Start from an empty array and list. Seeding them with the first image
    # would store that image twice, because the loop below visits every file.
    image_array = np.empty((0, length), dtype=np.uint8)
    target_list = []

    imagefile_list = os.listdir(imgdir)

    print(f'Starting {directory} with {len(imagefile_list)} images')

    # Progress is reported whenever the row count crosses a multiple of
    # report_every; an exact-equality test would be skipped when several rows
    # are added at once.
    report_every = 10000
    next_report = report_every

    # Step through the files, processing the .jpg's
    for file in imagefile_list:
        filename = os.fsdecode(file)
        if not filename.endswith(".jpg"):
            continue

        fullfile = imgdir + filename
        image_array, target_list = process_images(fullfile, directory, image_array, target_list)

        if len(image_array) >= next_report:
            elapsed_time = time.time() - start_time
            print(f'{len(image_array)} images processed in {np.round(elapsed_time,2)}')
            next_report = (len(image_array) // report_every + 1) * report_every

    total_time = time.time() - start_time
    print(f'Shape of image_array: {image_array.shape}')
    print(f'Length of target: {len(target_list)}')
    print(f'Size of image_array in MB {sys.getsizeof(image_array) / 1024**2}')
    print(f'Total Time: {np.round(total_time,2)}s')

    # Write the array out
    np.save(img_save_file, image_array)
    print(f'File saved to: {img_save_file}')

    # Write target array
    np.save(target_save_file, target_list)
    print(f'Target saved to: {target_save_file}')

    return None

## Get the images and store them in the dataframe.

Step through the image folders in order

In [None]:
# Constants shared by the preprocessing functions

# Class directories to process, one per line
directory_list = [
    'articulated_truck',
    'background',
    'bicycle',
    'bus',
    'car',
    'motorcycle',
    'non-motorized_vehicle',
    'pedestrian',
    'pickup_truck',
    'single_unit_truck',
    'work_van',
]

# Target image size in pixels (square); adjust here for different sizes
height = 28
width = height

# Number of values in one flattened image
length = height * width

In [None]:
# Run the preprocessing for every class directory in directory_list and
# report the overall wall-clock time once all of them are done.
start_time = time.time()
for imgdir in directory_list:
    get_images(imgdir)
elapsed_time = time.time() - start_time
print(f'Total Time: {np.round(elapsed_time,2)}s')