In [1]:
import numpy as np
import os
from collections import defaultdict
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from loading_img import load_images_from_folder
from skimage.transform import resize
from random import sample

In [2]:
classes = []

data_dir = os.path.join(os.getcwd(), 'dataset')
image_paths = defaultdict(list)

# Walk the dataset directory: every sub-folder is treated as one class
# (one folder per letter) and its name is recorded in `classes`.
for subdir, dirs, files in os.walk(data_dir):
    # Sorting `dirs` in place makes os.walk visit the folders in a stable
    # order, so the position of each class in `classes` does not depend on
    # the order in which the filesystem happens to list them.
    dirs.sort()

    # os.path.basename understands the separator of the current platform;
    # splitting on '\\' only worked with Windows-style paths.
    folder = os.path.basename(os.path.normpath(subdir))
    if folder == 'dataset':
        # The dataset root itself is not a class.
        continue
    classes.append(folder)

    # Record the path of every .jpg image found directly in this class folder.
    for file in sorted(files):
        filepath = os.path.join(subdir, file)
        if filepath.endswith(".jpg"):
            image_paths[folder].append(filepath)

In [5]:
# Map each class name to an integer id: its position in `classes`.
img_class_ids = dict(zip(classes, range(len(classes))))

In [6]:
# Flatten the per-class path lists into (image path, class id) pairs.
img_paths_and_classes = [
    (path, img_class_ids[classe])
    for classe, paths in image_paths.items()
    for path in paths
]

In [7]:
test_ratio = 0.2
split_seed = 42  # fixed seed so the train/test split can be reproduced
train_size = int(len(img_paths_and_classes) * (1 - test_ratio))

# Shuffle in place with a dedicated, seeded generator. An unseeded shuffle
# produced a different split on every run, so images could move between the
# training and test sets from one session to the next. Using a separate
# RandomState leaves the global NumPy random state untouched.
# Note: the split is only reproducible if `img_paths_and_classes` is built in
# the same order each time, and this cell is run once after building it.
np.random.RandomState(split_seed).shuffle(img_paths_and_classes)

# The first `train_size` shuffled pairs are for training, the rest for testing.
img_paths_and_classes_train = img_paths_and_classes[:train_size]
img_paths_and_classes_test = img_paths_and_classes[train_size:]

In [8]:
def prepare_image(image, target_width = 299, target_height = 299, max_zoom = 0.2):
    """Randomly zoom, crop and flip an image, then resize it (data augmentation).

    Parameters
    ----------
    image : array
        Image indexed as ``image[row, column, ...]``; ``shape[0]`` is used as
        the height and ``shape[1]`` as the width.
    target_width, target_height : int
        Size of the returned image, in pixels.
    max_zoom : float
        The crop box is shrunk by a random factor drawn between 1.0 and
        ``1.0 + max_zoom``; 0 disables zooming.

    Returns
    -------
    array of np.float32
        The augmented image, with ``target_height`` rows and ``target_width``
        columns.
    """

    # First, find the largest bounding box with the target aspect ratio that
    # fits within the image.
    height, width = image.shape[0], image.shape[1]
    image_ratio = width / height
    target_image_ratio = target_width / target_height
    crop_vertically = image_ratio < target_image_ratio
    crop_width = width if crop_vertically else int(height * target_image_ratio)
    crop_height = int(width / target_image_ratio) if crop_vertically else height

    # Shrink this bounding box by a random factor (dividing the dimensions by
    # a random number between 1.0 and 1.0 + `max_zoom`). Clamp to one pixel so
    # that very small images never yield an empty crop.
    resize_factor = np.random.rand() * max_zoom + 1.0
    crop_width = max(1, int(crop_width / resize_factor))
    crop_height = max(1, int(crop_height / resize_factor))

    # Select a random location on the image for this bounding box.
    # The upper bound of np.random.randint is exclusive, hence the `+ 1`:
    # without it the last valid offset could never be drawn, and the call
    # raised ValueError whenever the box spanned a whole dimension (which is
    # always the case for at least one dimension when max_zoom is 0).
    x0 = np.random.randint(0, width - crop_width + 1)
    y0 = np.random.randint(0, height - crop_height + 1)
    x1 = x0 + crop_width
    y1 = y0 + crop_height

    # Crop the image using the random bounding box we built.
    image = image[y0:y1, x0:x1]

    # Flip the image horizontally with 50% probability.
    if np.random.rand() < 0.5:
        image = np.fliplr(image)

    # Resize the image to the target dimensions. scikit-image expects the
    # output shape as (rows, cols), i.e. (height, width); passing
    # (width, height) swapped the two for non-square targets.
    # The resize function of scikit-image also converts the image to floats
    # ranging from 0.0 to 1.0.
    image = resize(image, (target_height, target_width))

    # Finally, ensure that the colors are represented as 32-bit floats.
    return image.astype(np.float32)

In [9]:
def prepare_batch(img_paths_and_classes, batch_size):
    """Build one randomly drawn, augmented mini-batch.

    `batch_size` (path, class id) pairs are drawn from `img_paths_and_classes`
    with `sample`, every selected image is read from disk and passed through
    `prepare_image`, and the results are stacked.

    Returns a tuple `(X_batch, y_batch)`: the stacked prepared images and the
    matching class ids as an int32 array, in the same order.
    """
    chosen = sample(img_paths_and_classes, batch_size)

    # Read every selected image first, then augment them one by one.
    raw_images = []
    for img_path, _ in chosen:
        raw_images.append(mpimg.imread(img_path)[:, :, :])
    X_batch = np.stack(list(map(prepare_image, raw_images)))

    class_ids = [class_id for _, class_id in chosen]
    y_batch = np.array(class_ids, dtype=np.int32)
    return X_batch, y_batch

In [10]:
# Build one augmented training mini-batch of 50 images.
X_batch, y_batch = prepare_batch(img_paths_and_classes_train, batch_size=50)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


(50, 299, 299, 3)