In [37]:
import os
import cv2
import random

In [None]:
# Dataset root (its sub-folders are the class labels) and the destination
# root for the resized copies.
base_folder = "./flower_photos/"
resized_folder = "./resized_flower_photos/"

# Labels are the visible sub-directories of base_folder. Plain files sitting
# next to the class folders are skipped: later cells call os.listdir() on
# every label's folder and would fail on a regular file.
all_labels = sorted(
    name for name in os.listdir(base_folder)
    if not name.startswith(".") and os.path.isdir(os.path.join(base_folder, name))
)
# Each label's integer id is its position in the sorted label list.
label_mapping = {label: i for i, label in enumerate(all_labels)}

# Persist the labels one per line, in id order.
# Text mode is required here: writing a str to a file opened with 'wb'
# raises TypeError on Python 3.
label_file = "./label.txt"
with open(label_file, 'w') as thefile:
    thefile.write("\n".join(all_labels))

In [None]:
def preprocess_step():
    """Write resized copies of every label's ``.jpg`` images.

    For each label in ``all_labels`` the folder ``resized_folder/<label>`` is
    created if missing; every ``.jpg`` in ``base_folder/<label>`` is then
    converted to RGB, resized to 299x299 and saved as JPEG under the same
    file name. Returns nothing; the result is the files written to disk.

    NOTE(review): ``get_train_test_data`` moves held-out files from
    ``resized_folder/<label>`` into ``<label>/test``. Running this step again
    afterwards re-creates those files in ``<label>``, so they would be listed
    as training images too. Confirm this is only run before the split.
    """
    # Imported locally because the notebook's import cell does not bring
    # Pillow's Image into scope; a fresh kernel would otherwise raise
    # NameError on the first resize.
    from PIL import Image

    def resize_image(image_path, resize_width=299, resize_height=299):
        """Open ``image_path``, convert to RGB and return it resized (bicubic)."""
        img = Image.open(image_path).convert("RGB")
        return img.resize(size=(resize_width, resize_height), resample=Image.BICUBIC)

    def resize_all_images(image_folder, save_folder, resize_width=299, resize_height=299):
        """Resize each ``.jpg`` in ``image_folder`` into ``save_folder``.

        Returns the list of paths that were written.
        """
        resized_paths = []
        for file_name in os.listdir(image_folder):
            if not file_name.endswith(".jpg"):
                continue
            img = resize_image(os.path.join(image_folder, file_name), resize_width, resize_height)
            new_image_path = os.path.join(save_folder, file_name)
            img.save(new_image_path, "JPEG")
            resized_paths.append(new_image_path)
        return resized_paths

    for label in all_labels:
        label_folder = os.path.join(base_folder, label)
        resize_label_folder = os.path.join(resized_folder, label)
        print(resize_label_folder)
        if not os.path.isdir(resize_label_folder):
            os.makedirs(resize_label_folder)

        # The list of written paths is not needed here, so it is not bound
        # to a name.
        resize_all_images(label_folder, resize_label_folder)

In [None]:
# Run the resize pass: writes resized copies of every label's images under resized_folder.
preprocess_step()

In [38]:
def split_train_test(image_list, fraction=0.1):
    """Randomly split ``image_list`` into a train part and a test part.

    Args:
        image_list: sequence of items to split. It is not modified.
        fraction: share of the items placed in the test part; the test size
            is ``int(fraction * len(image_list))`` (truncated).

    Returns:
        ``(train, test)``: two lists that together hold every input item
        exactly once.
    """
    # Shuffle a copy: shuffling the argument in place would silently reorder
    # the caller's list as a side effect of asking for a split.
    shuffled = list(image_list)
    random.shuffle(shuffled)
    n_test = int(fraction * len(shuffled))
    return shuffled[n_test:], shuffled[:n_test]

In [51]:
def get_train_test_data():
    """Build the train and test sample lists from ``resized_folder``.

    For each label, the ``.jpg`` files directly inside the label folder are
    the training images and those inside its ``test`` sub-folder are the test
    images. When the ``test`` sub-folder holds no ``.jpg`` yet, a split is
    drawn with ``split_train_test`` and the chosen test files are moved into
    it, so later calls pick up that split from disk.

    Returns:
        ``(train_images, test_images)``; every entry is
        ``[image_path, label_index]``.
    """
    def list_jpgs(folder):
        # File names (not full paths) in `folder` that end with ".jpg".
        return [name for name in os.listdir(folder) if name.endswith(".jpg")]

    train_images, test_images = [], []

    for label in all_labels:
        train_dir = os.path.join(resized_folder, label)
        train_names = list_jpgs(train_dir)

        test_dir = os.path.join(train_dir, 'test')
        if not os.path.isdir(test_dir):
            os.makedirs(test_dir)
        test_names = list_jpgs(test_dir)

        if len(test_names) == 0:
            # No split on disk yet: draw one and move the held-out files.
            train_names, test_names = split_train_test(train_names)
            for name in test_names:
                os.rename(os.path.join(train_dir, name), os.path.join(test_dir, name))

        train_images.extend(
            [os.path.join(train_dir, name), label_mapping[label]] for name in train_names
        )
        test_images.extend(
            [os.path.join(test_dir, name), label_mapping[label]] for name in test_names
        )
        print(label, len(train_names), len(test_names))

    return train_images, test_images

In [53]:
train_images, test_images = get_train_test_data()

('daisy', 570, 63)
('dandelion', 809, 89)
('roses', 577, 64)
('sunflowers', 630, 69)
('tulips', 720, 79)


In [54]:
def batch_iter(data_list, batch_size=16, shuffle=True):
    """Yield consecutive batches of ``data_list``.

    Args:
        data_list: sequence of samples. It is not modified.
        batch_size: maximum number of samples per batch; must be positive.
            The last batch is shorter when the length is not a multiple.
        shuffle: when true, batches are drawn from a shuffled copy of
            ``data_list``; otherwise they follow the input order.

    Yields:
        Slices of at most ``batch_size`` samples; nothing for empty input.

    Raises:
        ValueError: if ``batch_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size <= 0:
        # A zero batch size previously yielded empty batches forever.
        raise ValueError("batch_size must be a positive integer")

    samples = data_list
    if shuffle:
        # Shuffle a copy so the caller's list keeps its order.
        samples = list(data_list)
        random.shuffle(samples)

    # Slice by offset instead of re-slicing the remainder on every step,
    # which copied the whole tail each time (quadratic in the list length).
    for start in range(0, len(samples), batch_size):
        yield samples[start:start + batch_size]