In [8]:
import os
from matplotlib.pyplot import imshow
import numpy as np
import shutil

In [9]:
from PIL import Image

In [10]:
from keras.preprocessing.image import load_img

In [11]:
import random
# Seed handed to the `random` module when split_train_test picks each
# category's test image.
SEED = 101

In [12]:
# One directory path per category, for categories numbered 1..30
# (index 0 of this list is category 1).
# NOTE(review): the 30 is hard-coded here separately from `no_cats` — keep the two in sync.
category_paths = [f'data/categories_castle/categories/{i}/' for i in range(1, 31)]

In [13]:
# Number of categories; split_train_test walks the folders named 1..no_cats.
no_cats = 30

In [14]:
def get_filenames(mypath):
    """Return the sorted names of the non-hidden entries in ``mypath``.

    Args:
        mypath: Directory to list.

    Returns:
        list[str]: Entry names (without the directory prefix), skipping any
        name that starts with ``.``. The list is sorted because
        ``os.listdir`` makes no promise about ordering, and callers index
        into the result or draw from it with a seeded RNG.

    Raises:
        OSError: If ``mypath`` cannot be listed (propagated from ``os.listdir``).
    """
    return sorted(name for name in os.listdir(mypath) if not name.startswith('.'))

In [15]:
def view_photo(category, number):
    """Show one image from a category and print basic details about it.

    Args:
        category: 1-based category number; selects an entry of the
            module-level ``category_paths`` list.
        number: 1-based position of the file within the listing that
            ``get_filenames`` returns for that category directory.

    Prints the file name, the type of the loaded image object and its
    ``format``, ``mode`` and ``size`` attributes, then draws it with
    ``imshow``.

    Raises:
        IndexError: If ``category`` or ``number`` is past the end of the
            respective list. (Values of 0 or below wrap around, as with any
            Python list index.)
    """
    category_path = category_paths[category - 1]
    # List the directory once and reuse the name, so the printed file name
    # is always the file that is actually loaded and displayed.
    filename = get_filenames(category_path)[number - 1]
    test_img = os.path.join(category_path, filename)
    print(filename)
    # load the image
    img = load_img(test_img)
    # report details about the image
    print(type(img))
    print(img.format)
    print(img.mode)
    print(img.size)
    # show the image
    imshow(img)

In [16]:
def overwrite_dir(dir):
    """Leave ``dir`` as a freshly created, empty directory.

    If something already exists at ``dir`` it is removed with
    ``shutil.rmtree`` first; the directory is then created with
    ``os.makedirs``, which also creates any missing parent directories.
    """
    already_present = os.path.exists(dir)
    if already_present:
        # Drop the old tree so none of its previous contents survive.
        shutil.rmtree(dir)
    os.makedirs(dir)

In [26]:
def compress_and_save(image_path, dest_image_path, size=(150, 150), quality=90):
    """Resize an image and write it to ``dest_image_path`` as a JPEG.

    Args:
        image_path: Path of the source image.
        dest_image_path: Path the JPEG is written to. The data is always
            JPEG-encoded, whatever extension this path carries.
        size: Target ``(width, height)`` in pixels. The image is resized to
            exactly this size, so the aspect ratio is not preserved.
            Defaults to ``(150, 150)``.
        quality: JPEG quality setting handed to ``Image.save``. Defaults to 90.

    Raises:
        OSError: If the source cannot be opened or the destination cannot be
            written (propagated from Pillow).
    """
    # Modes Pillow's JPEG writer accepts directly. Anything else (for example
    # RGBA or palette-based PNGs) is converted to RGB first, otherwise
    # save() would raise.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    # The context manager closes the source file handle even if resizing fails.
    with Image.open(image_path) as source:
        prepared = source if source.mode in jpeg_modes else source.convert('RGB')
        # resize() returns a new, fully loaded image, so it stays usable
        # after the source file has been closed.
        resized = prepared.resize(size, Image.LANCZOS)

    resized.save(dest_image_path, 'JPEG', quality=quality)

Splitting data into train and test

In [28]:
def split_train_test(base_dir, no_cats, compress=False):
    """Rebuild the ``train/`` and ``test/`` folders from ``categories/``.

    For each category ``c`` in ``1..no_cats`` the files found in
    ``data/<base_dir>/categories/<c>/`` are distributed as follows: one file,
    chosen with an RNG seeded by ``SEED``, goes to ``test/<c>/`` and every
    other file goes to ``train/<c>/``.

    Any existing ``train/`` and ``test/`` folders under ``data/<base_dir>``
    are deleted and recreated first.

    Args:
        base_dir: Dataset folder name under ``data/``.
        no_cats: Number of categories; folders are expected to be named
            ``1`` .. ``no_cats``.
        compress: When true, each file is written through
            ``compress_and_save`` instead of being copied verbatim. The
            destination keeps the source file name in both cases.

    Raises:
        IndexError: If a category folder contains no usable files.
        OSError: If a folder or file cannot be read or written.
    """
    data_path = os.path.join('data/', base_dir)
    test_root = os.path.join(data_path, 'test/')
    train_root = os.path.join(data_path, 'train/')

    overwrite_dir(test_root)
    overwrite_dir(train_root)

    # Both routines take (source, destination), so choose once instead of
    # repeating the compress/copy branch for test and train files.
    transfer = compress_and_save if compress else shutil.copyfile

    for c in range(1, no_cats + 1):
        category_path = os.path.join(data_path, f'categories/{c}/')
        test_dir = os.path.join(test_root, str(c))
        train_dir = os.path.join(train_root, str(c))
        os.makedirs(test_dir)
        os.makedirs(train_dir)

        # Directory listings come back in no guaranteed order; sorting makes
        # the seeded choice below pick the same file on every machine/run.
        filenames = sorted(get_filenames(category_path))
        if not filenames:
            raise IndexError(f'no files to split in {category_path}')

        # A private generator seeded per category draws the same value as
        # random.seed(SEED); random.choice(...) would, but leaves the global
        # random state used by the rest of the notebook untouched.
        test_filename = random.Random(SEED).choice(filenames)

        for filename in filenames:
            dest_dir = test_dir if filename == test_filename else train_dir
            transfer(os.path.join(category_path, filename),
                     os.path.join(dest_dir, filename))

In [29]:
# Rebuild data/categories_castle/train and .../test from .../categories.
# Destructive: any existing train/ and test/ folders are deleted first.
# compress=True routes every image through compress_and_save.
split_train_test('categories_castle', no_cats, compress=True)