In [26]:
import os
from scipy import misc
import numpy as np
import pandas as pd

In [27]:
def create_dir(directory):
    """Create ``directory``, including any missing parent directories.

    Calling this for a directory that already exists is a no-op, so cells
    that use it can be re-run without raising.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
    """
    # exist_ok=True also closes the gap between an "exists?" check made by a
    # caller and the actual creation.
    os.makedirs(directory, exist_ok=True)

In [28]:
def check_dir(directory):
    """Ensure ``directory`` exists, creating it with ``create_dir`` when the path is absent.

    Args:
        directory: Path to check.
    """
    if os.path.exists(directory):
        # Something already lives at this path; nothing to create.
        return
    create_dir(directory)

### Split the training data into train and validation sets and resize the images to 32x32

In [73]:
%%time
# Resize every training image and copy it into a per-class folder tree:
# data/keras/validation/<class>/ receives the first `val_size` images of each
# class folder, data/keras/train/<class>/ receives the rest.
dir_train_transformed = 'data/keras/train'
dir_val_transformed = 'data/keras/validation'

dir_train = 'data/train'
img_size = (32, 32)
test_size = 0.25  # fraction of each class folder that goes to validation

for ind, folder in enumerate(os.listdir(dir_train)):
    subfolder = os.path.join(dir_train, folder)
    if not os.path.isdir(subfolder):
        continue

    # The first three characters of the folder name serve as the class label.
    image_class = folder[:3]

    # List the folder once and drop hidden entries *before* sizing the split,
    # so files that are never copied do not inflate the validation count or
    # the reported total.
    # NOTE(review): the split follows os.listdir order, which is arbitrary and
    # not shuffled -- confirm this is acceptable for the validation set.
    image_files = [f for f in os.listdir(subfolder) if not f.startswith('.')]
    val_size = int(test_size * len(image_files))

    dir_train_to_save = os.path.join(dir_train_transformed, image_class)
    dir_val_to_save = os.path.join(dir_val_transformed, image_class)
    check_dir(dir_train_to_save)
    check_dir(dir_val_to_save)

    if ind % 10 == 0:
        print('folder - {}, validation size - {}, total size - {}'.format(folder, val_size, len(image_files)))

    for position, image_file in enumerate(image_files):
        image = misc.imread(os.path.join(subfolder, image_file))
        if len(image.shape) == 2:
            # 2-D array (presumably grayscale): replicate it three times.
            # The copies are stacked along a new leading axis.
            image = np.stack((image,)*3)
        image = misc.imresize(image, img_size, interp='bicubic')

        # The first `val_size` images go to validation, the remainder to train.
        dir_to_save = dir_val_to_save if position < val_size else dir_train_to_save
        misc.imsave(os.path.join(dir_to_save, image_file), image)

folder - 010.beer-mug, validation size - 16, total size - 64
folder - 020.brain-101, validation size - 13, total size - 53
folder - 030.canoe, validation size - 18, total size - 74
folder - 040.cockroach, validation size - 23, total size - 94
folder - 050.covered-wagon, validation size - 16, total size - 67
folder - 060.duck, validation size - 14, total size - 57
folder - 070.fire-extinguisher, validation size - 13, total size - 54
folder - 080.frog, validation size - 21, total size - 86
folder - 090.gorilla, validation size - 45, total size - 182
folder - 100.hawksbill-101, validation size - 15, total size - 63
folder - 110.hourglass, validation size - 13, total size - 55
folder - 120.joy-stick, validation size - 25, total size - 100
folder - 130.license-plate, validation size - 15, total size - 61
folder - 140.menorah-101, validation size - 14, total size - 59
folder - 150.octopus, validation size - 20, total size - 81
folder - 160.pez-dispenser, validation size - 13, total size - 53

### Transform test data to 64x64 size

In [12]:
%%time
# Resize every image found in data/test to 64x64 and write the copy, under
# the same file name, into data/test_transformed.
dir_test_transformed = 'data/test_transformed'
dir_test = 'data/test'
img_size = (64, 64)

check_dir(dir_test_transformed)
for ind, image_name in enumerate(os.listdir(dir_test)):
    # Progress marker every 600 directory entries (hidden entries included).
    if ind % 600 == 0:
        print('{}'.format(ind))
    if image_name.startswith('.'):
        continue  # skip hidden entries

    name = os.path.join(dir_test, image_name)
    fold_to_save = os.path.join(dir_test_transformed, image_name)

    image = misc.imread(name)
    if image.ndim == 2:
        # 2-D array (presumably grayscale): stack three copies along a new
        # leading axis.
        image = np.stack((image,) * 3)
    image = misc.imresize(image, img_size, interp='bicubic')
    misc.imsave(fold_to_save, image)

0
600
1200
1800
2400
3000
3600
4200
4800
5400
6000
6600
7200
CPU times: user 1min 20s, sys: 7.68 s, total: 1min 28s
Wall time: 1min 33s


### Transform images for keras

In [4]:
# Load the table from data/data_analys.csv, using its first column as the index.
train_df = pd.read_csv('data/data_analys.csv', index_col=0)

In [5]:
# Pre-allocate one 64x64x3 uint8 slot per row of train_df.
images = np.zeros((train_df.shape[0], 64, 64, 3), dtype='uint8')
# Integer dtype rather than 'object': the values stored here are produced by
# int(...), and an object array would be pickled by np.save, which np.load
# refuses to read under its default allow_pickle=False.
targets = np.zeros((train_df.shape[0],), dtype='int64')

In [25]:
# Fill the pre-allocated `images` / `targets` arrays from the files in
# data/train_transformed. The label is the integer value of the first three
# characters of each file name.
dir_train = 'data/train_transformed'
img_size = (64, 64)

i = 0  # next free row in `images` / `targets`
for ind, image_name in enumerate(os.listdir(dir_train)):
    if ind % 2000 == 0:
        print('{}'.format(ind))
    # Test the bare file name: the joined path always starts with the
    # directory name, so checking it would never skip a hidden file.
    if image_name.startswith('.'):
        continue
    image_file = os.path.join(dir_train, image_name)
    image = misc.imread(image_file)
    if len(image.shape) == 2:
        # 2-D array (presumably grayscale): stack three copies along a new
        # leading axis.
        image = np.stack((image,)*3)
    images[i] = misc.imresize(image, img_size, interp='bicubic')
    targets[i] = int(image_name[:3])
    i = i + 1

0
2000
4000
6000
8000
10000
12000
14000
16000
18000
20000
22000


In [17]:
# Persist the image array and its labels as .npy files under data/keras.
np.save(file='data/keras/train_images.npy', arr=images)
np.save(file='data/keras/targets.npy', arr=targets)

In [18]:
%%time
# Load every image from data/test_transformed into a single uint8 array of
# shape (n_images, 64, 64, 3).
dir_test_name = 'data/test_transformed'
dir_test = os.listdir(dir_test_name)
img_size = (64, 64)

# Filter hidden entries before allocating, so the array has exactly one row
# per image and no trailing all-zero rows for skipped files. The directory is
# listed once so the allocation and the loop see the same entries.
test_names = [entry for entry in dir_test if not entry.startswith('.')]
test_images = np.zeros((len(test_names), img_size[0], img_size[1], 3), dtype='uint8')

# Rows follow the order of the directory listing.
for i, image_name in enumerate(test_names):
    if i % 600 == 0:
        print('{}'.format(i))
    name = os.path.join(dir_test_name, image_name)
    image = misc.imread(name)
    if len(image.shape) == 2:
        # 2-D array (presumably grayscale): stack three copies along a new
        # leading axis.
        image = np.stack((image,)*3)
    test_images[i] = misc.imresize(image, img_size, interp='bicubic')

0
600
1200
1800
2400
3000
3600
4200
4800
5400
6000
6600
7200
CPU times: user 4.19 s, sys: 628 ms, total: 4.82 s
Wall time: 5.51 s


In [19]:
# Persist the test image array as a .npy file under data/keras.
np.save(file='data/keras/test_images.npy', arr=test_images)