Precalculate the max-pooled output of the last convolutional layer of ResNet50 for the `train`, `valid` and `test` datasets to use in other notebooks.

In [1]:
import numpy as np
import tensorflow as tf
import bcolz

## Use base keras instead of tf.keras because of: https://github.com/tensorflow/tensorflow/issues/11868
# from tensorflow.contrib.keras.python.keras import applications
# from tensorflow.contrib.keras.python.keras.preprocessing import image
# from tensorflow.contrib.keras.python.keras.applications import imagenet_utils
##
from keras import applications
from keras.preprocessing import image
from keras.applications import imagenet_utils
from keras.utils import np_utils
##

Using TensorFlow backend.


Load data into np arrays

In [2]:
def get_batches(dirpath, gen=None, target_size=(224, 224), shuffle=True, batch_size=64, class_mode='categorical'):
    """Build a directory iterator that yields image batches read from `dirpath`.

    Args:
        dirpath: Root directory handed to `flow_from_directory`.
        gen: Object exposing `flow_from_directory` (normally an
            `image.ImageDataGenerator`). When omitted, a fresh
            `image.ImageDataGenerator()` is created for this call.
        target_size: Forwarded unchanged to `flow_from_directory`.
        shuffle: Forwarded unchanged to `flow_from_directory`.
        batch_size: Forwarded unchanged to `flow_from_directory`.
        class_mode: Forwarded unchanged to `flow_from_directory`.

    Returns:
        Whatever `gen.flow_from_directory(...)` returns.
    """
    if gen is None:
        # Create the default here instead of in the signature: a default
        # expression is evaluated once at definition time, so every call would
        # otherwise share the same generator instance.
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(
        dirpath,
        target_size=target_size,
        class_mode=class_mode,
        shuffle=shuffle,
        batch_size=batch_size,
    )

In [3]:
# shuffle=False keeps a fixed iteration order (train_batches.classes is saved as
# the labels further down); batch_size=1 with class_mode=None makes every next()
# call return one image array and no labels.
train_batches = get_batches('./data/train', shuffle=False, batch_size=1, class_mode=None, target_size=(224, 224))

Found 3025 images belonging to 8 classes.


In [5]:
# Number of images the iterator found; used as the loop bound when loading.
train_batches.samples

3025

In [6]:
# One next() per sample (batch_size=1), stacked into a single array.
train_data = np.concatenate([next(train_batches) for _ in range(train_batches.samples)])

In [8]:
# Sanity check: one 224x224, 3-channel image per training sample.
train_data.shape

(3025, 224, 224, 3)

In [9]:
# Same settings as the training iterator: fixed order, one image per next(),
# no labels in the yielded batches.
valid_batches = get_batches('./data/valid', shuffle=False, batch_size=1, class_mode=None, target_size=(224, 224))

Found 752 images belonging to 8 classes.


In [10]:
# One next() per sample (batch_size=1), stacked into a single array.
valid_data = np.concatenate([next(valid_batches) for _ in range(valid_batches.samples)])

In [11]:
# Sanity check: one 224x224, 3-channel image per validation sample.
valid_data.shape

(752, 224, 224, 3)

In [12]:
# Stage-1 test images. shuffle=False keeps a fixed order, since
# test_stg1_batches.filenames is saved alongside the predictions later.
test_stg1_batches = get_batches('./data/test_stg1', shuffle=False, batch_size=1, class_mode=None, target_size=(224, 224))

Found 1000 images belonging to 1 classes.


In [13]:
# One next() per sample (batch_size=1), stacked into a single array.
test_stg1_data = np.concatenate([next(test_stg1_batches) for _ in range(test_stg1_batches.samples)])

In [14]:
# Sanity check: one 224x224, 3-channel image per stage-1 test sample.
test_stg1_data.shape

(1000, 224, 224, 3)

In [15]:
# Stage-2 test images. shuffle=False keeps a fixed order, since
# test_stg2_batches.filenames is saved alongside the predictions later.
test_stg2_batches = get_batches('./data/test_stg2', shuffle=False, batch_size=1, class_mode=None, target_size=(224, 224))

Found 12153 images belonging to 1 classes.


In [16]:
# One next() per sample (batch_size=1), stacked into a single array.
test_stg2_data = np.concatenate([next(test_stg2_batches) for _ in range(test_stg2_batches.samples)])

In [17]:
# Sanity check: one 224x224, 3-channel image per stage-2 test sample.
test_stg2_data.shape

(12153, 224, 224, 3)

In [18]:
# Stage 1 first, then stage 2 -- the same order in which test_filenames is
# assembled further down, so rows and file names line up.
test_data = np.concatenate([test_stg1_data, test_stg2_data])

In [19]:
# Sanity check: row count should equal stage-1 plus stage-2 sample counts.
test_data.shape

(13153, 224, 224, 3)

Predict the 3 datasets

In [20]:
# ImageNet-pretrained ResNet50 without the classifier head (include_top=False);
# pooling='max' yields one feature vector per image (2048 values in the
# recorded outputs).
# NOTE(review): the arrays fed to predict() come straight from a plain
# ImageDataGenerator(); nothing visible applies imagenet_utils.preprocess_input,
# even though imagenet_utils is imported -- confirm whether that is intentional.
resnet50 = applications.ResNet50(weights='imagenet', include_top=False, pooling='max')

In [21]:
# Feature vectors for every training image, in iterator order.
train_pred = resnet50.predict(train_data)

In [22]:
# Sanity check: one feature row per training image.
train_pred.shape

(3025, 2048)

In [23]:
# Feature vectors for every validation image, in iterator order.
valid_pred = resnet50.predict(valid_data)

In [24]:
# Sanity check: one feature row per validation image.
valid_pred.shape

(752, 2048)

In [25]:
# Feature vectors for the combined stage-1 + stage-2 test images.
test_pred = resnet50.predict(test_data)

In [None]:
##

In [26]:
# Sanity check: one feature row per test image.
test_pred.shape

(13153, 2048)

In [28]:
def save_array(fname, arr):
    """Write `arr` to disk as a bcolz carray stored under the directory `fname`.

    The carray is opened with mode='w' and flushed immediately so the data is
    on disk when the call returns. Nothing is returned.
    """
    bcolz.carray(arr, rootdir=fname, mode='w').flush()

In [29]:
# Persist the training features for reuse by other notebooks.
save_array('data/train_resnet50_conv.arr', train_pred)

In [30]:
# Persist the validation features for reuse by other notebooks.
save_array('data/valid_resnet50_conv.arr', valid_pred)

In [31]:
# Persist the combined (stage 1 + stage 2) test features.
save_array('data/test_resnet50_conv.arr', test_pred)

Save the labels for `train` and `valid` and filenames for `test`

In [32]:
def onehot(x):
    """One-hot encode the class vector `x` via Keras' `np_utils.to_categorical`.

    No class count is passed, so the width of the result is whatever
    `to_categorical` derives from `x` itself.
    """
    encoded = np_utils.to_categorical(x)
    return encoded

In [33]:
# train_batches.classes is taken from the same unshuffled iterator that
# produced train_data, so the label rows are presumably in the same order as
# train_pred -- verify if the iterator settings change.
save_array('data/train_labels.arr', onehot(train_batches.classes))

In [34]:
# NOTE(review): onehot() is called without an explicit class count, so the
# label width depends on the largest class index present in valid -- confirm
# that every class occurs in the validation split.
save_array('data/valid_labels.arr', onehot(valid_batches.classes))

In [35]:
# Peek at the raw relative paths; both start with the 'unknown/' class folder.
test_stg1_batches.filenames[0], test_stg2_batches.filenames[0]

('unknown/img_07538.jpg', 'unknown/image_01802.jpg')

In [36]:
# Drop the leading 'unknown/' folder (8 characters) so only the image name remains.
test_stg1_batches_filenames = [path[len('unknown/'):] for path in test_stg1_batches.filenames]

In [37]:
# Replace the leading 'unknown/' folder (8 characters) with a 'test_stg2/' prefix.
test_stg2_batches_filenames = ["test_stg2/" + path[len('unknown/'):] for path in test_stg2_batches.filenames]

In [38]:
# Confirm the prefix handling: bare name for stage 1, 'test_stg2/' prefix for stage 2.
test_stg1_batches_filenames[0], test_stg2_batches_filenames[0]

('img_07538.jpg', 'test_stg2/image_01802.jpg')

In [39]:
# Stage 1 then stage 2, mirroring how test_data was concatenated, so that
# test_filenames[i] names the image behind test_pred[i].
test_filenames = test_stg1_batches_filenames + test_stg2_batches_filenames

In [40]:
# Sanity check: should equal the number of rows in test_pred.
len(test_filenames)

13153

In [42]:
# Store the file names (a plain list of str) next to the test features.
save_array('data/test_resnet50_filenames.arr', test_filenames)