+ random transforms
+ thread-safe gens
+ 128x128 images
+ extra dense layer
+ K-Folds

In [1]:
RUN = 'F'
RND = 777

In [2]:
IMAGES_DIR = '/d2/caches/kaggle-planet/processed-images-64x64/'
IMAGE_SHAPE = (64, 64, 4)

TRAIN_BATCH_SIZE = 8
TRAIN_EPOCHS = 1

VAL_SPLIT = 0.2

VAL_BATCH_SIZE = 8

TENSORBOARD_DIR = '/tensorboard/planet/' + RUN
MODEL_CHECKPOINT_DIR = '/d2/caches/kaggle-planet/models/' + RUN

In [3]:
%run 'Lib.ipynb'

MODEL_CHECKPOINT_DIR not defined 


Using TensorFlow backend.


In [4]:
import keras
from keras.layers import *
from keras.applications import *
from keras.models import *
from keras.callbacks import *

In [12]:
# load data
X_files = np.load('out/X_files.npy')[:1280]
Y_tags_misc = np.load('out/Y_tags_misc.npy')[:1280]
Y_tags_weather = np.load('out/Y_tags_weather.npy')[:1280]

In [5]:
# load data
X_files = np.load('out/X_files.npy')
Y_tags_misc = np.load('out/Y_tags_misc.npy')
Y_tags_weather = np.load('out/Y_tags_weather.npy')

In [6]:
# split data into tran/val sets
from sklearn.model_selection import train_test_split

X_files_train, X_files_val, \
Y_tags_misc_train, Y_tags_misc_val, \
Y_tags_weather_train, Y_tags_weather_val = \
train_test_split(
    X_files,
    Y_tags_misc,
    Y_tags_weather,
    test_size=VAL_SPLIT,
    random_state=RND)

In [7]:
# calculate samples per epoch so that epoch can consist of integer number of batches
TRAIN_SAMPLES_PER_EPOCH = int(len(X_files_train) / TRAIN_BATCH_SIZE) * TRAIN_BATCH_SIZE
print ('Samples per epoch (train):', TRAIN_SAMPLES_PER_EPOCH, 'of', len(X_files_train))

Samples per epoch (train): 32376 of 32383


In [8]:
# calculate samples per epoch so that epoch can consist of integer number of batches
VAL_SAMPLES_PER_EPOCH = int(len(X_files_val) / VAL_BATCH_SIZE) * VAL_BATCH_SIZE
print ('Samples per epoch (val):', VAL_SAMPLES_PER_EPOCH, 'of', len(X_files_val))

Samples per epoch (val): 8096 of 8096


In [9]:
# define model


def create_model():

    image_input = Input(shape=IMAGE_SHAPE, name='image_input')

    x = Conv2D(32, (3, 3), padding='same', activation='relu')(image_input)
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2)(x)

    x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2)(x)

    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2)(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2)(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2)(x)

    x = Flatten()(x)
    x = Dropout(rate=0.5)(x)

    x = Dense(512, activation='relu')(x)
    features = Dropout(rate=0.5)(x)

    out_tags_misc = Dense(
        Y_tags_misc.shape[1], activation='sigmoid', name='tags_misc')(features)
    out_tags_weather = Dense(
        Y_tags_weather.shape[1], activation='softmax',
        name='tags_weather')(features)

    model = Model(
        inputs=[image_input], outputs=[out_tags_misc, out_tags_weather])

    model.compile(
        optimizer='adadelta',
        loss={
            'tags_misc': 'binary_crossentropy',
            'tags_weather': 'binary_crossentropy'
        },
        loss_weights={'tags_misc': 1.,
                      'tags_weather': 0.333})

    return model

In [10]:
def random_transform_batch(b):
    for i, img in enumerate(b[0]):
        b[0][i] = random_transform(img, debug=False)
    return b

In [11]:
# data generation


def train_generator():

    while True:

        train_batch_index = np.random.randint(
            TRAIN_SAMPLES_PER_EPOCH / TRAIN_BATCH_SIZE)

        b = generate_batch(
            n_samples=TRAIN_BATCH_SIZE,
            batch_index=train_batch_index,
            X_files=X_files_train,
            Y_tags_misc=Y_tags_misc_train,
            Y_tags_weather=Y_tags_weather_train,
            images_dir=IMAGES_DIR,
            image_shape=IMAGE_SHAPE)

        b = random_transform_batch(b)

        yield b


def val_generator():
    while True:
        val_batch_index = np.random.randint(
            VAL_SAMPLES_PER_EPOCH / VAL_BATCH_SIZE)

        b = generate_batch(
            n_samples=VAL_BATCH_SIZE,
            batch_index=val_batch_index,
            X_files=X_files_val,
            Y_tags_misc=Y_tags_misc_val,
            Y_tags_weather=Y_tags_weather_val,
            images_dir=IMAGES_DIR,
            image_shape=IMAGE_SHAPE)

        yield b

In [12]:
model = create_model()

In [13]:
class MyCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        print(logs)

In [14]:
model.fit_generator(
    train_generator(),
    steps_per_epoch=TRAIN_SAMPLES_PER_EPOCH,
    epochs=TRAIN_EPOCHS,
    verbose=1,
    callbacks=[
        TensorBoard(log_dir=TENSORBOARD_DIR, histogram_freq=0),
        ModelCheckpoint(
                MODEL_CHECKPOINT_DIR + \
                '/e{epoch:02d}-l={loss:.5f}-vl={val_loss:.5f}.h5',
                monitor='val_loss', verbose=0, save_best_only=False,
                save_weights_only=False, mode='auto'
            ),
        MyCallback()
    ],
    validation_data=val_generator(),
    validation_steps=VAL_SAMPLES_PER_EPOCH,
    class_weight=None,
    max_q_size=40,
    workers=4,
    pickle_safe=True,
    initial_epoch=0)



Epoch 1/1



{'val_tags_weather_loss': 0.112647494308102, 'val_tags_misc_loss': 0.12317933918486751, 'tags_misc_loss': 0.14074548956566463, 'val_loss': 0.16069095540357378, 'loss': 0.18732768220379156, 'tags_weather_loss': 0.13988646256981827}


<keras.callbacks.History at 0x7f6413e4f828>

---

In [15]:
preds = model.predict_generator(val_generator(), steps=VAL_SAMPLES_PER_EPOCH/VAL_BATCH_SIZE, verbose=1)



In [16]:
p_tags = preds[0]; p_weather = preds[1]

In [17]:
p = np.hstack((p_tags, p_weather))

In [18]:
t = np.hstack((Y_tags_misc_val, Y_tags_weather_val))

In [19]:
from sklearn.metrics import fbeta_score

In [23]:
fbeta_score(t, p > 0.01, beta=2, average='samples')

0.61798881319456889

In [24]:
np.save?