In [1]:
%%javascript
// Raise the notebook's output auto-scroll threshold to 9999 so that long cell
// outputs (e.g. training progress) are presumably shown in full instead of being
// collapsed into a scrolling box.
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

In [2]:
%matplotlib inline

from __future__ import division

import brightside as bs
import numpy as np
import ntcir
import ntcir.IO as IO

from keras import backend as K
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from experiments.utils import HistoryLog
from experiments.utils import generate_batch
import experiments as exp

Using TensorFlow backend.


Loading the data

In [3]:
# Load the annotations and the category list, both located via `ntcir.filepaths`.
users = IO.load_annotations(ntcir.filepaths)
# NOTE(review): `categories` is not referenced again in the cells visible here.
categories = IO.load_categories(ntcir.filepaths)
# NOTE(review): `ntcir.utils` is never imported explicitly above; this relies on the
# `ntcir` package exposing `utils` as an attribute — confirm.
sorted_users = ntcir.utils.sort(users)

# Full day sequences
# 2880 frames per day would correspond to one frame every 30 s over 24 h — TODO confirm.
num_frames_per_day = 2880
sequences = ntcir.get_sequences(sorted_users, num_frames_per_day)

Preparing training batches

In [4]:
# Read the training/validation split definitions (file names are relative paths).
training_set = ntcir.read_split('training_split.txt')
validation_set = ntcir.read_split('validation_split.txt')

# Turn each split into batches over the day sequences built earlier.
# NOTE(review): training uses `get_training_batches` while validation uses
# `get_batches`; presumably the training variant builds its batches differently —
# confirm in the `ntcir` package.
training_batches = ntcir.get_training_batches(training_set, sequences)
validation_batches = ntcir.get_batches(validation_set, sequences)

## Keras

Forcing Keras to use the CPU (disabled: the cell below is fully commented out)

In [5]:
# import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [None]:
from keras import backend as K
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from experiments.utils import HistoryLog
from experiments.utils import generate_batch
import experiments as exp

# Fix the Keras learning phase to 1 (training) for the whole backend session.
# NOTE(review): with the phase fixed, the validation passes run by fit_generator
# below presumably also execute in training mode — confirm this is intended.
K.set_learning_phase(1)

# Seed NumPy's global RNG.
# NOTE(review): only NumPy is seeded here; the backend's own RNG is not.
np.random.seed(42)
# The learning rate is also embedded in the log file names written at the end of the cell.
learning_rate=0.00001
sgd = SGD(lr=learning_rate, decay=0.000005, momentum=0.9, nesterov=True)
# Build the VGG-16 + LSTM model; presumably the VGG-16 part is initialised from the
# given weights file — confirm in `experiments`.
model = exp.vgg_16_plus_lstm(vgg16_weights='weights.VGG-16.best.hdf5')
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# prepare data augmentation configuration
# Training images are rescaled by 1/255 and randomly rotated, shifted, zoomed and flipped.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Validation images are only rescaled; no augmentation options are set.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = generate_batch(train_datagen, users, training_batches)

val_generator = generate_batch(val_datagen, users, validation_batches)
    
# checkpoint
# The file name is templated with the epoch number.
# NOTE(review): `save_best_only` is not passed, so with the Keras default a file is
# presumably written every epoch and `monitor='val_acc'` has no effect — confirm.
weights_filepath="weights.VGG-16+LSTM.{epoch:02d}.hdf5"
checkpoint = ModelCheckpoint(weights_filepath, monitor='val_acc', verbose=1)
history = HistoryLog()

# fine-tune the model
# One step per batch: an epoch covers len(training_batches) training steps and
# len(validation_batches) validation steps.
model.fit_generator(
        train_generator,
        steps_per_epoch=len(training_batches),#36095,#15,
        epochs=20,
        callbacks=[checkpoint, history],
        validation_data=val_generator,
        validation_steps=len(validation_batches))#6225)#20)

# Write the logs collected by the HistoryLog callback. These lines only run if
# fit_generator returns normally.
loss_filepath = "VGG-16+LSTM.lr_{}.loss.log".format(learning_rate)
history.log_training_loss(loss_filepath)

epoch_filepath = "VGG-16+LSTM.lr_{}.epoch.log".format(learning_rate)
history.log_epoch(epoch_filepath)

# Clear the Keras backend session once training and logging are done.
K.clear_session()

Using TensorFlow backend.


Epoch 1/20
   55/35564 [..............................] - ETA: 6352s - loss: 2.9791 - acc: 0.0891     

In [9]:
# Stand-alone cleanup of the Keras backend session; presumably used when the training
# cell is interrupted before it reaches its own K.clear_session() — confirm.
K.clear_session()

In [5]:
# Number of training batches; the training cell passes this value as steps_per_epoch.
len(training_batches)

35564

In [6]:
# Rebuild the validation generator on its own (the same two statements also appear
# in the training cell); only the rescale=1/255 option is set, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

val_generator = generate_batch(val_datagen, users, validation_batches)

In [8]:
from __future__ import division
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras import backend as K
import numpy as np

from keras.callbacks import Callback
from easydict import EasyDict as edict


def _load_images_batch(image_data_generator, users, batch, num_classes=21, target_size=(224, 224)):
    """Build the (inputs, one-hot labels) arrays for a single batch of frames.

    This is a debugging variant: the image loading / augmentation steps are
    commented out, so every frame's path is printed and ``batch_x`` is
    returned filled with zeros. Only the labels are populated.

    Args:
        image_data_generator: Only referenced by the disabled loading code.
        users: Indexed as ``users[batch.user_id][batch.date]``; the result
            must expose an ``images`` sequence.
        batch: Object providing ``size``, ``indices``, ``user_id`` and ``date``.
        num_classes: Width of each one-hot label vector.
        target_size: Two-element size of each image; a trailing axis of
            length 3 is appended to form the per-image shape. Lists are
            accepted as well as tuples.

    Returns:
        Tuple ``(batch_x, batch_y)``. ``batch_x`` has shape
        ``(1, batch.size) + target_size + (3,)`` and dtype ``K.floatx()``;
        ``batch_y`` has shape ``(1, batch.size, num_classes)``, dtype float32,
        with a 1.0 at index ``image.label`` for each frame.
    """
    # tuple() lets callers pass a list without breaking the concatenation.
    image_shape = tuple(target_size) + (3,)

    batch_x = np.zeros((1, batch.size) + image_shape, dtype=K.floatx())
    batch_y = np.zeros((1, batch.size, num_classes), dtype='float32')

    for i, ind in enumerate(batch.indices):
        image = users[batch.user_id][batch.date].images[ind]
        # Call form: prints the same text under Python 2 and also parses on Python 3.
        print(image.path)
        # Image loading is disabled in this debugging variant; batch_x stays all zeros.
#         img = load_img(image.path, target_size=target_size, grayscale=False)
#         x = img_to_array(img, dim_ordering='default')
#         x = image_data_generator.random_transform(x)
#         x = image_data_generator.standardize(x)
#         batch_x[0, i] = x
        batch_y[0, i, image.label] = 1.
    return batch_x, batch_y


def _generate_batch(image_data_generator, users, batches, steps_per_epoch=None, num_classes=21, target_size=(224, 224)):
    if not steps_per_epoch:
        steps_per_epoch = len(batches)
    while True:
        np.random.shuffle(batches)
        for i in range(steps_per_epoch):
            yield _load_images_batch(image_data_generator, users, batches[i], num_classes, target_size)