In [10]:
import configparser
import gc
import json
import os

import numpy as np
import numpy.ma as ma
import pandas as pd
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

from utils.loaderjpg import *
from pretrained.vgg16 import *

In [11]:
# Settings file, given relative to the notebook's working directory.
config_file = 'cfg/default.cfg'

print('reading configurations from config file: {}'.format(config_file))

settings = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Without this check a wrong path would only
# surface below as a confusing NoSectionError for the 'data' section.
if not settings.read(config_file):
    raise IOError('could not read config file: {}'.format(config_file))

# Root directory of the dataset; later cells build every data path from it.
data_dir = settings.get('data', 'data_dir')

# Size argument handed to load_training_set(); the loaded images come back as
# 224 x 224 (see the printed shape of x_train).
rescaled_dim = 224

reading configurations from config file: cfg/default.cfg




In [12]:
# Disabled cell: one-off population of the training dataset cache.
# Kept for reference only; nothing in it is executed.
# NOTE(review): the next cells call load_training_set() unconditionally --
# confirm whether this pre-population step is still needed.
# if not is_training_set_in_cache(rescaled_dim):
#     # populate the training dataset cache
#     df_train = pd.read_csv(data_dir + 'train_v2.csv')
#     load_training_set(df_train, rescaled_dim)

In [13]:
# Disabled cell: one-off population of the test dataset cache.
# Kept for reference only; nothing in it is executed and the test set is not
# used by any other cell shown here.
# if not is_test_set_in_cache(rescaled_dim):
#     # populate the test dataset cache
#     df_test = pd.read_csv(data_dir + 'sample_submission_v2.csv')
#     load_test_set(df_test, rescaled_dim)

In [14]:
# Read the training label table and load the images plus their label matrix.
# os.path.join() yields the same path as plain concatenation when data_dir ends
# with a separator, and still works when the config value omits it.
df_train = pd.read_csv(os.path.join(data_dir, 'train_v2.csv'))
x_train, y_train = load_training_set(df_train, rescaled_dim)

# Every image needs exactly one label row; later cells slice both arrays with
# the same indices and would silently misalign otherwise.
assert x_train.shape[0] == y_train.shape[0], \
    'image/label count mismatch: {} vs {}'.format(x_train.shape[0], y_train.shape[0])

print(x_train.shape)
print(y_train.shape)

(40479L, 224L, 224L, 3L)
(40479L, 17L)


In [15]:
# Cache file for the VGG16 bottleneck features of the training images.
# The features are computed once and reused on every later run.
bottleneck_weights_file = os.path.join(data_dir, 'bottleneck/vgg16/bottleneck_features.npy')

if not os.path.exists(bottleneck_weights_file):
    # Create the output directory up front so a missing folder fails (or is
    # fixed) before the expensive forward pass, not at np.save() time.
    bottleneck_dir = os.path.dirname(bottleneck_weights_file)
    if bottleneck_dir and not os.path.isdir(bottleneck_dir):
        os.makedirs(bottleneck_dir)

    # In-place subtraction on an unsigned integer array wraps around instead of
    # going negative, so widen it first. This is a no-op for float/signed data.
    # NOTE(review): the dtype returned by load_training_set() is not visible
    # here; the float32 copy is large for the full training set.
    if np.issubdtype(x_train.dtype, np.unsignedinteger):
        x_train = x_train.astype(np.float32)

    # Reference: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3
    # BGR mean values [103.94, 116.78, 123.68] should be subtracted before feeding into the model
    # (rounded to whole numbers here; assumes the loader returns channels in
    # BGR order -- TODO confirm).
    # This mutates x_train in place, so the cell must not be re-run on an
    # already-centred array.
    x_train[:,:,:,0] -= 104
    x_train[:,:,:,1] -= 117
    x_train[:,:,:,2] -= 124

    # Move the channel axis in front of the spatial axes:
    # (N, H, W, C) -> (N, C, H, W).
    x_train = x_train.transpose(0,3,1,2)  # https://github.com/fchollet/keras/issues/2681
    print(x_train.shape)

    model = vgg16_model_fc_truncated(channel=3)

    # No augmentation is configured, and shuffle=False keeps the predicted
    # features in the same row order as y_train.
    train_datagen = ImageDataGenerator()
    train_gen = train_datagen.flow(x_train, y_train, shuffle=False, batch_size=64)

    # Second argument is the total number of samples to predict
    # (Keras 1.x `val_samples` semantics -- confirm before upgrading Keras).
    bottleneck_features = model.predict_generator(train_gen, x_train.shape[0])
    np.save(bottleneck_weights_file, bottleneck_features)

In [16]:
# Rebinds `x_train`: from here on it holds the array stored in
# bottleneck_weights_file (the bottleneck features saved by the previous cell
# when the cache was missing), no longer the raw training images.
x_train = np.load(bottleneck_weights_file)

In [17]:
# cat verification (disabled cell, kept for reference only)
# Sanity check of the full pretrained VGG16 on a single image: load a picture,
# apply the same resize / mean subtraction / channel-first transpose as the
# training data, and print the index of the highest-scoring output.
# NOTE(review): the image path below is an absolute path on the author's
# machine and must be changed before re-enabling this cell.
# import matplotlib.pyplot as plt
# %matplotlib inline
# import cv2

# cat = cv2.imread('C:/Users/Me/github/DenseNet-Keras/resources/cat.jpg')

# plt.imshow(cv2.cvtColor(cat, cv2.COLOR_BGR2RGB))

# im = cv2.resize(cat, (224, 224)).astype(np.float32)
# im[:,:,0] -= 103.939
# im[:,:,1] -= 116.779
# im[:,:,2] -= 123.68
# im = im.transpose((2,0,1))
# im = np.expand_dims(im, axis=0)

# # Test pretrained model
# model = vgg16_model(channel=3)
# out = model.predict(im)
# print np.argmax(out)


In [18]:
# Train the fully connected head on the cached bottleneck features.
# (EarlyStopping, ModelCheckpoint and optimizers are imported in the notebook's
# top import cell.)

number_of_samples = x_train.shape[0]

# The cached features must line up row-for-row with the labels. A stale cache
# file of a different length would otherwise be sliced without any error and
# train on misaligned pairs.
assert y_train.shape[0] == number_of_samples, \
    'feature/label count mismatch: {} vs {}'.format(number_of_samples, y_train.shape[0])

one_tenth = int(number_of_samples * 0.10)

# First 8 tenths -> training, 9th tenth -> validation.
# The rows from one_tenth*9 onward are not used in this cell -- presumably
# reserved as a hold-out set; confirm.
x_train_fc, x_valid_fc = x_train[:one_tenth*8], x_train[one_tenth*8:one_tenth*9]
y_train_fc, y_valid_fc = y_train[:one_tenth*8], y_train[one_tenth*8:one_tenth*9]

# The number of outputs follows the label matrix (17 columns for this dataset)
# instead of being hard-coded.
model = custom_fc_layers(x_train.shape[1:], num_classes=y_train.shape[1])

# early stopping prevents overfitting on training data: training stops once
# val_loss has not improved for 3 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=3, min_delta=0, verbose=0, mode='auto')

model_filepath = os.path.join(data_dir, 'bottleneck/vgg16/bottleneck_fc_model.h5')
# The checkpoint callback writes to this path during training; make sure the
# directory exists so the first save does not abort the run.
model_dir = os.path.dirname(model_filepath)
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# save only the best model, not the latest epoch model.
checkpoint = ModelCheckpoint(model_filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False)

adam = optimizers.Adam()

# NOTE(review): 'recall' and 'precision' as metric names, and `nb_epoch` below,
# are Keras 1.x API -- confirm the pinned Keras version before upgrading.
model.compile(loss='binary_crossentropy',
              optimizer=adam,
              metrics=['accuracy', 'recall', 'precision'])

# Keep the History object so the per-epoch metrics can be inspected later
# instead of only showing its repr as the cell output.
history = model.fit(x_train_fc, y_train_fc,
                    nb_epoch=100,
                    batch_size=64,
                    verbose=1,
                    validation_data=(x_valid_fc, y_valid_fc),
                    callbacks=[early_stop, checkpoint])


Train on 32376 samples, validate on 4047 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


<keras.callbacks.History at 0x71ae6cf8>