In [1]:
from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping, LambdaCallback
from keras.optimizers import Adam

import numpy as np
import matplotlib.pyplot as plt

# Reproducibility: fix the seed of NumPy's global random number generator so
# NumPy-driven randomness repeats from run to run.
# NOTE(review): randomness produced inside the backend itself may need its own
# seed for fully repeatable training -- confirm.
np.random.seed(31337)

Using TensorFlow backend.


In [2]:
import h5py
import numpy as np
# Load the training/validation arrays (presumably pre-computed ResNet50
# feature codes, judging by the file name) fully into memory.
# The file is opened explicitly read-only: older h5py releases default to a
# writable mode, which this cell never needs and which could create or modify
# the file by accident.
with h5py.File("ResNet50-300x300_codes-train.h5", "r") as hf:
    # "[:]" reads each dataset into an in-memory array, so the data remains
    # usable after the file is closed at the end of the `with` block.
    X_train = hf["X_train"][:]
    Y_train = hf["Y_train"][:]
    X_valid = hf["X_valid"][:]
    Y_valid = hf["Y_valid"][:]
    

# with h5py.File("ResNet50-300x300_codes-test.h5") as hf:
#      X_test = hf["X_test"][:]


In [None]:
# Small classification head: dropout on the 2048-d input vectors, a 16-unit
# ReLU hidden layer, and a 2-way softmax output.
model = Sequential()
model.add(Dropout(0.2, input_shape=(2048,)))
# Only the first layer declares an input shape; later layers take theirs from
# the previous layer's output (the output layer receives 16 features, not 2048).
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation="softmax"))

# Callbacks
# Stop training once val_loss has gone 20 epochs without improving.
early_stop_cb = EarlyStopping(monitor='val_loss', patience=20, verbose=1)

# Write the model to disk during training.
# NOTE(review): save_best_only is left at its default, so the file should hold
# the most recently saved epoch rather than the best one -- confirm this is
# intended, since a later cell reloads this file for prediction.
checkpoint_cb = ModelCheckpoint("ResNet-head-Dense2.h5")
# Backward-compatible alias for the original (misspelled) name, which the
# training cell passes to model.fit().
checkpoit_cb = checkpoint_cb

# Progress output: print "." at the start of every batch and the epoch index
# at the end of every epoch.
batch_print_cb = LambdaCallback(
    on_batch_begin=lambda batch, logs: print(".", end=''),
    on_epoch_end=lambda epoch, logs: print(epoch))

# Print the epoch index and the metrics dict after every epoch.
plot_loss_cb = LambdaCallback(
    on_epoch_end=lambda epoch, logs: print(epoch, logs))


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy'],
             )

model.summary()
# Last expression of the cell: displays the per-sample input shape.
X_train.shape[1:]

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dropout_1 (Dropout)              (None, 2048)          0           dropout_input_1[0][0]            
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 16)            32784       dropout_1[0][0]                  
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 2)             34          dense_1[0][0]                    
Total params: 32,818
Trainable params: 32,818
Non-trainable params: 0
____________________________________________________________________________________________________


(2048,)

In [None]:
# Train the head for up to 100 epochs. Keras' own progress bar is silenced
# (verbose=0); progress reporting is left to the callbacks.
training_callbacks = [early_stop_cb, checkpoit_cb, batch_print_cb, plot_loss_cb]

history = model.fit(X_train, Y_train,
                    batch_size=1024,
                    nb_epoch=100,
                    verbose=0,
                    validation_data=(X_valid, Y_valid),
                    callbacks=training_callbacks)


.......................0
0 {'acc': 0.67593359300485489, 'loss': 0.61457940618313134, 'val_acc': 0.89399997568130496, 'val_loss': 0.36338787460327149}
.......................1
1 {'acc': 0.90335090938822271, 'loss': 0.29945019231008191, 'val_acc': 0.95949999952316289, 'val_loss': 0.17859927618503571}
.......................2
2 {'acc': 0.95385419957762141, 'loss': 0.16278603003308831, 'val_acc': 0.97500000095367434, 'val_loss': 0.10554605048894883}
.......................3
3 {'acc': 0.96880038364597731, 'loss': 0.10603578030982565, 'val_acc': 0.98049993801116941, 'val_loss': 0.0776646619439125}
.......................4
4 {'acc': 0.97363719627890344, 'loss': 0.084492036876603047, 'val_acc': 0.98299997377395631, 'val_loss': 0.064122485399246212}
.......................5
5 {'acc': 0.97816898444207012, 'loss': 0.070823171034587115, 'val_acc': 0.98599995136260987, 'val_loss': 0.056152971744537351}
.......................6
6 {'acc': 0.98087062750087495, 'loss': 0.061768968993438096, 'val_acc': 

In [None]:
# Evaluate on the validation arrays. Entries of `score` are reported under the
# name found at the same index of model.metrics_names.
score = model.evaluate(X_valid, Y_valid, verbose=0)

first_metric_name = model.metrics_names[0]
second_metric_name = model.metrics_names[1]

# The second metric is shown as a percentage, the first one as a raw value.
print("OOS %s: %.2f%%" % (second_metric_name, score[1] * 100))
print("OOS %s: %.2f" % (first_metric_name, score[0]))

#print("min(val los)",np.min(history.history['val_loss']))

In [None]:
# Training curves, one figure per metric, each showing the training series and
# its "val_"-prefixed validation counterpart from history.history.
curve_specs = [
    # (history key, title, y-axis label, legend location, y-axis scale or None)
    ('acc', 'model accuracy', 'accuracy', 'lower right', None),
    ('loss', 'model loss (log scale)', 'loss', 'upper right', 'log'),
]

for metric_key, figure_title, y_label, legend_loc, y_scale in curve_specs:
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    if y_scale is not None:
        # Only the loss figure switches to a logarithmic y axis.
        plt.yscale(y_scale)
    plt.show()

In [None]:
from keras.models import load_model
from sklearn.metrics import log_loss

# Replace the in-memory model with the one stored in the checkpoint file.
model = load_model("ResNet-head-Dense2.h5")

# Read the test arrays into memory. The file is opened explicitly read-only:
# older h5py releases default to a writable mode, which is not needed here and
# could create or modify the file by accident.
with h5py.File("ResNet50-300x300_codes-test.h5", "r") as hf:
    # "[:]" reads the dataset into memory so it outlives the open file.
    X_test = hf["X_test"][:]


In [None]:
# Log loss of the model's predictions on the training arrays ...
Y_train_preds = model.predict(X_train)
train_log_loss = log_loss(Y_train, Y_train_preds)
print(train_log_loss)

# ... and on the validation arrays.
Y_valid_preds = model.predict(X_valid)
valid_log_loss = log_loss(Y_valid, Y_valid_preds)
print(valid_log_loss)

In [None]:
# Sweep the clipping margin from 0.01 to 0.09 and print the log loss obtained
# when predictions are clipped to [margin, 1 - margin]: first the whole sweep
# for the validation predictions, then the whole sweep for the training ones.
sweep_inputs = [
    (Y_valid, Y_valid_preds),
    (Y_train, Y_train_preds),
]

for y_true, y_pred in sweep_inputs:
    for hundredths in range(1, 10):
        margin = hundredths / 100.0
        clipped_pred = y_pred.clip(min=margin, max=1 - margin)
        print(margin, log_loss(y_true, clipped_pred))


In [None]:
import re

# Directory iterator over the "test" folder, used here only for its file
# listing (`.filenames`); shuffle=False keeps that listing in a fixed order.
# The target size is left at its default: the original passed
# model.input_shape[1:3], but `model` is the dense head whose input shape is
# (None, 2048), so that slice is the 1-tuple (2048,) and not a (height, width)
# pair.
datagen = ImageDataGenerator()
test_batches = datagen.flow_from_directory("test", shuffle=False, batch_size=32, class_mode=None)

# Split each relative path on path separators and dots and keep element 1.
# Assumes paths look like "<subdir>/<id>.<ext>" -- TODO confirm against the
# actual layout of the "test" folder.
# The pattern is a raw string (no invalid "\." escape in a normal string) and
# also accepts "\" as a separator, so the same id is extracted when the
# listing uses backslash-separated paths.
ids = [re.split(r'[/\\.]', x)[1] for x in test_batches.filenames]


In [None]:
# Model outputs for the test inputs; column 1 of this array is what gets
# written to the CSV files.
Y_test = model.predict(X_test)

In [None]:
# Pair every id with the model output in column 1 and write the pairs as
# "id,label" rows; comments='' keeps the header line free of a "# " prefix.
label_column = Y_test[:, 1]
Y_test_csv = np.column_stack((ids, label_column))

np.savetxt(
    'Y_test_ResNet-head-Dense2.csv',
    Y_test_csv,
    fmt="%s,%s",
    header="id,label",
    comments='',
)

In [None]:
# Same export as the unclipped file, but with the column-1 outputs limited to
# the range [0.02, 0.98] before they are written.
clipped_label_column = np.clip(Y_test[:, 1], 0.02, 0.98)
Y_test_csv = np.column_stack((ids, clipped_label_column))

np.savetxt(
    'Y_test_ResNet-head-Dense2-clipped.csv',
    Y_test_csv,
    fmt="%s,%s",
    header="id,label",
    comments='',
)