In [None]:
# Core Keras imports; the VGG16 base model is then created for 224x224x3 images using ImageNet weights.
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers
from keras.layers import Dropout, Flatten, Dense
try:
    import h5py
except ImportError:
    h5py = None
# Input dimensions fed to the network: rows, columns and channels.
img_rows, img_cols, img_channel = 224, 224, 3

# VGG16 initialised with the ImageNet weights; include_top=False leaves out
# the network's original top layers so a custom head can be attached instead.
base_model = applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_rows, img_cols, img_channel)
)

In [None]:
# Classification head placed on top of the base model's output:
# flatten the feature maps, one 256-unit ReLU layer, one sigmoid output unit.
add_model = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Join base model and head into one network, from the base model's input
# to the head's output.
model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

# This is a binary classification problem, so the loss is binary_crossentropy;
# the optimizer is SGD with momentum.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
    metrics=['accuracy']
)

# Print the summary of the combined model.
model.summary()

In [None]:
# Image augmentation generator and the checkpoint callback used for training.
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

# Number of samples per training batch.
batch_size = 32
# Number of training epochs.
epochs = 50

# Augment the training images on the fly with random rotations, width/height
# shifts and horizontal flips.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True)
# train_datagen.fit(x_train) is intentionally not called: it only computes
# statistics for featurewise centering / std-normalization / ZCA whitening,
# none of which are enabled above, so it would just process the whole
# training array for no effect.

# Ceiling division so the final, smaller batch is part of every epoch and the
# step count never becomes 0 when there are fewer samples than batch_size.
steps_per_epoch = (x_train.shape[0] + batch_size - 1) // batch_size

# Train on augmented batches and keep the returned History object so accuracy
# and loss can be inspected afterwards. x_test / y_test serve as validation
# data. With save_best_only=True the checkpoint file is only overwritten when
# the monitored 'val_acc' improves.
# NOTE(review): 'val_acc' is the metric name reported by older Keras releases;
# newer ones use 'val_accuracy' -- confirm against the installed version.
history = model.fit_generator(
    train_datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=[ModelCheckpoint('VGG16-transferlearning.model', monitor='val_acc', save_best_only=True)]
)

In [None]:
# Draw the training curves from `history` side by side:
# accuracy in the left panel, loss in the right panel.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Accuracy per epoch, training vs. validation.
acc_ax.plot(history.history['acc'])
acc_ax.plot(history.history['val_acc'])
acc_ax.set_title('model accuracy')
acc_ax.set_ylabel('accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.legend(['train', 'valid'], loc='upper left')

# Loss per epoch, training vs. validation.
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'valid'], loc='upper left')

# Write the figure to disk before showing it.
fig.savefig('augmented.png')
plt.show()

In [None]:
# Build the CSV submitted for the Kaggle score: start from the sample
# submission file and overwrite its 'invasive' column with the predictions.
sample_submission2 = pd.read_csv("sample_submission.csv")

# predictions[idx] is written to every row whose 'name' equals test_names[idx].
# NOTE(review): this presumes predictions is ordered like test_names; neither
# is defined in this cell -- verify where they are created.
for idx, test_name in enumerate(test_names):
    row_mask = sample_submission2['name'] == test_name
    sample_submission2.loc[row_mask, 'invasive'] = predictions[idx]

# index=False keeps the DataFrame index out of the written file.
sample_submission2.to_csv("submit_augmented.csv", index=False)