In [1]:
from keras import backend as K
from keras.applications.vgg16 import VGG16
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Input, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model, load_model
from keras.optimizers import Adam, RMSprop, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2


# Settings for each dataset variant, keyed by the name assigned to `file`:
# (path, batch_size, nb_train_samples, nb_validation_samples)
DATASETS = {
    'all': ('/home/ubuntu/nbs/state_farm/data/', 64, 34000, 4800),
    'sample': ('/home/ubuntu/nbs/state_farm/data/sample/', 64, 1600, 300),
    'mini_sample': ('/home/ubuntu/nbs/state_farm/data/mini_sample/', 1, 200, 20),
}


def dataset_config(name):
    """Return the settings tuple for the dataset variant `name`.

    Args:
        name: one of the keys of DATASETS ('all', 'sample', 'mini_sample').

    Returns:
        Tuple (path, batch_size, nb_train_samples, nb_validation_samples).

    Raises:
        ValueError: if `name` is not a known dataset variant. An exception is
            raised instead of calling exit() so that the notebook kernel stays
            alive and the offending value is reported.
    """
    if name not in DATASETS:
        raise ValueError('Unknown file name: %r (expected one of %s)'
                         % (name, sorted(DATASETS)))
    return DATASETS[name]


# Dataset variant used by the rest of the notebook.
file = 'all'
path, batch_size, nb_train_samples, nb_validation_samples = dataset_config(file)

# Spatial size (two dimensions) every image is resized to by the generators.
target_size = (224, 224)

#generate the batches
def get_batches(directory, target_size=target_size, batch_size=batch_size, shuffle=False):
    """Create an iterator over the images under `directory`, without augmentation.

    Args:
        directory: root folder handed to `flow_from_directory`.
        target_size: size the images are resized to; the default is the
            module-level `target_size` as it was when this function was defined.
        batch_size: number of images per batch; the default is the module-level
            `batch_size` as it was when this function was defined.
        shuffle: whether the iterator shuffles the images (off by default).

    Returns:
        The iterator produced by `ImageDataGenerator().flow_from_directory`,
        configured with class_mode='categorical'.
    """
    # NOTE(review): the generator is built with no preprocessing arguments, so
    # images reach the network as loaded. Confirm this is the intended input
    # for the ImageNet-initialised VGG16 this notebook uses.
    flow_options = dict(directory=directory,
                        target_size=target_size,
                        batch_size=batch_size,
                        class_mode='categorical',
                        shuffle=shuffle)
    plain_generator = ImageDataGenerator()
    return plain_generator.flow_from_directory(**flow_options)

# Training-time augmentation: rotation and width shift are left at 0, while
# height shift, shear, zoom and channel shift are switched on.
train_gen = ImageDataGenerator(
    rotation_range=0,
    width_shift_range=0,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    channel_shift_range=10,
    fill_mode='nearest',
)

# Shuffled, augmented training batches.
# (For un-augmented training, get_batches(path+'train_set', shuffle=True)
# yields the same images without the transformations above.)
batches = train_gen.flow_from_directory(
    directory=path+'train_set',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)

# Validation batches: no augmentation, fixed order, and TWICE the training
# batch size. Any step count used with this iterator has to be derived from
# that doubled batch size.
valid_batches = get_batches(path+'valid_set', batch_size=batch_size*2, shuffle=False)



Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


Found 17616 images belonging to 10 classes.
Found 4808 images belonging to 10 classes.


In [2]:
# Convolutional base: VGG16 loaded with ImageNet weights and without its top
# classifier layers (include_top=False), for 224x224 inputs with 3 channels.
initial_model = VGG16(include_top=False,
                      weights="imagenet",
                      input_shape=(224, 224, 3))


In [3]:
# Stage 1: put a new classification head on the VGG16 base and train only the head.

# Head: flatten -> 4096-unit ReLU dense -> dropout -> batch norm -> softmax
# with one output per class found by the training iterator.
x = Flatten()(initial_model.output)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
predictions = Dense(batches.num_class, activation='softmax')(x)
# Keras 2 keyword names (`inputs`/`outputs`); the Keras 1 spellings
# `input`/`output` are deprecated.
model = Model(inputs=initial_model.input, outputs=predictions)

# Freeze every layer of the pretrained base so that only the new head learns.
for layer in initial_model.layers:
    layer.trainable = False

# 1e-4 is the same value as the `10e-5` this cell used before, written in the
# conventional form.
opt = Adam(lr=1e-4)

model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# `valid_batches` was created with its own batch size (batch_size*2), so the
# number of validation steps is derived from the iterator itself. Computing it
# from `batch_size` would request about two passes over the validation images
# per epoch. Ceil-division keeps the final partial batch.
val_steps = (valid_batches.samples + valid_batches.batch_size - 1) // valid_batches.batch_size

# NOTE(review): steps_per_epoch comes from the configured nb_train_samples,
# which need not equal the number of files the training iterator found;
# confirm the intended epoch length.
model.fit_generator(batches, epochs=10,
                    steps_per_epoch=nb_train_samples // batch_size,
                    validation_data=valid_batches,
                    validation_steps=val_steps)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5df1cb5450>

In [4]:
# Stage 2: fine-tuning. Keep the first 10 layers of the model frozen and train
# everything after them with plain SGD.
# Optional checkpoint of the head-only stage:
#model.save('models/batchnorm_step1.h5')

for layer in model.layers[:10]:
    layer.trainable = False

for layer in model.layers[10:]:
    layer.trainable = True

# 1e-4 is the same value as the `10e-5` this cell used before.
opt = SGD(lr=1e-4)
# Recompile after changing the `trainable` flags, with the new optimizer.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# `valid_batches` was created with its own batch size (batch_size*2), so the
# number of validation steps is derived from the iterator itself. Computing it
# from `batch_size` would request about two passes over the validation images
# per epoch. Ceil-division keeps the final partial batch.
val_steps = (valid_batches.samples + valid_batches.batch_size - 1) // valid_batches.batch_size

model.fit_generator(batches, epochs=10,
                    steps_per_epoch=nb_train_samples // batch_size,
                    validation_data=valid_batches,
                    validation_steps=val_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5e18037110>

In [31]:
# Save the fine-tuned model, then predict class probabilities for the test images.
print('start!')
model.save('models/batchnorm_ft.h5')

# get_batches defaults to shuffle=False, so predictions come back in the same
# order as test_batches.filenames.
test_batches = get_batches(path+'test', batch_size=batch_size)

# `steps` is a number of BATCHES, not a number of images. Passing
# test_batches.samples here asked for roughly batch_size times more
# predictions than there are test images. Ceil-division covers every image,
# including the final partial batch.
test_steps = (test_batches.samples + test_batches.batch_size - 1) // test_batches.batch_size
preds = model.predict_generator(test_batches, steps=test_steps)

# Bound every predicted probability to [0.02, 0.98].
# NOTE(review): presumably to cap the log-loss penalty of confident mistakes; confirm.
preds = preds.clip(min=0.02, max=0.98)

start!
Found 79726 images belonging to 1 classes.


KeyboardInterrupt: 

In [32]:
import os

import numpy as np
import pandas as pd

# Build the submission table: one row per test image, an `img` column with the
# image file name followed by one probability column per class (c0..c9).
filenames = test_batches.filenames

# Keep only the file name of each path. os.path.basename is used instead of a
# fixed-length slice so the result does not depend on how long the
# sub-directory prefix happens to be.
ids = np.array([os.path.basename(f) for f in filenames])

# Guard against stale or partial predictions, e.g. an interrupted prediction cell.
if len(ids) != len(preds):
    raise ValueError('Got %d predictions for %d test files; re-run the prediction cell.'
                     % (len(preds), len(ids)))

class_columns = ['c%d' % i for i in range(10)]
df_subm = pd.DataFrame(preds, columns=class_columns)
df_subm.insert(0, 'img', ids)
df_subm.head()

In [None]:
from IPython.display import FileLink

# Write the submission table as a gzip-compressed CSV, leaving out the
# DataFrame index, then show a link to the written file as the cell output.
submission_file_name = 'submission.csv.gz'
df_subm.to_csv(
    submission_file_name,
    compression='gzip',
    index=False,
)
FileLink(submission_file_name)