In [1]:
# Render matplotlib figures directly in the notebook output area.
%matplotlib inline

In [2]:
import os

import numpy as np
from keras.applications import VGG16
#from keras.applications import InceptionResNetV2
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, BatchNormalization, Activation, Flatten, Dropout, Conv2D, MaxPooling2D
from keras.models import Model, Sequential
from keras.optimizers import Adam, Nadam
from keras.preprocessing import image
from matplotlib import pyplot as plt
from PIL import Image
from scipy.misc import toimage

# Root of the project checkout. Defaults to the original author's location but
# can be redirected with the STATE_FARM_PROJECT_DIR environment variable, so the
# notebook can run on another machine without editing this cell.
project_dir = os.environ.get(
    'STATE_FARM_PROJECT_DIR',
    '/home/arman/deep-learning/kaggle-competitions/kaggle-state-farm-notebook/',
)
# Later cells build file names by plain string concatenation
# (e.g. model_path + 'state_farm.h5'), so every directory must end with '/'.
if not project_dir.endswith('/'):
    project_dir += '/'

# One sub-directory per data split; each is passed to flow_from_directory below.
train_path = project_dir + 'data/train/'
valid_path = project_dir + 'data/valid/'
test_path = project_dir + 'data/test/'
# Weights are written directly into the project root.
model_path = project_dir

Using Theano backend.
Using cuDNN version 5103 on context None
Mapped name None to device cuda: Tesla K80 (30FC:00:00.0)


## VGG16 

In [4]:
# Convolutional part of VGG16 only (no fully-connected top), initialised with
# the ImageNet weights. The input shape is channels-first: (channels, height, width).
vgg_input_shape = (3, 224, 224)
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=vgg_input_shape,
    pooling=None,
    classes=1000,
)

In [82]:
#base_model.summary()

In [83]:
dropout_rate = 0.5

# Classification head stacked on the final feature map of the VGG16 base:
# Flatten -> BN -> [Dense(1024) -> Dropout -> BN] -> [Dense(1024) -> Dropout]
# -> 10-way softmax.
x = Flatten()(base_model.output)
x = BatchNormalization()(x)

x = Dense(1024, kernel_initializer='truncated_normal', activation='relu')(x)
x = Dropout(dropout_rate)(x)
x = BatchNormalization()(x)

x = Dense(1024, kernel_initializer='truncated_normal', activation='relu')(x)
x = Dropout(dropout_rate)(x)

y = Dense(10, kernel_initializer='truncated_normal', name='state_farm_classifier', activation='softmax')(x)

inp = base_model.input
out = y

state_farm_model = Model(inp, out)

# Freeze the pretrained base so that only the new head is trained.
# The base layers are shared with `state_farm_model`, so flagging them here
# replaces the earlier "first 19 layers by index" loop without depending on a
# hard-coded layer count. The head layers were just created and keep their
# default trainable=True.
for layer in base_model.layers:
    layer.trainable = False

In [70]:
#state_farm_model.summary()

In [None]:
# List every layer with its trainable flag to verify which part of the
# network is frozen.
for current_layer in state_farm_model.layers:
    print("{} {}".format(current_layer.name, current_layer.trainable))

In [85]:
# Multi-class classification set-up: Adam optimiser, categorical
# cross-entropy loss, accuracy reported during training.
vgg_optimizer = Adam(lr=0.0005)
state_farm_model.compile(
    loss='categorical_crossentropy',
    optimizer=vgg_optimizer,
    metrics=['accuracy'],
)

## Homegrown CNN

In [4]:
dropout_rate = 0.5


def _conv_stage(filters, **conv_kwargs):
    """Return one convolutional stage: 3x3 'same' ReLU conv, max-pool, batch-norm over axis 1."""
    return [
        Conv2D(filters, 3, activation='relu', padding='same', **conv_kwargs),
        MaxPooling2D(),
        BatchNormalization(axis=1),
    ]


def _dense_stage(units):
    """Return one fully-connected stage: ReLU dense layer, dropout, batch-norm."""
    return [
        Dense(units, activation='relu'),
        Dropout(dropout_rate),
        BatchNormalization(),
    ]


# NOTE: this cell rebinds `state_farm_model`, replacing any model built in an
# earlier cell under the same name.
# Three conv stages (64 -> 128 -> 256 filters, one conv each), then two
# 256-unit dense stages and a 10-way softmax classifier.
cnn_layers = (
    _conv_stage(64, input_shape=(3, 224, 224))
    + _conv_stage(128)
    + _conv_stage(256)
    + [Flatten()]
    + _dense_stage(256)
    + _dense_stage(256)
    + [Dense(10, activation='softmax')]
)

state_farm_model = Sequential(cnn_layers)
state_farm_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.001),
    metrics=['accuracy'],
)


## Common training code

In [5]:
batch_size = 32
# Validation uses twice the training batch size; named once so that the
# generator and the step count below cannot drift apart.
valid_batch_size = batch_size * 2

# Training images are randomly augmented; validation images are read as-is.
# NOTE(review): `preprocess_input` is imported but never applied to these
# generators. The ImageNet-pretrained VGG16 base presumably expects that
# preprocessing - confirm whether a `preprocessing_function` should be supplied.
train_gen = image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.10,
    height_shift_range=0.05,
    # zoom_range=0.1,
    shear_range=0.1,
    channel_shift_range=20,
)
valid_gen = image.ImageDataGenerator()

train_batches = train_gen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    shuffle=True,
    batch_size=batch_size,
    class_mode='categorical',
)
# shuffle=False keeps validation batches in a fixed order.
valid_batches = valid_gen.flow_from_directory(
    valid_path,
    target_size=(224, 224),
    shuffle=False,
    batch_size=valid_batch_size,
    class_mode='categorical',
)

# Number of batches needed to see every sample once, rounding the last partial
# batch up. float() forces true division even where `/` on two ints would floor.
steps_per_epoch = int(np.ceil(train_batches.samples / float(batch_size)))
validation_steps = int(np.ceil(valid_batches.samples / float(valid_batch_size)))

Found 19125 images belonging to 10 classes.
Found 3299 images belonging to 10 classes.


In [6]:
# One full pass over the training batches, evaluated on the validation
# batches at the end of the epoch. The returned History object is the cell output.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    epochs=1,
    validation_data=valid_batches,
    validation_steps=validation_steps,
)
state_farm_model.fit_generator(train_batches, **fit_options)

Epoch 1/1


<keras.callbacks.History at 0x7f88472b2780>

In [14]:
# Overwrite the compiled optimiser's learning rate in place; takes effect on
# the next call to fit_generator without recompiling the model.
new_learning_rate = 0.01
state_farm_model.optimizer.lr.set_value(new_learning_rate)

In [10]:
# Persist the current weights (weights only, not the architecture) under the
# project directory.
weights_file = model_path + 'state_farm.h5'
state_farm_model.save_weights(weights_file)

## Kaggle submission code

In [None]:
# Batch size for inference; named once so the generator and the step count
# below always agree.
test_batch_size = 8

# Unlabelled test images: no augmentation, no shuffling (so predictions stay
# in the same order as test_batches.filenames), and class_mode=None so the
# generator yields images only.
test_gen = image.ImageDataGenerator()
test_batches = test_gen.flow_from_directory(
    test_path,
    target_size=(224, 224),
    shuffle=False,
    batch_size=test_batch_size,
    class_mode=None,
)
# Enough batches to cover every test image, rounding the last partial batch up.
# float() forces true division even where `/` on two ints would floor.
steps = int(np.ceil(test_batches.samples / float(test_batch_size)))


In [33]:
# Rewind the directory iterator so prediction starts at the first file and
# row i of `pred_gen` corresponds to test_batches.filenames[i], even when this
# cell is re-run or the iterator was advanced by an earlier call.
test_batches.reset()
pred_gen = state_farm_model.predict_generator(test_batches, steps)

In [None]:
# Sanity check: display the shape of the prediction array.
pred_gen.shape
#Image.open(test_path + test_batches.filenames[5])
#print(test_batches.filenames[0])

In [None]:
#probabilities = pred_gen.clip(min=0.05, max=0.95)

In [21]:
# The submission identifies each row by the bare image file name.
# test_batches.filenames holds paths relative to test_path (presumably
# '<subdir>/<file>'); basename drops the directory part whatever its length,
# instead of assuming a fixed 8-character prefix.
ids = [os.path.basename(element) for element in test_batches.filenames]
# Column vector so it can be joined side by side with the prediction matrix.
ids = np.asarray(ids).reshape(-1, 1)
print(pred_gen.shape)
print(ids.shape)
# Every prediction row needs exactly one id; fail here with a clear message
# rather than in the concatenation that follows.
assert ids.shape[0] == pred_gen.shape[0], (
    "number of file names (%d) does not match number of predictions (%d)"
    % (ids.shape[0], pred_gen.shape[0])
)

In [54]:
# Put the id column in front of the per-class prediction columns
# (both arrays are 2-D, so the join is along the column axis).
submission_columns = (ids, pred_gen)
results = np.concatenate(submission_columns, axis=1)
results.shape

In [None]:
# Write the submission CSV into the project directory (same location as the
# previously hard-coded absolute path).
submission_file_name = model_path + 'submission1.csv'
np.savetxt(
    submission_file_name,
    results,
    # Plain '%s': a minimum field width would left-pad short values with spaces.
    fmt='%s',
    delimiter=',',
    header='img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9',
    # savetxt prefixes the header with '# ' by default, which would turn the
    # first column name into '# img'; an empty prefix writes the header verbatim.
    comments='',
)