In [1]:
## import libaries
import pandas as pd
import numpy as np
import cv2
import os, sys
from tqdm import tqdm

In [2]:
## load data
train = pd.read_csv('../../data/train.csv')
test = pd.read_csv('../../data/test.csv')

In [3]:
# function to read image
def read_img(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    img = cv2.resize(img, (224,224))
    return img

In [4]:
## set path for images
TRAIN_PATH = '../../data/train_img/'
TEST_PATH = '../../data/test_img/'

In [5]:
# load data
train_img, test_img = [],[]
for img_path in tqdm(train['image_id'].values):
    train_img.append(read_img(TRAIN_PATH + img_path + '.png'))

for img_path in tqdm(test['image_id'].values):
    test_img.append(read_img(TEST_PATH + img_path + '.png'))

100%|██████████| 3215/3215 [00:10<00:00, 316.41it/s]
100%|██████████| 1732/1732 [00:04<00:00, 360.49it/s]


In [6]:
# normalize images
x_train = np.array(train_img, np.float32) / 224.
x_test = np.array(test_img, np.float32) / 224.

In [7]:
# target variable - encoding numeric value
label_list = train['label'].tolist()
Y_train = {k:v+1 for v,k in enumerate(set(label_list))}
y_train = [Y_train[k] for k in label_list]   
y_train = np.array(y_train)

In [8]:
from keras import applications
from keras.models import Model
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.metrics import categorical_accuracy
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [9]:
y_train = to_categorical(y_train)

In [43]:
#Transfer learning with Inception V3 
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [47]:
for layers in base_model.layers[:5]:
    layers.trainble=False

In [49]:
## set model architechture 
add_model = Sequential()
add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dense(224, activation='relu'))
add_model.add(Dropout(0.25))
add_model.add(Dense(y_train.shape[1], activation='softmax'))

model = Model(inputs=base_model.input, outputs=add_model(base_model.output))
model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=1e-4, momentum=0.9, decay=1e-6,nesterov=True),
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [50]:
batch_size = 16  # tune it
epochs = 70 # increase it

train_datagen = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.1,
        height_shift_range=0.1, 
        horizontal_flip=True)
train_datagen.fit(x_train)

In [51]:
# Continuing after 24 epochs 
history = model.fit_generator(
    train_datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    callbacks=[ModelCheckpoint('VGG16-transferlearning.model', monitor='val_acc', save_best_only=True)],
    verbose=2
)

Epoch 1/70




349s - loss: 3.1412 - acc: 0.1016
Epoch 2/70
345s - loss: 2.6835 - acc: 0.2219
Epoch 3/70
344s - loss: 2.2556 - acc: 0.3266
Epoch 4/70
344s - loss: 1.8487 - acc: 0.4376
Epoch 5/70
349s - loss: 1.6019 - acc: 0.5239
Epoch 6/70
343s - loss: 1.3949 - acc: 0.5733
Epoch 7/70
338s - loss: 1.1518 - acc: 0.6405
Epoch 8/70
338s - loss: 1.0481 - acc: 0.6758
Epoch 9/70
338s - loss: 0.9401 - acc: 0.7099
Epoch 10/70
339s - loss: 0.8003 - acc: 0.7565
Epoch 11/70
338s - loss: 0.7221 - acc: 0.7677
Epoch 12/70
339s - loss: 0.6625 - acc: 0.7943
Epoch 13/70
339s - loss: 0.5845 - acc: 0.8136
Epoch 14/70
339s - loss: 0.5150 - acc: 0.8350
Epoch 15/70
347s - loss: 0.4635 - acc: 0.8490
Epoch 16/70
345s - loss: 0.4254 - acc: 0.8628
Epoch 17/70
340s - loss: 0.3730 - acc: 0.8734
Epoch 18/70
344s - loss: 0.3420 - acc: 0.8930
Epoch 19/70
344s - loss: 0.3094 - acc: 0.8994
Epoch 20/70
344s - loss: 0.2742 - acc: 0.9100
Epoch 21/70
340s - loss: 0.2656 - acc: 0.9075
Epoch 22/70
338s - loss: 0.2472 - acc: 0.9212
Epoch 23

In [52]:
## predict test data
predictions = model.predict(x_test)


In [53]:
# get labels
predictions = np.argmax(predictions, axis=1)
rev_y = {v:k for k,v in Y_train.items()}
pred_labels = [rev_y[k] for k in predictions]

In [42]:
## make submission
# assert False 
# print("Don't forget to commit before this")
sub = pd.DataFrame({'image_id':test.image_id, 'label':pred_labels})
sub.to_csv('sub_vgg_exp4_2disabled.csv', index=False) ## ~0.59