In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import cv2
import math
from glob import glob
import os

In [2]:
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)

In [3]:
DATA_DIR = '/home/chicm/ml/cnnpractices/cervc/data/crop'
TRAIN_DIR = DATA_DIR+'/train'
TEST_DIR = DATA_DIR + '/test'
VAL_DIR = DATA_DIR + '/valid'
RESULT_DIR = DATA_DIR + '/results'

batch_size = 64


## Use VGG

In [4]:
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers
from keras.layers import Dropout, Flatten, Dense
from keras.optimizers import SGD, RMSprop, Adam
from keras.layers.normalization import BatchNormalization
#from keras.applications.imagenet_utils import preprocess_input
from keras import backend as K

img_rows, img_cols, img_channel = 224, 224, 3


Using TensorFlow backend.


In [5]:
vgg16_model = applications.VGG16(weights='imagenet', include_top=False, 
                                 input_shape=(img_rows, img_cols, img_channel))

In [5]:
incep_model = applications.InceptionV3(include_top=False, 
                                       weights='imagenet', 
                                       input_shape=(img_rows, img_cols, img_channel))

In [6]:
res_model = applications.ResNet50(
            include_top=False, 
            weights='imagenet',
            input_tensor=None, 
            input_shape=(img_rows, img_cols, img_channel),
            pooling=None)

In [10]:
base_model = incep_model

add_model = Sequential()

add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
add_model.add(Dense(256, activation='relu'))
add_model.add(BatchNormalization())
add_model.add(Dense(256, activation='relu'))
add_model.add(BatchNormalization())
#add_model.add(Dropout(0.4))
add_model.add(Dense(3, activation='softmax'))

model = Model(inputs=base_model.input, outputs=add_model(base_model.output))
for layer in model.layers[:300]:#
    layer.trainable = False
    
model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
              metrics=['accuracy'])
#model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
#model.summary()

In [11]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 111, 111, 32)  864                                          
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 111, 111, 32)  96                                           
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 111, 111, 32)  0                                            
___________________________________________________________________________________________

In [12]:
#model.summary()
def preprocess_input(x, data_format=None):
    #print(x.shape)
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in {'channels_last', 'channels_first'}
    x = x[:, :, ::-1]
    x[:, :, 0] -= 103.939
    x[:, :, 1] -= 116.779
    x[:, :, 2] -= 123.68
    return x

In [13]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

batch_size = 32

train_datagen = ImageDataGenerator(
        rotation_range=30, 
        zoom_range=0.1,
        #channel_shift_range=20,
        #width_shift_range=0.1,
        #height_shift_range=0.05, 
        #shear_range=0.1,
        vertical_flip=True,
        horizontal_flip=True,
        rescale = 1./255,
        preprocessing_function=preprocess_input)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    VAL_DIR,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    class_mode='categorical')

Found 7024 images belonging to 3 classes.
Found 600 images belonging to 3 classes.


In [14]:
print(train_generator.samples)
print(len(model.layers))
#print(model.summary())

7024
312


In [15]:
from keras.callbacks import LearningRateScheduler
def lr_schedule(epoch):
    if epoch <= 5:
        return 0.001
    if epoch <= 10:
        return 0.00001
    if epoch <= 30:
        return 0.0001
    elif epoch <= 50: 
        return 0.00005
    else:
        return 0.00001

In [None]:
epochs = 300
w_file = RESULT_DIR + '/inc_1.h5'

#model.load_weights(RESULT_DIR + '/res_1.h5')

history = model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    epochs=epochs,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    verbose=2,
    callbacks=[ModelCheckpoint(w_file, monitor='val_acc', save_best_only=True),
              LearningRateScheduler(lr_schedule)]
)

Epoch 1/300
50s - loss: 1.2089 - acc: 0.4142 - val_loss: 1.1238 - val_acc: 0.4392
Epoch 2/300
44s - loss: 1.0245 - acc: 0.4893 - val_loss: 0.9996 - val_acc: 0.4982
Epoch 3/300
42s - loss: 0.9916 - acc: 0.5118 - val_loss: 1.0409 - val_acc: 0.4789
Epoch 4/300
43s - loss: 0.9828 - acc: 0.5203 - val_loss: 1.0140 - val_acc: 0.4648
Epoch 5/300
43s - loss: 0.9731 - acc: 0.5240 - val_loss: 1.0015 - val_acc: 0.4930
Epoch 6/300
43s - loss: 0.9649 - acc: 0.5248 - val_loss: 1.0193 - val_acc: 0.4859
Epoch 7/300
44s - loss: 0.9605 - acc: 0.5310 - val_loss: 1.0129 - val_acc: 0.5053
Epoch 8/300
43s - loss: 0.9599 - acc: 0.5351 - val_loss: 1.0136 - val_acc: 0.5106
Epoch 9/300
44s - loss: 0.9499 - acc: 0.5365 - val_loss: 1.0130 - val_acc: 0.5123
Epoch 10/300
43s - loss: 0.9399 - acc: 0.5364 - val_loss: 0.9711 - val_acc: 0.5405
Epoch 11/300
43s - loss: 0.9434 - acc: 0.5338 - val_loss: 1.0026 - val_acc: 0.5018
Epoch 12/300
43s - loss: 0.9412 - acc: 0.5428 - val_loss: 0.9899 - val_acc: 0.5282
Epoch 13/300


In [12]:
train_generator.class_indices

{'Type_1': 0, 'Type_2': 1, 'Type_3': 2}

In [None]:
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    class_mode='categorical')

In [23]:

model.load_weights(w_file)
preds1 = model.predict_generator(test_generator, steps = test_generator.samples // batch_size)
print(preds1.shape)

(512, 3)


In [24]:
w_file2 = RESULT_DIR + '/res_1.h5'
model.load_weights(w_file2)
preds2 = model.predict(test_images)
print(preds2.shape)

(512, 3)


In [25]:
preds = np.mean([preds1, preds2], axis=0)
print(preds.shape)

(512, 3)


In [26]:
print(preds[:3])

[[  9.41162463e-04   9.99011397e-01   4.74229528e-05]
 [  7.16020539e-03   8.88448358e-01   1.04391396e-01]
 [  7.55762756e-02   6.26130223e-01   2.98293501e-01]]


In [28]:
def do_clip(arr, mx): 
    return np.clip(arr, (1-mx)/2, mx)

In [None]:
subm = do_clip(preds, 0.83)
subm_name = RESULT_DIR+'/sub1.csv' 

classes = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
print(classes)

submission = pd.DataFrame(subm, columns=classes)

submission.insert(0, 'image_name', [a[8:] for a in test_generator.filenames])
    #print [a for a in batches.filenames][:10]
print(submission.head())
submission.to_csv(RESULT_DIR+"/submit1.csv", index=False)

In [37]:
subm = do_clip(preds, 0.83)
sample_submission = pd.read_csv("/home/chicm/ml/cnnpractices/cervc/data/sample_submission.csv")

#for i, name in enumerate(test_filenames):
#    f_name = name.split('/')[1]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_1'] = subm[i][0]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_2'] = subm[i][1]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_3'] = subm[i][2]
sample_submission['Type_1'] = subm[:, 0]
sample_submission['Type_2'] = subm[:, 1]
sample_submission['Type_3'] = subm[:, 2]
sample_submission['image_name'] = fnames

sample_submission.to_csv(RESULT_DIR+"/submit1.csv", index=False)

In [31]:
print(subm[:5])

[[ 0.085       0.82999998  0.085     ]
 [ 0.085       0.82999998  0.1043914 ]
 [ 0.085       0.62613022  0.2982935 ]
 [ 0.085       0.085       0.82999998]
 [ 0.085       0.21464032  0.78290635]]


In [38]:
print(sample_submission)

    image_name    Type_1    Type_2    Type_3
0       17.jpg  0.085000  0.830000  0.085000
1      416.jpg  0.085000  0.830000  0.104391
2      372.jpg  0.085000  0.626130  0.298294
3       44.jpg  0.085000  0.085000  0.830000
4       94.jpg  0.085000  0.214640  0.782906
5       35.jpg  0.085000  0.085000  0.830000
6      381.jpg  0.085000  0.716729  0.255482
7       42.jpg  0.085000  0.830000  0.085000
8      482.jpg  0.566722  0.085000  0.433181
9      178.jpg  0.085000  0.085000  0.830000
10      99.jpg  0.085000  0.556539  0.427407
11     241.jpg  0.085000  0.085000  0.830000
12      52.jpg  0.085000  0.085000  0.830000
13     165.jpg  0.085000  0.764459  0.218825
14     355.jpg  0.830000  0.085000  0.085000
15     345.jpg  0.085000  0.830000  0.085000
16     365.jpg  0.085000  0.830000  0.085000
17     306.jpg  0.085000  0.830000  0.085000
18     395.jpg  0.085000  0.830000  0.085000
19     393.jpg  0.085000  0.085000  0.830000
20     118.jpg  0.085000  0.085000  0.830000
21      39