In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import cv2
import math
from glob import glob
import os

In [2]:
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)

In [3]:
DATA_DIR = '/home/chicm/ml/cnnpractices/cervc/data/crop'
TRAIN_DIR = DATA_DIR+'/train'
TEST_DIR = DATA_DIR + '/test'
VAL_DIR = DATA_DIR + '/valid'
RESULT_DIR = DATA_DIR + '/results'

batch_size = 64


## Use convnet

In [4]:
from keras.models import Sequential, Model, load_model
from keras import applications
from keras import optimizers
from keras.layers import Dropout, Flatten, Dense
from keras.optimizers import SGD, RMSprop, Adam
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, MaxPooling2D


input_shape = (224, 224, 3)



Using TensorFlow backend.


In [12]:

def get_conv_layers(input_shape):
    return [
        Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        BatchNormalization(axis=-1),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D((2, 2), strides=(2, 2)),
        
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D((2, 2), strides=(2, 2)),
        
        Conv2D(128, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        Conv2D(128, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D((2, 2), strides=(2, 2)),
        
        Conv2D(256, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D((2, 2), strides=(2, 2)),
        Conv2D(256, (3,3), activation='relu'),
        BatchNormalization(axis=-1),
        MaxPooling2D((2, 2), strides=(2, 2)),
        
        Flatten(),
        Dropout(0.25),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.25),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.25),
        Dense(3, activation='softmax')
    ]
    

In [14]:
del model
model = Sequential(get_conv_layers(input_shape))
model.compile(Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
batch_normalization_21 (Batc (None, 222, 222, 32)      128       
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 220, 220, 32)      9248      
_________________________________________________________________
batch_normalization_22 (Batc (None, 220, 220, 32)      128       
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 110, 110, 32)      0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 108, 108, 64)      18496     
_________________________________________________________________
batch_normalization_23 (Batc (None, 108, 108, 64)      256       
__________

In [15]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

batch_size = 32

train_datagen = ImageDataGenerator(
        rotation_range=30, 
        zoom_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.05, 
        shear_range=0.1,
        vertical_flip=True,
        horizontal_flip=True,
        rescale = 1./255)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    VAL_DIR,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical')

Found 7024 images belonging to 3 classes.
Found 600 images belonging to 3 classes.


In [16]:
print(train_generator.samples)
print(len(model.layers))
#print(model.summary())

7024
30


In [17]:
from keras.callbacks import LearningRateScheduler
def lr_schedule(epoch):
    if epoch <= 10:
        return 0.01
    elif epoch <= 50: 
        return 0.001
    else:
        return 0.0001

In [18]:
epochs = 300
w_file = RESULT_DIR + '/conv_1.h5'

#model.load_weights(RESULT_DIR + '/res_1.h5')

history = model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    epochs=epochs,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    verbose=2,
    callbacks=[ModelCheckpoint(w_file, monitor='val_acc', save_best_only=True),
              LearningRateScheduler(lr_schedule)]
)

Epoch 1/300
45s - loss: 1.1284 - acc: 0.4603 - val_loss: 1.0547 - val_acc: 0.4497
Epoch 2/300
46s - loss: 1.0265 - acc: 0.4867 - val_loss: 1.0206 - val_acc: 0.4472
Epoch 3/300
45s - loss: 0.9990 - acc: 0.4956 - val_loss: 0.9933 - val_acc: 0.5282
Epoch 4/300
46s - loss: 0.9811 - acc: 0.5060 - val_loss: 0.9735 - val_acc: 0.5423
Epoch 5/300
49s - loss: 0.9782 - acc: 0.5030 - val_loss: 0.9566 - val_acc: 0.4771
Epoch 6/300
46s - loss: 0.9721 - acc: 0.5061 - val_loss: 1.0904 - val_acc: 0.4613
Epoch 7/300
44s - loss: 0.9711 - acc: 0.4980 - val_loss: 1.0370 - val_acc: 0.5211
Epoch 8/300
44s - loss: 0.9730 - acc: 0.5047 - val_loss: 0.9975 - val_acc: 0.5106
Epoch 9/300
44s - loss: 0.9648 - acc: 0.5111 - val_loss: 1.0349 - val_acc: 0.4542
Epoch 10/300
44s - loss: 0.9666 - acc: 0.5091 - val_loss: 0.9496 - val_acc: 0.5194
Epoch 11/300
45s - loss: 0.9674 - acc: 0.5081 - val_loss: 0.9509 - val_acc: 0.5387
Epoch 12/300
47s - loss: 0.9480 - acc: 0.5121 - val_loss: 0.9193 - val_acc: 0.5440
Epoch 13/300


KeyboardInterrupt: 

In [12]:
train_generator.class_indices

{'Type_1': 0, 'Type_2': 1, 'Type_3': 2}

In [21]:
import gc
gc.collect()

0

In [None]:
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    class_mode='categorical')

In [23]:

model.load_weights(w_file)
preds1 = model.predict_generator(test_generator, steps = test_generator.samples // batch_size)
print(preds1.shape)

(512, 3)


In [24]:
w_file2 = RESULT_DIR + '/res_1.h5'
model.load_weights(w_file2)
preds2 = model.predict(test_images)
print(preds2.shape)

(512, 3)


In [25]:
preds = np.mean([preds1, preds2], axis=0)
print(preds.shape)

(512, 3)


In [26]:
print(preds[:3])

[[  9.41162463e-04   9.99011397e-01   4.74229528e-05]
 [  7.16020539e-03   8.88448358e-01   1.04391396e-01]
 [  7.55762756e-02   6.26130223e-01   2.98293501e-01]]


In [28]:
def do_clip(arr, mx): 
    return np.clip(arr, (1-mx)/2, mx)

In [None]:
subm = do_clip(preds, 0.83)
subm_name = RESULT_DIR+'/sub1.csv' 

classes = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
print(classes)

submission = pd.DataFrame(subm, columns=classes)

submission.insert(0, 'image_name', [a[8:] for a in test_generator.filenames])
    #print [a for a in batches.filenames][:10]
print(submission.head())
submission.to_csv(subm_name, index=False)

In [37]:
subm = do_clip(preds, 0.83)
sample_submission = pd.read_csv("/home/chicm/ml/cnnpractices/cervc/data/sample_submission.csv")

#for i, name in enumerate(test_filenames):
#    f_name = name.split('/')[1]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_1'] = subm[i][0]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_2'] = subm[i][1]
#    sample_submission.loc[sample_submission['image_name'] == f_name, 'Type_3'] = subm[i][2]
sample_submission['Type_1'] = subm[:, 0]
sample_submission['Type_2'] = subm[:, 1]
sample_submission['Type_3'] = subm[:, 2]
sample_submission['image_name'] = fnames

sample_submission.to_csv(RESULT_DIR+"/submit1.csv", index=False)

In [31]:
print(subm[:5])

[[ 0.085       0.82999998  0.085     ]
 [ 0.085       0.82999998  0.1043914 ]
 [ 0.085       0.62613022  0.2982935 ]
 [ 0.085       0.085       0.82999998]
 [ 0.085       0.21464032  0.78290635]]


In [38]:
print(sample_submission)

    image_name    Type_1    Type_2    Type_3
0       17.jpg  0.085000  0.830000  0.085000
1      416.jpg  0.085000  0.830000  0.104391
2      372.jpg  0.085000  0.626130  0.298294
3       44.jpg  0.085000  0.085000  0.830000
4       94.jpg  0.085000  0.214640  0.782906
5       35.jpg  0.085000  0.085000  0.830000
6      381.jpg  0.085000  0.716729  0.255482
7       42.jpg  0.085000  0.830000  0.085000
8      482.jpg  0.566722  0.085000  0.433181
9      178.jpg  0.085000  0.085000  0.830000
10      99.jpg  0.085000  0.556539  0.427407
11     241.jpg  0.085000  0.085000  0.830000
12      52.jpg  0.085000  0.085000  0.830000
13     165.jpg  0.085000  0.764459  0.218825
14     355.jpg  0.830000  0.085000  0.085000
15     345.jpg  0.085000  0.830000  0.085000
16     365.jpg  0.085000  0.830000  0.085000
17     306.jpg  0.085000  0.830000  0.085000
18     395.jpg  0.085000  0.830000  0.085000
19     393.jpg  0.085000  0.085000  0.830000
20     118.jpg  0.085000  0.085000  0.830000
21      39