In [1]:
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Flatten, Dense, Dropout
from tensorflow.python.keras.applications.resnet50 import ResNet50
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import os


# ????
DATASET_PATH  = '/home/cslab/Desktop/2nd_ML/kaggle_dogcat'

# ????
IMAGE_SIZE = (224, 224)

# ?????
NUM_CLASSES = 2

# ? GPU ????????? batch size ????????
BATCH_SIZE = 8

# ??????
FREEZE_LAYERS = 2

# Epoch ?
NUM_EPOCHS = 20

# ?????????
WEIGHTS_FINAL = 'model-resnet50-final.h5'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# ?? data augmentation ?????????????
train_datagen = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   channel_shift_range=10,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
train_batches = train_datagen.flow_from_directory(DATASET_PATH + '/train',
                                                  target_size=IMAGE_SIZE,
                                                  interpolation='bicubic',
                                                  class_mode='categorical',
                                                  shuffle=True,
                                                  batch_size=BATCH_SIZE)

valid_datagen = ImageDataGenerator()
valid_batches = valid_datagen.flow_from_directory(DATASET_PATH + '/valid',
                                                  target_size=IMAGE_SIZE,
                                                  interpolation='bicubic',
                                                  class_mode='categorical',
                                                  shuffle=False,
                                                  batch_size=BATCH_SIZE)


Found 4000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [3]:
# ?????????
for cls, idx in train_batches.class_indices.items():
    print('Class #{} = {}'.format(idx, cls))

Class #0 = adogs
Class #1 = bcats


In [4]:
# ????? ResNet50 ?????????
# ?? ResNet50 ??? fully connected layers
net = ResNet50(include_top=False, weights='imagenet', input_tensor=None,
               input_shape=(IMAGE_SIZE[0],IMAGE_SIZE[1],3))
x = net.output
x = Flatten()(x)

# ?? DropOut layer
x = Dropout(0.5)(x)

# ?? Dense layer?? softmax ?????????
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)

# ??????????????
net_final = Model(inputs=net.input, outputs=output_layer)
for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True
    
# ?? Adam optimizer????? learning rate ?? fine-tuning
net_final.compile(optimizer=Adam(lr=1e-5),
                  loss='categorical_crossentropy', metrics=['accuracy'])

# ????????
print(net_final.summary())

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation (Activation)         (None, 112, 112, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [5]:
reduce_lr = ReduceLROnPlateau(factor=0.5, 
                              min_lr=1e-12, 
                              monitor='val_loss', 
                              patience=5, 
                              verbose=1)

In [6]:
# ????
net_final.fit_generator(train_batches,
                        steps_per_epoch = train_batches.samples // BATCH_SIZE,
                        validation_data = valid_batches,
                        validation_steps = valid_batches.samples // BATCH_SIZE,
                        epochs = NUM_EPOCHS,
                        
                       )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fd76c0c12e8>

In [7]:
# ????????
net_final.save(WEIGHTS_FINAL)

In [9]:

loss, accuracy = net_final.evaluate_generator(valid_batches, train_batches.samples // BATCH_SIZE)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))


Test: accuracy = 0.993750  ;  loss = 0.017053 


## Prepare Testing Data

In [11]:
import pandas as pd

test_filenames = os.listdir("/home/cslab/Desktop/2nd_ML/kaggle_dogcat/test")
test_df = pd.DataFrame({
    'filename': test_filenames
})
nb_samples = test_df.shape[0]

In [12]:
nb_samples

400

## Create Testing Generator

In [14]:
from keras_preprocessing.image import ImageDataGenerator

In [15]:
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df, 
    "/home/cslab/Desktop/2nd_ML/kaggle_dogcat/test", 
    x_col='filename',
    y_col=None,
    class_mode=None,
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
    shuffle=False
)

Found 400 validated image filenames.


## Predict

In [18]:
import numpy as np

In [32]:
predict = net_final.predict_generator(test_generator, steps=np.ceil(nb_samples/BATCH_SIZE))[:,1]

In [33]:
# y_pred = model.predict(image)[:,1] 

## Submission

In [36]:
test_df['Predicted'] = predict
# threshold = 0.5
# test_df['category'] = np.where(test_df['Predicted'] > threshold, 0,0)
# test_df.drop(['category'], axis=1,inplace=True)

In [37]:
test_df.head(10)

Unnamed: 0,filename,Predicted
0,363.jpg,0.263575
1,091.jpg,0.532961
2,194.jpg,0.408824
3,245.jpg,0.449729
4,033.jpg,0.350716
5,155.jpg,0.478875
6,303.jpg,0.409777
7,224.jpg,0.367594
8,176.jpg,0.346841
9,131.jpg,0.336298


In [43]:
submission_df = test_df.copy()

In [44]:
submission_df['ID'] = submission_df['filename'].str.split('.').str[0]
submission_df.drop(['filename'], axis=1, inplace=True)
# submission_df.to_csv('submission.csv', index=False)


In [45]:
submission_df.sort_values('ID',inplace=True)

In [47]:
submission_df.reset_index(drop=True, inplace=True)

In [48]:
submission_df.head(10)

Unnamed: 0,Predicted,ID
0,0.406734,0
1,0.600633,1
2,0.467558,2
3,0.456179,3
4,0.393997,4
5,0.35597,5
6,0.290599,6
7,0.437213,7
8,0.37983,8
9,0.468401,9


In [49]:
submission_df.to_csv('submission.csv', index=False)