In [1]:
!git clone "https://github.com/domiurg/ML_class_kaggle"

Cloning into 'ML_class_kaggle'...
remote: Enumerating objects: 4569, done.[K
^Cceiving objects:   3% (148/4569), 18.47 MiB | 124.00 KiB/s   


In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
# from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
# from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os


In [2]:
training_set_path = 'train_kaggle'
val_set_path = 'val_kaggle'

class DataGenerator(object):
    def __init__(self, train_path='', val_path='', preprocess=None, img_size=(224, 224), b_size=4):
        self.train_path = train_path
        self.val_path = val_path
        self.preprocess = preprocess
        self.img_size = img_size
        self.b_size = b_size
        self.train_datagen = ImageDataGenerator(preprocessing_function=self.preprocess,
                                                rotation_range=40,
                                                width_shift_range=0.2,
                                                height_shift_range=0.2,
                                                shear_range=0.2,
                                                zoom_range=0.2,
                                                channel_shift_range=10,
                                                horizontal_flip=True,
                                                fill_mode='nearest')
        self.val_datagen = valid_datagen = ImageDataGenerator(preprocessing_function=self.preprocess)

    def get_batches(self):
        train_batches = self.train_datagen.flow_from_directory(self.train_path,
                                                               target_size=self.img_size,
                                                               interpolation='bicubic',
                                                               class_mode='categorical',
                                                               shuffle=True,
                                                               batch_size=self.b_size)
        val_bathces = self.val_datagen.flow_from_directory(self.val_path,
                                                           target_size=self.img_size,
                                                           interpolation='bicubic',
                                                           class_mode='categorical',
                                                           shuffle=False,
                                                           batch_size=self.b_size)
        return train_batches, val_bathces

In [3]:
IMAGE_SIZE = (224, 224)
NUM_CLASSES = 5
BATCH_SIZE = 4  # try reducing batch size or freeze more layers if your GPU runs out of memory
NUM_EPOCHS = 1500
WEIGHTS_FINAL = 'model-resnet50v2-final.h5'

datagen = DataGenerator(training_set_path, val_set_path, preprocess_input, IMAGE_SIZE, BATCH_SIZE)
train_batches, val_batches = datagen.get_batches()

# show class indices
print('****************')
for cls, idx in train_batches.class_indices.items():
    print('Class #{} = {}'.format(idx, cls))
print('****************')

Found 3708 images belonging to 5 classes.
Found 895 images belonging to 5 classes.
****************
Class #0 = food
Class #1 = furniture
Class #2 = hotdog
Class #3 = people
Class #4 = pets
****************


In [4]:
net = ResNet50V2(include_top=False, weights='imagenet', input_tensor=None,
                  input_shape=(*IMAGE_SIZE, 3))
x = net.output
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(units=512, activation='relu')(x)
x = Dropout(0.5)(x)
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)
net_final = Model(inputs=net.input, outputs=output_layer)

for layer in net.layers[:len(net.layers) - 12]:
    layer.trainable = False
net_final.compile(optimizer=Adam(lr=0.0001),
                  loss='categorical_crossentropy', metrics=['accuracy'])

print(net_final.summary())
print(len(net.layers))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 114, 114, 64) 0           conv1_conv[0][0]                 
_____________

In [5]:
# Define Callbacks
filepath = "res50v2-best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max')
early_stop = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=150)
csv_history = 'res50v2-history.csv'
csv_logger = CSVLogger(csv_history, append=False)
callbacks = [checkpoint, early_stop, csv_logger]

In [6]:
# train the model
H = net_final.fit_generator(train_batches,
                            steps_per_epoch=train_batches.samples // BATCH_SIZE,
                            validation_data=val_batches,
                            validation_steps=val_batches.samples // BATCH_SIZE,
                            epochs=NUM_EPOCHS,
                            callbacks=callbacks)

net_final.save(WEIGHTS_FINAL)

Epoch 1/1500
Epoch 00001: val_acc improved from -inf to 0.92040, saving model to res152v2-best.hdf5
Epoch 2/1500
Epoch 00002: val_acc improved from 0.92040 to 0.93722, saving model to res152v2-best.hdf5
Epoch 3/1500
114/927 [==>...........................] - ETA: 39s - loss: 0.5473 - acc: 0.7961

KeyboardInterrupt: 