In [21]:
import glob
import json
import os
import random
import sys
import math
import datetime
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from typing import Union
from tqdm import tqdm
from tensorflow.keras import layers, Model
from PIL import Image


if not os.path.exists("./save_weights"):
    os.makedirs("./save_weights")

In [22]:
img_size = {"B0": 224,
            "B1": 240,
           "B2": 260,
            "B3": 300,
            "B4": 380,
            "B5": 456,
            "B6": 528,
            "B7": 600}

num_model = "B0"
im_height = im_width = img_size[num_model]

batch_size = 16
epochs = 30
num_classes = 5
freeze_layers = True
initial_lr = 0.01

In [23]:
data_root = "./flower_data"
log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m/%d-%H%M%S")


train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))

train_dir = os.path.join(data_root, "train")
val_dir = os.path.join(data_root, "val")

flower_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]
class_dict = dict((value, index) for index, value in enumerate(flower_class))
class_indices = dict((k, v) for v,k in enumerate(flower_class))
json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
with open('class.json', 'w') as json_file:
    json_file.write(json_str)

train_img_path = glob.glob(train_dir + "/*/*.jpg")
random.shuffle(train_img_path)
train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_img_path]
train_num = len(train_img_path)

val_img_path = glob.glob(val_dir + "/*/*.jpg")
random.shuffle(val_img_path)
val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_img_path]
val_num = len(val_img_path)

print("using {} images for training, {} images for validation.".format(train_num, val_num))

using 2572 images for training, 1098 images for validation.


In [24]:
def process_train_image(img_path, label):
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32)
    image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
    image = tf.image.random_flip_left_right(image)
    return image, label

def process_val_image(img_path, label):
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32)
    image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)
    return image, label
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [25]:
train_dataset = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),
                                                    tf.constant(train_label_list)))
train_dataset = train_dataset.map(process_train_image, num_parallel_calls=AUTOTUNE)\
    .cache().shuffle(buffer_size=train_num).batch(batch_size).prefetch(buffer_size=AUTOTUNE)

val_dataset = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),
                                                  tf.constant(val_label_list)))
val_dataset = val_dataset.map(process_val_image, num_parallel_calls=AUTOTUNE)\
    .cache().batch(batch_size).prefetch(buffer_size=AUTOTUNE)

Net Model

In [26]:
CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        'distribution': 'truncated_normal'
    }
}

DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1. / 3.,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}

def correct_pad(input_size: Union[int, tuple], kernel_size: int):

    if isinstance(input_size, int):
        input_size = (input_size, input_size)

    kernel_size = (kernel_size, kernel_size)

    adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))


def block(inputs,
          activation: str = "swish",
          drop_rate: float = 0.,
          name: str = "",
          input_channel: int = 32,
          output_channel: int = 16,
          kernel_size: int = 3,
          strides: int = 1,
          expand_ratio: int = 1,
          use_se: bool = True,
          se_ratio: float = 0.25):

    filters = input_channel * expand_ratio
    if expand_ratio != 1:
        x = layers.Conv2D(filters=filters,
                          kernel_size=1,
                          padding="same",
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=name + "expand_conv")(inputs)
        x = layers.BatchNormalization(name=name + "expand_bn")(x)
        x = layers.Activation(activation, name=name + "expand_activation")(x)
    else:
        x = inputs

    if strides == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(filters, kernel_size),
                                 name=name + "dwconv_pad")(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size,
                               strides=strides,
                               padding="same" if strides == 1 else "valid",
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + "dwconv")(x)
    x = layers.BatchNormalization(name=name + "bn")(x)
    x = layers.Activation(activation, name=name + "activation")(x)

    if use_se:
        filters_se = int(input_channel * se_ratio)
        se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
        se = layers.Reshape((1, 1, filters), name=name + "se_reshape")(se)
        se = layers.Conv2D(filters=filters_se,
                           kernel_size=1,
                           padding="same",
                           activation=activation,
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_reduce")(se)
        se = layers.Conv2D(filters=filters,
                           kernel_size=1,
                           padding="same",
                           activation="sigmoid",
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + "se_expand")(se)
        x = layers.multiply([x, se], name=name + "se_excite")

    x = layers.Conv2D(filters=output_channel,
                      kernel_size=1,
                      padding="same",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name + "project_conv")(x)
    x = layers.BatchNormalization(name=name + "project_bn")(x)
    if strides == 1 and input_channel == output_channel:
        if drop_rate > 0:
            x = layers.Dropout(rate=drop_rate,
                               noise_shape=(None, 1, 1, 1),
                               name=name + "drop")(x)
        x = layers.add([x, inputs], name=name + "add")

    return x

def efficient_net(width_coefficient,
                  depth_coefficient,
                  input_shape=(224, 224, 3),
                  dropout_rate=0.2,
                  drop_connect_rate=0.2,
                  activation="swish",
                  model_name="efficientnet",
                  include_top=True,
                  num_classes=1000):


    block_args = [[3, 1, 32, 16, 1, 1, True],
                  [3, 2, 16, 24, 6, 2, True],
                  [5, 2, 24, 40, 6, 2, True],
                  [3, 3, 40, 80, 6, 2, True],
                  [5, 3, 80, 112, 6, 1, True],
                  [5, 4, 112, 192, 6, 2, True],
                  [3, 1, 192, 320, 6, 1, True]]

    def round_filters(filters, divisor=8):
        filters *= width_coefficient
        new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)

        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        return int(math.ceil(depth_coefficient * repeats))

    img_input = layers.Input(shape=input_shape)

    x = layers.experimental.preprocessing.Rescaling(1. / 255.)(img_input)
    x = layers.experimental.preprocessing.Normalization()(x)
    x = layers.ZeroPadding2D(padding=correct_pad(input_shape[:2], 3),
                             name="stem_conv_pad")(x)
    x = layers.Conv2D(filters=round_filters(32),
                      kernel_size=3,
                      strides=2,
                      padding="valid",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name="stem_conv")(x)
    x = layers.BatchNormalization(name="stem_bn")(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    b = 0
    num_blocks = float(sum(round_repeats(i[1]) for i in block_args))
    for i, args in enumerate(block_args):
        assert args[1] > 0

        args[2] = round_filters(args[2])
        args[3] = round_filters(args[3])

        for j in range(round_repeats(args[1])):
            x = block(x,
                      activation=activation,
                      drop_rate=drop_connect_rate * b / num_blocks,
                      name="block{}{}_".format(i + 1, chr(j + 97)),
                      kernel_size=args[0],
                      input_channel=args[2] if j == 0 else args[3],
                      output_channel=args[3],
                      expand_ratio=args[4],
                      strides=args[5] if j == 0 else 1,
                      use_se=args[6])
            b += 1

    x = layers.Conv2D(round_filters(1280),
                      kernel_size=1,
                      padding="same",
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name="top_conv")(x)
    x = layers.BatchNormalization(name="top_bn")(x)
    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        x = layers.Dense(units=num_classes,
                         activation="softmax",
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         name="predictions")(x)

    model = Model(img_input, x, name=model_name)

    return model


def efficientnet_b0(num_classes=1000,
                    include_top=True,
                    input_shape=(224, 224, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb0.h5
    return efficient_net(width_coefficient=1.0,
                         depth_coefficient=1.0,
                         input_shape=input_shape,
                         dropout_rate=0.2,
                         model_name="efficientnetb0",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b1(num_classes=1000,
                    include_top=True,
                    input_shape=(240, 240, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb1.h5
    return efficient_net(width_coefficient=1.0,
                         depth_coefficient=1.1,
                         input_shape=input_shape,
                         dropout_rate=0.2,
                         model_name="efficientnetb1",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b2(num_classes=1000,
                    include_top=True,
                    input_shape=(260, 260, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb2.h5
    return efficient_net(width_coefficient=1.1,
                         depth_coefficient=1.2,
                         input_shape=input_shape,
                         dropout_rate=0.3,
                         model_name="efficientnetb2",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b3(num_classes=1000,
                    include_top=True,
                    input_shape=(300, 300, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb3.h5
    return efficient_net(width_coefficient=1.2,
                         depth_coefficient=1.4,
                         input_shape=input_shape,
                         dropout_rate=0.3,
                         model_name="efficientnetb3",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b4(num_classes=1000,
                    include_top=True,
                    input_shape=(380, 380, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb4.h5
    return efficient_net(width_coefficient=1.4,
                         depth_coefficient=1.8,
                         input_shape=input_shape,
                         dropout_rate=0.4,
                         model_name="efficientnetb4",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b5(num_classes=1000,
                    include_top=True,
                    input_shape=(456, 456, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb5.h5
    return efficient_net(width_coefficient=1.6,
                         depth_coefficient=2.2,
                         input_shape=input_shape,
                         dropout_rate=0.4,
                         model_name="efficientnetb5",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b6(num_classes=1000,
                    include_top=True,
                    input_shape=(528, 528, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb6.h5
    return efficient_net(width_coefficient=1.8,
                         depth_coefficient=2.6,
                         input_shape=input_shape,
                         dropout_rate=0.5,
                         model_name="efficientnetb6",
                         include_top=include_top,
                         num_classes=num_classes)


def efficientnet_b7(num_classes=1000,
                    include_top=True,
                    input_shape=(600, 600, 3)):
    # https://storage.googleapis.com/keras-applications/efficientnetb7.h5
    return efficient_net(width_coefficient=2.0,
                         depth_coefficient=3.1,
                         input_shape=input_shape,
                         dropout_rate=0.5,
                         model_name="efficientnetb7",
                         include_top=include_top,
                         num_classes=num_classes)

model = efficientnet_b0(num_classes=num_classes)
model.summary()

Model: "efficientnetb0"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling_2 (Rescaling)        (None, 224, 224, 3)  0           ['input_3[0][0]']                
                                                                                                  
 normalization_2 (Normalization  (None, 224, 224, 3)  7          ['rescaling_2[0][0]']            
 )                                                                                                
                                                                                     

In [27]:
pre_weights_path = ''
assert os.path.exists(pre_weights_path),"cannot find {}".format(pre_weights_path)
model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)

if freeze_layers:
    unfreeze_layers = ["top_conv", "top_bn", "predictions"]
    for layer in model.layers:
        if layer.name not in unfreeze_layers:
            layer.trainable = False
        else:
            print("training {}".format(layer.name))

AssertionError: cannot find 

In [None]:
# custom learning rate curve
def scheduler(now_epoch):
    end_lr_rate = 0.01
    rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate
    new_lr = rate * initial_lr

    # writing lr into tensorboard
    with train_writer.as_default():
        tf.summary.scalar('learning rate', data=new_lr, step= epoch)

    return new_lr

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

train_loss = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name="val_loss")
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

In [None]:
best_val_acc = 0.
for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    val_loss.reset_states()
    val_accuracy.reset_states()

    train_bar = tqdm(train_dataset, file=sys.stdout)
    for images,labels in train_bar:
        with tf.GradientTape() as tape:
            output = model(images, training = True)
            loss = loss_object(labels, output)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, output)

        train_bar.desc = "train epoch:[{}/{}] loss:{:.3f} acc:{:.3f}".format(epoch + 1,
                                                                             epochs,
                                                                             train_loss.result(),
                                                                             train_accuracy.result())

    optimizer.learning_rate = scheduler(epoch)

    val_bar = tqdm(val_dataset, file=sys.stdout)
    for images, labels in val_bar:
        output = model(images, training=False)
        loss = loss_object(labels, output)

        val_loss(loss)
        val_accuracy(labels, output)

        val_bar.desc = "val epoch:[{}/{}] loss:{:.3f} acc:{:.3f}".format(epoch + 1,
                                                                         epochs,
                                                                         val_loss.result(),
                                                                         val_accuracy.result())

    with train_writer.as_default():
        tf.summary.scalar("loss", train_loss.result(), epoch)
        tf.summary.scalar("accuracy", train_accuracy.result(), epoch)

    with val_writer.as_default():
        tf.summary.scalar("loss", val_loss.result(), epoch)
        tf.summary.scalar("accuracy", val_accuracy.result(), epoch)

    if val_accuracy.result() > best_val_acc:
        best_val_acc = val_accuracy.result()
        save_name = "./save_weights/efficientnet.ckpt"
        model.save_weights(save_name, save_format="tf")

In [None]:
img_path = "../input/flower/flower.jpg"
assert os.path.exists(img_path),"file:'{}‘ does not exist.".format(img_path)

img = Image.open(img_path)
img = img.resize((im_width, im_height))
plt.imshow(img)

img = np.array(img).astype(np.float32)
img = (np.expand_dims(img, 0))

json_path = './class_indices.json'
assert os.path.exists(json_path),"file: '{}' does not exist.".format(json_path)

with open(json_path, "r") as f:
    class_indict = json.load(f)

#weights_path = './save_weights/efficientnet.ckpt'
#model.load_weights(weights_path)

result = np.squeeze(model.predict(img))
predict_class = np.argmax(result)

print_res = "class:{} prob:{:.3f} ".format(class_indict[str(predict_class)],
                                           result[predict_class])

plt.title(print_res)
for i in range(len(result)):
    print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                             result[i]))
plt.show()