In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gc
import tensorflow as tf

# Print the TensorFlow version this notebook runs against.
print(tf.__version__)

# List every file found under the Kaggle input directory, one full path per line.
import os

input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

2.1.0-rc0
/kaggle/input/chinese-char-recognition-smmo19/test.npy
/kaggle/input/chinese-char-recognition-smmo19/train-3.npy
/kaggle/input/chinese-char-recognition-smmo19/train-2.npy
/kaggle/input/chinese-char-recognition-smmo19/train-4.npy
/kaggle/input/chinese-char-recognition-smmo19/train-1.npy
/kaggle/input/chinese-char-recognition-smmo19/random_labels.csv


In [2]:
# Load the four training shards and the test set from the competition input folder.
# Each training shard is indexed downstream as rows of (image, label).
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; this is only
# acceptable because the files come from the competition dataset itself.
DATA_DIR = "/kaggle/input/chinese-char-recognition-smmo19"

train_shards = [
    np.load(f"{DATA_DIR}/train-{i}.npy", allow_pickle=True)
    for i in range(1, 5)
]

# Concatenate once: growing the array inside the loop re-copies all previously
# loaded rows on every iteration.
data_train = np.concatenate(train_shards)
del train_shards
gc.collect()

x_test = np.load(f"{DATA_DIR}/test.npy", allow_pickle=True)

In [3]:
# Target image size that every sample is cropped/padded to.
HEIGHT = 143
WIDTH = 128
batch_size = 64  # 32
# Fraction of leading rows of data_train that train_gen skips (0 = use everything).
val_size = 0
# Sorted distinct labels; a label's position in this array is its class id.
unique = np.unique(data_train[:, 1])
LABELS = len(unique)
char_to_id = {char: class_id for class_id, char in enumerate(unique)}


def train_gen():
    """Yield ``(image, class_id)`` pairs for training.

    Rows of ``data_train`` before index ``int(len(data_train) * val_size)`` are
    skipped. Every image gets a trailing axis of length 1 appended, and every
    label is translated to its integer id through ``char_to_id``.
    """
    first_row = int(len(data_train) * val_size)
    for row in data_train[first_row:]:
        picture, char = row
        yield picture[..., None], char_to_id[char]


def preprocess_train(x, y):
    """Bring a training image to ``HEIGHT`` x ``WIDTH`` and rescale its values.

    The image is cropped and/or padded by ``tf.image.resize_with_crop_or_pad`` and
    then mapped with ``x / 255 - 0.5``. The label ``y`` is returned unchanged.
    """
    # assumes pixel values lie in [0, 255], giving outputs in [-0.5, 0.5] — TODO confirm
    resized = tf.image.resize_with_crop_or_pad(
        x, target_height=HEIGHT, target_width=WIDTH)
    scaled = resized / 255 - 0.5
    return scaled, y


def preprocess(x, y):
    """Augment a training image with a random spatial shift.

    The image is cropped/padded to ``(HEIGHT + 14, WIDTH + 13)`` and a random
    ``HEIGHT x WIDTH x 1`` window is then cut back out of it. The label ``y`` is
    returned unchanged.
    """
    # NOTE(review): in ds_train this runs after preprocess_train has already rescaled
    # the pixels, so the zero padding added here is not the same value as rescaled
    # zero-padding (-0.5) — confirm this is intended.
    enlarged = tf.image.resize_with_crop_or_pad(
        x, target_height=HEIGHT + 14, target_width=WIDTH + 13)
    shifted = tf.image.random_crop(enlarged, [HEIGHT, WIDTH, 1])
    # Central-crop, random-brightness and random-contrast variants were left disabled
    # in the original code.
    return shifted, y


def test_gen():
    """Yield each image of ``x_test`` with a trailing axis of length 1 appended."""
    yield from (sample[..., None] for sample in x_test)


def preprocess_test(x):
    """Bring a test image to ``HEIGHT`` x ``WIDTH`` and rescale it with ``x / 255 - 0.5``.

    Applies the same crop/pad and rescaling steps as the training preprocessing,
    without any random augmentation.
    """
    resized = tf.image.resize_with_crop_or_pad(
        x, target_height=HEIGHT, target_width=WIDTH)
    return resized / 255 - 0.5

Эксперимент показал, что сильный препроцессинг картинок с иероглифами не даёт особого прироста качества, но значительно увеличивает время обучения модели. Поэтому был включён только самый необходимый препроцессинг.

HEIGHT = 143 и WIDTH = 128 были выбраны в соответствии со средним значением размера картинок.

Для итогового предсказания был установлен val_size = 0, чтобы обучиться на всех данных.

In [4]:
# Let tf.data pick parallelism and buffer sizes itself. AUTOTUNE is the named form of
# the bare -1 literal that was passed before.
AUTOTUNE = tf.data.experimental.AUTOTUNE
SHUFFLE_BUFFER = 1024

# Training pipeline: size-normalise -> augment -> shuffle -> batch -> repeat forever.
# prefetch comes last so that whole batches are prepared while the model is training
# on the previous one.
ds_train = (
    tf.data.Dataset.from_generator(
        train_gen,
        output_types=(tf.float32, tf.int32),
        output_shapes=((None, None, 1), ()),
    )
    .map(preprocess_train, num_parallel_calls=AUTOTUNE)
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .shuffle(SHUFFLE_BUFFER)
    .batch(batch_size)
    .repeat()
    .prefetch(AUTOTUNE)
)

# Test pipeline: no augmentation, no shuffling, so predictions keep the order of x_test.
ds_test = (
    tf.data.Dataset.from_generator(
        test_gen,
        output_types=tf.float32,
        output_shapes=(None, None, 1),
    )
    .map(preprocess_test, num_parallel_calls=AUTOTUNE)
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

In [5]:
from tensorflow.python.keras import backend
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.python.keras.layers.advanced_activations import LeakyReLU
from tensorflow.python.keras.models import load_model


def _add_conv_block(model, filters, alpha, dropout, batch_norm=False, **first_conv_kwargs):
    """Append Conv -> LeakyReLU -> Conv [-> BatchNorm] -> LeakyReLU -> MaxPool -> Dropout.

    Args:
        model: the Sequential model that receives the layers.
        filters: number of filters in both 3x3 'same'-padded convolutions.
        alpha: negative-slope coefficient of both LeakyReLU activations.
        dropout: dropout rate applied after pooling.
        batch_norm: insert BatchNormalization after the second convolution.
        **first_conv_kwargs: extra arguments for the first convolution only
            (used to pass ``input_shape`` for the first block of the network).
    """
    layers = tf.keras.layers
    # Each layer gets its own initializer (string identifier) instead of sharing one
    # unseeded instance across the whole network.
    model.add(layers.Conv2D(filters=filters, padding='same', kernel_size=(3, 3),
                            kernel_initializer='glorot_normal', **first_conv_kwargs))
    model.add(layers.LeakyReLU(alpha))
    model.add(layers.Conv2D(filters=filters, padding='same', kernel_size=(3, 3),
                            kernel_initializer='glorot_normal'))
    if batch_norm:
        model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU(alpha))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(layers.Dropout(dropout))


def make_model():
    """Build the (uncompiled) CNN classifier.

    Architecture: four convolutional blocks with 64, 128, 256 and 512 filters
    (BatchNormalization in the last two), followed by Flatten, a 1024-unit dense
    layer with LeakyReLU and dropout, and a softmax output over ``LABELS`` classes.
    Input shape is ``(HEIGHT, WIDTH, 1)``.

    Only the public ``tf.keras`` API is used, so the layers and the Sequential
    container come from the same Keras implementation.

    Returns:
        A ``tf.keras.models.Sequential`` instance.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential()

    _add_conv_block(model, 64, alpha=0.2, dropout=0.3, input_shape=(HEIGHT, WIDTH, 1))
    _add_conv_block(model, 128, alpha=0.1, dropout=0.25)
    _add_conv_block(model, 256, alpha=0.1, dropout=0.2, batch_norm=True)
    _add_conv_block(model, 512, alpha=0.1, dropout=0.2, batch_norm=True)

    # Classification head.
    model.add(layers.Flatten())
    model.add(layers.Dense(1024, kernel_initializer='glorot_normal'))
    model.add(layers.LeakyReLU(0.1))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(LABELS, kernel_initializer='glorot_normal'))
    model.add(layers.Activation("softmax"))

    return model

Представленная модель состоит из четырёх свёрточных блоков (примерная схема уже была опробована мной в третьей домашке). В двух последних блоках применена батч-нормализация (BatchNormalization), в каждом блоке применён MaxPooling2D. От блока к блоку уменьшаются коэффициент наклона LeakyReLU (0.2 → 0.1) и доля Dropout (0.3 → 0.25 → 0.2), чтобы не выбросить слишком много важной информации в конце. Полносвязный слой Dense на 1024 нейрона перед выходным слоем дал ключевой прирост в качестве, что помогло продвинуться в лидерборде.

In [6]:
# Build a model instance and print its layer-by-layer summary (output shapes and
# parameter counts). The training cell builds its own fresh model afterwards.
model = make_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 143, 128, 64)      640       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 143, 128, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 143, 128, 64)      36928     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 143, 128, 64)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 72, 64, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 72, 64, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 64, 128)       7

In [7]:
# Training hyper-parameters.
INIT_LR = 5e-3  # learning rate used for the first epoch
EPOCHS = 20

# Start from a freshly initialised network.
model = make_model()

# Labels are integer class ids, hence the sparse cross-entropy loss.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adamax(learning_rate=INIT_LR),
    metrics=['accuracy']
)


def lr_scheduler(epoch):
    """Return the learning rate for ``epoch``: ``INIT_LR`` multiplied by 0.9 per epoch."""
    decay = 0.9 ** epoch
    return INIT_LR * decay


# ds_train repeats indefinitely, so the length of an epoch is defined here rather
# than by the dataset.
STEPS_PER_EPOCH = 5200

# `shuffle` is not passed: it has no effect for tf.data input, and shuffling is
# already done inside the ds_train pipeline.
history = model.fit(
    ds_train,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[tf.keras.callbacks.LearningRateScheduler(lr_scheduler)],
    verbose=2,
    initial_epoch=0
)

Train for 5200 steps
Epoch 1/20
5200/5200 - 842s - loss: 4.0910 - accuracy: 0.3903
Epoch 2/20
5200/5200 - 838s - loss: 0.3334 - accuracy: 0.9082
Epoch 3/20
5200/5200 - 836s - loss: 0.1725 - accuracy: 0.9512
Epoch 4/20
5200/5200 - 836s - loss: 0.1142 - accuracy: 0.9669
Epoch 5/20
5200/5200 - 837s - loss: 0.0825 - accuracy: 0.9761
Epoch 6/20
5200/5200 - 837s - loss: 0.0628 - accuracy: 0.9816
Epoch 7/20
5200/5200 - 836s - loss: 0.0493 - accuracy: 0.9853
Epoch 8/20
5200/5200 - 836s - loss: 0.0405 - accuracy: 0.9875
Epoch 9/20
5200/5200 - 836s - loss: 0.0335 - accuracy: 0.9901
Epoch 10/20
5200/5200 - 835s - loss: 0.0276 - accuracy: 0.9916
Epoch 11/20
5200/5200 - 836s - loss: 0.0236 - accuracy: 0.9926
Epoch 12/20
5200/5200 - 836s - loss: 0.0205 - accuracy: 0.9937
Epoch 13/20
5200/5200 - 836s - loss: 0.0176 - accuracy: 0.9947
Epoch 14/20
5200/5200 - 835s - loss: 0.0151 - accuracy: 0.9954
Epoch 15/20
5200/5200 - 836s - loss: 0.0136 - accuracy: 0.9957
Epoch 16/20
5200/5200 - 836s - loss: 0.0122

Значение steps_per_epoch=5200 выбрано, исходя из размера батча и общего размера выборки.

In [8]:
# Predict class probabilities for the test set and take the most likely class.
# Sequential.predict_classes is deprecated (and removed in later TensorFlow
# releases); argmax over predict() is the equivalent for a softmax output.
probabilities = model.predict(ds_test)
res = np.argmax(probabilities, axis=-1)

# Map class ids back to the original labels with one vectorised lookup.
pred = unique[res].tolist()

# Submission file: 1-based sample Id plus the predicted character.
df = pd.DataFrame({'Id': range(1, len(pred) + 1), 'Category': pred})
df.to_csv('KagglePrediction.csv', index=False)

Данный код был полностью запущен на Kaggle, выбранные мной предсказания для лидерборда именно оттуда (при желании могу предоставить ссылку).