In [None]:
# !pip install git+https://github.com/qubvel/efficientnet
from efficientnet.tfkeras import EfficientNetB5

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, KFold

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from evaluate import _f1_score

In [None]:
# --- Training hyper-parameters -------------------------------------------
BATCH_SIZE = 8      # samples per batch handed to the data generators
EPOCHS = 100        # epoch budget for each fold
K_FOLDS = 6         # number of cross-validation splits
PATIENCE = 10       # patience value forwarded to the training callbacks
SEED = 2432         # seed shared by the fold splitter and the generators
IMAGE_SIZE = 300    # square input resolution (pixels) fed to the network
N_CLASSES = 196     # number of target classes
CUTMIX = True       # True: custom `Dataloader`; False: Keras ImageDataGenerator

# --- Input locations -------------------------------------------------------
DATA_PATH = "../input"
CSV_PATH = os.path.join(DATA_PATH, "2019-3rd-ml-month-with-kakr")
# TRAIN_IMG_PATH = os.path.join(CSV_PATH, "train")
# TEST_IMG_PATH = os.path.join(CSV_PATH, "test")

df_train = pd.read_csv(os.path.join(CSV_PATH, 'train.csv'))
df_test = pd.read_csv(os.path.join(CSV_PATH, 'test.csv'))
df_class = pd.read_csv(os.path.join(CSV_PATH, 'class.csv'))

# --- Output location for per-fold checkpoints ------------------------------
model_path = './model/'
# `exist_ok=True` makes this cell safe to re-run and avoids the
# check-then-create race of `os.path.exists` + `os.mkdir`.
os.makedirs(model_path, exist_ok=True)

# --- Pre-cropped images (separate dataset from the CSV files) --------------
CROP_PATH = os.path.join(DATA_PATH,'3rd-ml-month-car-image-cropping-dataset')
TRAIN_CROPPED_PATH = os.path.join(CROP_PATH, 'train_crop')
TEST_CROPPED_PATH = os.path.join(CROP_PATH, 'test_crop')

# Keep only the columns used for training / inference.
df_train = df_train[['img_file', 'class']]
df_test = df_test[['img_file']]

In [None]:
def get_model():
    """Build and compile the EfficientNetB5-based classifier.

    The network is an ImageNet-initialised EfficientNetB5 backbone (without
    its classification top) followed by global average pooling, two dense
    layers with LeakyReLU activations (dropout after the first), and a
    softmax output with ``N_CLASSES`` units.

    The input shape is ``(IMAGE_SIZE, IMAGE_SIZE, 3)``; both ``IMAGE_SIZE``
    and ``N_CLASSES`` are read from the notebook's configuration cell.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy,
        RMSprop and the metrics ``'acc'`` and ``_f1_score``.
    """
    base_model = EfficientNetB5(
        weights='imagenet',
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        include_top=False)

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(512),
        layers.LeakyReLU(),
        layers.Dropout(0.2),
        layers.Dense(256),
        layers.LeakyReLU(),
        # Sized from the config constant rather than a literal so the head
        # always matches the one-hot labels produced by the data generators.
        layers.Dense(N_CLASSES, activation='softmax'),
    ])

    # NOTE(review): `lr` is the legacy keyword; newer Keras releases spell it
    # `learning_rate`. Left unchanged to stay compatible with the TF version
    # this notebook targets -- confirm before upgrading.
    optimizer = optimizers.RMSprop(lr=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc', _f1_score])
    return model

In [None]:
# ---------------------------------------------------------------------------
# Stratified K-fold training.
#
# For every fold a fresh model is built, trained on the fold's training
# split and validated on its held-out split. The checkpoint path of each
# fold is collected in `fold_models`.
# ---------------------------------------------------------------------------

# File names and labels must exist before the splitter is invoked; they are
# positionally aligned with the rows of `df_train`.
img_file_lst = list(df_train.img_file)
img_labels = list(df_train['class'])

fold_models = []
# `shuffle=True` is required for `random_state` to have any effect; recent
# scikit-learn releases raise a ValueError when a seed is given without it.
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=SEED)

# Import the data pipeline once, outside the fold loop.
if CUTMIX:
    from loader import Dataloader
else:
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    # `class_mode='categorical'` expects string labels; casting is a no-op
    # if they already are strings (assumes `class` may be numeric -- TODO
    # confirm against train.csv).
    df_labeled = df_train.copy()
    df_labeled['class'] = df_labeled['class'].astype(str)
    # Fix the label -> index mapping so that the train and validation
    # generators of every fold agree on it.
    class_names = sorted(df_labeled['class'].unique())

for fold_i, (train_idx, valid_idx) in enumerate(skf.split(img_file_lst, img_labels)):

    print("** {} Fold **".format(fold_i+1))

    if CUTMIX:
        # Plain list comprehensions replace `itemgetter`, which was never
        # imported and returns a scalar instead of a sequence for a single
        # index.
        train_file_lst = [img_file_lst[i] for i in train_idx]
        train_labels = [img_labels[i] for i in train_idx]

        valid_file_lst = [img_file_lst[i] for i in valid_idx]
        valid_labels = [img_labels[i] for i in valid_idx]

        train_generator = Dataloader(
                train_file_lst,
                TRAIN_CROPPED_PATH,
                train_labels,
                batch_size=BATCH_SIZE,
                n_classes=N_CLASSES,
                img_size=IMAGE_SIZE)

        # Validation: all augmentation probabilities set to zero, fixed order.
        valid_generator = Dataloader(
                valid_file_lst,
                TRAIN_CROPPED_PATH,
                valid_labels,
                batch_size=BATCH_SIZE,
                n_classes=N_CLASSES,
                img_size=IMAGE_SIZE,
                prob_flip=.0,
                prob_rotate=.0,
                prob_cutmix=.0,
                shuffle=False)

    else:
        train_datagen = ImageDataGenerator(
                rescale=1./255,
                rotation_range=90,
                width_shift_range=20,
                height_shift_range=20,
                brightness_range=[0.5, 1.5],
                fill_mode='nearest',
                zoom_range=0.1,
                shear_range=0.1,
                horizontal_flip=True,
                vertical_flip=True)

        valid_datagen = ImageDataGenerator(
                rescale=1./255)

        # Restrict each generator to its own fold; training on the full
        # frame would leak the validation rows into training.
        train_df = df_labeled.iloc[train_idx]
        valid_df = df_labeled.iloc[valid_idx]

        train_generator = train_datagen.flow_from_dataframe(
                dataframe=train_df,
                directory=TRAIN_CROPPED_PATH,
                x_col='img_file',
                y_col='class',
                target_size=(IMAGE_SIZE, IMAGE_SIZE),
                color_mode='rgb',
                classes=class_names,
                class_mode='categorical',
                batch_size=BATCH_SIZE,
                seed=SEED,
                shuffle=True)

        # Validation data is not shuffled, matching the CutMix branch.
        valid_generator = valid_datagen.flow_from_dataframe(
                dataframe=valid_df,
                directory=TRAIN_CROPPED_PATH,
                x_col='img_file',
                y_col='class',
                target_size=(IMAGE_SIZE, IMAGE_SIZE),
                color_mode='rgb',
                classes=class_names,
                class_mode='categorical',
                batch_size=BATCH_SIZE,
                seed=SEED,
                shuffle=False)

    model_name = model_path + "_EffB5_cm_fold_{}.hdf5".format(fold_i+1)
    fold_models.append(model_name)

    # Drop the previous fold's graph/state before building a new model so
    # memory does not accumulate across folds.
    tf.keras.backend.clear_session()
    model = get_model()

    # NOTE(review): `get_callback` is not defined in the cells shown here;
    # it must be defined or imported before this cell runs.
    history = model.fit_generator(
        generator=train_generator,
        validation_data=valid_generator,
        steps_per_epoch=len(train_generator),
        validation_steps=len(valid_generator),
        verbose=1,
        workers=10,
        max_queue_size=30,
        epochs=EPOCHS,
        callbacks=get_callback(model_name, PATIENCE)
    )