## Import Packages

In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l2
from tensorflow_addons.metrics import F1Score
from tensorflow.keras.models import Model, load_model
from tensorflow_addons.optimizers import AdamW, Lookahead
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization, Input
from tensorflow.keras.layers import ZeroPadding2D, MaxPooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D, Add
from tensorflow.keras.layers import GlobalMaxPooling2D, Reshape
from tensorflow.keras.layers import Activation, Conv2D, Average
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Read true labels + Basic EDA

In [2]:
class_map = {
    'Airplane': 0,
    'Candle': 1,
    'Christmas_Tree': 2,
    'Jacket': 3,
    'Miscellaneous': 4,
    'Snowman': 5
}

In [3]:
# Load the training labels and add `Class_enc`: the integer code of each class
# name from `class_map`, stored as a string (the image generators below read
# this column with class_mode="categorical").
train_csv_path = "../input/hackereath-holiday-season-deep-learning-contest/dataset/train.csv"
train_df = pd.read_csv(train_csv_path)
train_df['Class_enc'] = train_df['Class'].map(class_map).astype('str')
train_df.head()

Unnamed: 0,Image,Class,Class_enc
0,image3476.jpg,Miscellaneous,4
1,image5198.jpg,Candle,1
2,image4183.jpg,Snowman,5
3,image1806.jpg,Miscellaneous,4
4,image7831.jpg,Miscellaneous,4


In [4]:
# Build the test frame from the file names found in the test folder.
test_dir = "../input/hackereath-holiday-season-deep-learning-contest/dataset/test"
test_files = [file for file in tqdm(os.listdir(test_dir))]

# `Class` is filled with the constant '0' here; it is overwritten with the
# predicted class names when the submission is created.
test_df = pd.DataFrame(test_files, columns=['Image']).assign(Class='0')
test_df.head()

100%|██████████| 3489/3489 [00:00<00:00, 961051.20it/s]


Unnamed: 0,Image,Class
0,image7761.jpg,0
1,image3202.jpg,0
2,image688.jpg,0
3,image233.jpg,0
4,image4332.jpg,0


## Calculate class-weights

In [5]:
# Per-class sample counts, one row per encoded class label.
temp_df = train_df.groupby(['Class_enc']).size().reset_index().rename(columns={0:'count'})
total_count = np.sum(temp_df['count'].values)
temp_df['class%'] = (temp_df['count'] / total_count) * 100

# Weight each class inversely to its share of the data, scaled so that the
# rarest class gets weight 1.0 and more frequent classes get smaller weights.
lowest_pct = min(temp_df['class%'])
temp_df['class_weight'] = lowest_pct / temp_df['class%']

# Key the weights by the integer class id taken from `Class_enc` itself rather
# than by the row position of the grouped frame, so the mapping does not depend
# on the groupby output happening to be ordered 0..N-1.
class_weight = {
    int(label): float(weight)
    for label, weight in zip(temp_df['Class_enc'], temp_df['class_weight'])
}
class_weight

{0: 0.6747663551401869,
 1: 0.6087689713322091,
 2: 0.23456790123456786,
 3: 0.5640625,
 4: 0.12888254194930382,
 5: 1.0}

## Model Hyperparameters

In [6]:
# Settings shared by the generators and the model below.
image_dim = (400, 400, 3)   # only the first two entries are used, as the generators' target_size
mini_batch_size = 64
dataset_dir = "../input/hackereath-holiday-season-deep-learning-contest/dataset/"

# Number of batches needed to cover the test set once (last batch may be partial).
test_steps = int(np.ceil(len(test_df) / float(mini_batch_size)))

## Configure image generators

In [7]:
def train_generator(df, mode='train'):
    """Yield an endless stream of batches for the three-input ensemble model.

    The underlying Keras iterator is created once and then iterated forever
    (it wraps around on its own after each full pass), instead of rebuilding
    the generator and re-validating every file name at the start of each pass.

    Args:
        df: DataFrame with an "Image" column (file names inside
            ``<dataset_dir>/train``) and a "Class_enc" column (string labels).
        mode: ``'train'`` applies random augmentation and uses the iterator's
            default shuffling; any other value disables augmentation and
            yields the rows in DataFrame order (``shuffle=False``).

    Yields:
        ``([images, images, images], labels)`` -- the same image batch repeated
        once per model input, and the labels produced by
        ``class_mode="categorical"``.
    """
    if mode == 'train':
        datagen = ImageDataGenerator(rotation_range = 30,
                                     zoom_range = 0.2,
                                     horizontal_flip = True,
                                     fill_mode = 'nearest',
                                     height_shift_range = 0.1,
                                     width_shift_range = 0.1)
        # Keep flow_from_dataframe's default shuffling for training.
        order_kwargs = {}
    else:
        datagen = ImageDataGenerator()
        # Deterministic order so predictions line up with the DataFrame rows.
        order_kwargs = {'shuffle': False}

    batches = datagen.flow_from_dataframe(df,
                                          directory = os.path.join(dataset_dir, "train"),
                                          x_col = "Image",
                                          y_col = "Class_enc",
                                          target_size = (image_dim[0], image_dim[1]),
                                          batch_size = mini_batch_size,
                                          class_mode = "categorical",
                                          interpolation = "bicubic",
                                          **order_kwargs)

    # The iterator never raises StopIteration: after the last (possibly
    # partial) batch it starts over from the beginning of the data.
    for images, labels in batches:
        yield [images, images, images], labels

In [8]:
def test_generator():
    """Yield an endless, un-augmented stream of test batches in file order.

    Reads the module-level ``test_df`` and the images in
    ``<dataset_dir>/test``. The Keras iterator is built once and iterated
    forever (it wraps around after each full pass), instead of being rebuilt
    and re-validating every file name on each pass.

    Yields:
        ``([images, images, images], labels)`` -- the same image batch repeated
        once per model input. The labels come from ``test_df``'s "Class"
        column via ``class_mode="categorical"``.
    """
    test_datagen = ImageDataGenerator()

    batches = test_datagen.flow_from_dataframe(test_df,
                                               directory = os.path.join(dataset_dir, "test"),
                                               x_col = "Image",
                                               y_col = "Class",
                                               shuffle = False,
                                               target_size = (image_dim[0], image_dim[1]),
                                               batch_size = mini_batch_size,
                                               class_mode = "categorical",
                                               interpolation = "bicubic")

    # shuffle=False keeps predictions aligned with the rows of test_df; the
    # iterator restarts from the first row after the last batch.
    for images, labels in batches:
        yield [images, images, images], labels

## Build and validate the model

In [9]:
def head(x1, x2, x3):
    """Build the classification head on top of three feature tensors.

    The three inputs are concatenated and passed through four
    Dense -> BatchNormalization -> ReLU -> Dropout(0.5) stages of widths
    2048, 512, 512 and 128 (Dense layers use he_uniform initialisation and
    L2(1e-4) kernel regularisation), followed by a 6-unit softmax layer.

    Args:
        x1, x2, x3: Keras tensors to concatenate.

    Returns:
        The output tensor of the final 6-unit softmax Dense layer.
    """
    features = Concatenate()([x1, x2, x3])

    for width in (2048, 512, 512, 128):
        features = Dense(units=width,
                         kernel_regularizer=l2(0.0001),
                         kernel_initializer='he_uniform')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = Dropout(rate=0.5)(features)

    return Dense(units=6,
                 activation='softmax',
                 kernel_initializer='he_uniform')(features)

In [10]:
# Stratified 5-fold splitter plus the accumulators filled by the training loop:
# summed test-set probabilities, summed per-fold F1 and the number of folds run.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
y_pred_final = np.zeros((len(test_df), 6))
final_oof_score = 0
counter = 0

# Load the three pre-trained base models
base_model1 = load_model('../input/holiday-season-enetb4-mav34/hackerearth_holiday_season_model.h5')
base_model2 = load_model('../input/holiday-season-enetb5-mav4/hackerearth_holiday_season_model.h5')
base_model3 = load_model('../input/holiday-season-enetb6-mav4/hackerearth_holiday_season_model.h5')

# Freeze each base model and prefix its layer names (m1_/m2_/m3_) so that the
# layers of the three models have distinct names when combined in one graph.
for name_prefix, base_model in (('m1_', base_model1),
                                ('m2_', base_model2),
                                ('m3_', base_model3)):
    base_model.trainable = False
    for layer in tqdm(base_model.layers):
        layer._name = name_prefix + layer.name

100%|██████████| 530/530 [00:00<00:00, 29662.69it/s]
100%|██████████| 632/632 [00:00<00:00, 20623.98it/s]
100%|██████████| 722/722 [00:00<00:00, 29704.24it/s]


In [11]:
# Reset the cross-fold accumulators in this cell so that re-running it starts
# from a clean slate instead of adding to totals left by a previous run.
y_pred_final = np.zeros((test_df.shape[0], 6))
final_oof_score = 0
counter = 0

for train, val in kfold.split(train_df.loc[:, train_df.columns!='Class_enc'], train_df['Class_enc']):
    counter += 1

    # Prepare data for model training
    train_ds, val_ds = train_df.iloc[train], train_df.iloc[val]
    steps_per_epoch = int(np.ceil(train_ds.shape[0] / float(mini_batch_size)))
    validation_steps = int(np.ceil(val_ds.shape[0] / float(mini_batch_size)))

    # Build the ensemble: the three frozen base models feed a fresh head.
    model = Model(inputs=[base_model1.input, base_model2.input, base_model3.input],
                  outputs=head(base_model1.get_layer('m1_activation_11').output,
                               base_model2.get_layer('m2_activation_11').output,
                               base_model3.get_layer('m3_activation_11').output),
                  name='HackerEarth_Holiday_Season_Model')

    # Start every fold from the same pre-trained ensemble weights
    model.load_weights('../input/holiday-season-enetb456-mav2/hackerearth_holiday_season_model.h5')

    # Compile once, after the weights are in place.
    model.compile(loss='categorical_crossentropy',
                  metrics=[F1Score(num_classes=6, average='weighted'),
                           'categorical_accuracy'],
                  optimizer=Lookahead(AdamW(learning_rate=1e-4,
                                            weight_decay=1e-5,
                                            clipvalue=700),
                                      sync_period=10))

    # Configure model callbacks
    early = EarlyStopping(monitor="val_loss", mode="min", patience=5,
                          restore_best_weights=True, verbose=1)

    reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2,
                                  min_lr=1e-6, patience=3,
                                  verbose=1, mode='min')

    # Every fold writes to the same checkpoint path; the file is read back
    # right after training, before the next fold overwrites it.
    chk_point = ModelCheckpoint('hackerearth_holiday_season_model.h5',
                                monitor='val_loss', verbose=1,
                                save_weights_only=True,
                                save_best_only=True, mode='min')

    # Fit the model
    history = model.fit(
        train_generator(train_ds),
        steps_per_epoch=steps_per_epoch,
        class_weight=class_weight,
        epochs=15,
        verbose=1,
        callbacks=[reduce_lr, early, chk_point],
        validation_data=train_generator(val_ds, 'valid'),
        validation_steps=validation_steps
    )

    # Restore the best (lowest val_loss) weights of this fold into the model
    # that was just trained; rebuilding an identical graph is unnecessary.
    model.load_weights('./hackerearth_holiday_season_model.h5')

    # Make predictions and capture metrics. The 'valid' generator does not
    # shuffle, so predictions are in the same order as the rows of val_ds.
    y_true = val_ds['Class_enc'].values
    y_pred = model.predict(train_generator(val_ds, 'valid'), steps=validation_steps, verbose=0)
    y_pred = np.argmax(y_pred, axis=1)
    y_pred_final += model.predict(test_generator(), steps=test_steps, verbose=0)
    score = f1_score(y_true.astype(int), y_pred, average='weighted')
    final_oof_score += score
    print("\nFold-{} | F1-Score: {}\n".format(counter, score))

# Average the summed test probabilities and fold scores over the folds run.
y_pred_final = y_pred_final / float(counter)
final_oof_score /= float(counter)
print("\n\nAggregate F1-Score: {}".format(final_oof_score))

Found 5175 validated image filenames belonging to 6 classes.
Epoch 1/15
Found 1294 validated image filenames belonging to 6 classes.

Epoch 00001: val_loss improved from inf to 0.19371, saving model to hackerearth_holiday_season_model.h5
Epoch 2/15

Epoch 00002: val_loss improved from 0.19371 to 0.18545, saving model to hackerearth_holiday_season_model.h5
Epoch 3/15

Epoch 00003: val_loss improved from 0.18545 to 0.18132, saving model to hackerearth_holiday_season_model.h5
Epoch 4/15

Epoch 00004: val_loss did not improve from 0.18132
Epoch 5/15

Epoch 00005: val_loss improved from 0.18132 to 0.17975, saving model to hackerearth_holiday_season_model.h5
Epoch 6/15

Epoch 00006: val_loss did not improve from 0.17975
Epoch 7/15

Epoch 00007: val_loss improved from 0.17975 to 0.17554, saving model to hackerearth_holiday_season_model.h5
Epoch 8/15

Epoch 00008: val_loss improved from 0.17554 to 0.17551, saving model to hackerearth_holiday_season_model.h5
Epoch 9/15

Epoch 00009: val_loss im

## Create submission file

In [12]:
rev_class_map = {
    0: 'Airplane',
    1: 'Candle',
    2: 'Christmas_Tree',
    3: 'Jacket',
    4: 'Miscellaneous',
    5: 'Snowman'
}

In [13]:
# Pick the highest-probability class for each test image and convert the
# class id back to its name.
predicted_ids = y_pred_final.argmax(axis=1)
test_df['Class'] = pd.Series(predicted_ids, index=test_df.index).map(rev_class_map)
test_df.head()

Unnamed: 0,Image,Class
0,image7761.jpg,Miscellaneous
1,image3202.jpg,Miscellaneous
2,image688.jpg,Snowman
3,image233.jpg,Candle
4,image4332.jpg,Christmas_Tree


In [14]:
# Write the Image/Class table as the submission file, without the index column.
submission_path = "/kaggle/working/submission.csv"
test_df.to_csv(submission_path, index=False)