## Import Packages

In [1]:
import os
import cv2
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.regularizers import l2
from tensorflow_addons.metrics import F1Score
from tensorflow_addons.optimizers import AdamW, Lookahead
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization, Input
from tensorflow.keras.layers import ZeroPadding2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Add, Conv2D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import multiply, Lambda, Concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

## Read true labels + Basic EDA

In [2]:
# Class names listed in the order of their integer codes (position == code).
class_names = [
    'Airplane',
    'Candle',
    'Christmas_Tree',
    'Jacket',
    'Miscellaneous',
    'Snowman',
]

# Forward lookup: class name -> integer code.
class_map = {name: code for code, name in enumerate(class_names)}

# Reverse lookup: integer code -> class name.
rev_class_map = dict(enumerate(class_names))

In [3]:
# Load the training labels, then add 'Class_enc': the integer code of each
# class name (looked up in class_map) stored as a string.
# The string column is what the image generators in this notebook use as y_col.
train_csv_path = "../input/hackereath-holiday-season-deep-learning-contest/dataset/train.csv"
train_df = pd.read_csv(train_csv_path)
train_df['Class_enc'] = train_df['Class'].map(class_map).astype('str')
train_df.head()

Unnamed: 0,Image,Class,Class_enc
0,image3476.jpg,Miscellaneous,4
1,image5198.jpg,Candle,1
2,image4183.jpg,Snowman,5
3,image1806.jpg,Miscellaneous,4
4,image7831.jpg,Miscellaneous,4


In [4]:
test_dir = "../input/hackereath-holiday-season-deep-learning-contest/dataset/test"

# Names of all entries in the test folder, in the order os.listdir returns them
# (tqdm only wraps the listing to display a progress bar).
test_files = list(tqdm(os.listdir(test_dir)))

# One row per test file; 'Class' is filled with the constant placeholder '0'
# until real predictions are written into it.
test_df = pd.DataFrame({'Image': test_files})
test_df['Class'] = '0'
test_df.head()

100%|██████████| 3489/3489 [00:00<00:00, 1037352.14it/s]


Unnamed: 0,Image,Class
0,image7761.jpg,0
1,image3202.jpg,0
2,image688.jpg,0
3,image233.jpg,0
4,image4332.jpg,0


## Model Hyperparameters

In [5]:
# Root folder of the competition data; the "train" and "test" image folders
# are joined onto it when the generators are configured.
dataset_dir = "../input/hackereath-holiday-season-deep-learning-contest/dataset/"

# Number of images per generated batch.
mini_batch_size = 64

# Batches needed to see every row once (rounded up, so the last batch may be partial).
train_steps = int(np.ceil(len(train_df) / float(mini_batch_size)))
test_steps = int(np.ceil(len(test_df) / float(mini_batch_size)))

# Input sizes used by the different generators. Only the first two entries of
# each tuple are read (as target_size); presumably (height, width, channels).
image_dim1 = (375, 375, 3)
image_dim2 = (380, 380, 3)
image_dim3 = (456, 456, 3)
image_dim4 = (464, 464, 3)

## Configure image generators

In [6]:
# No augmentation or rescaling is configured for the training images.
train_datagen = ImageDataGenerator()

# Settings common to both training iterators; only the target size differs.
# Shuffling is off so the batches come back in a fixed order.
_train_flow_options = dict(
    directory=os.path.join(dataset_dir, "train"),
    x_col="Image",
    y_col="Class_enc",
    shuffle=False,
    batch_size=mini_batch_size,
    class_mode="categorical",
    interpolation="bicubic",
)

# Training images resized to image_dim1.
train_generator1 = train_datagen.flow_from_dataframe(
    train_df,
    target_size=image_dim1[:2],
    **_train_flow_options
)

# Training images resized to image_dim2.
train_generator2 = train_datagen.flow_from_dataframe(
    train_df,
    target_size=image_dim2[:2],
    **_train_flow_options
)

Found 6469 validated image filenames belonging to 6 classes.
Found 6469 validated image filenames belonging to 6 classes.


In [7]:
def train_generator3():
    """Endlessly yield training batches for a model with three image inputs.

    Each yielded item is ``([images, images, images], labels)``: the same
    image batch (resized to ``image_dim3``) repeated three times, plus the
    categorical labels taken from the ``Class_enc`` column of ``train_df``.

    A new, unshuffled dataframe iterator is built on every pass of the outer
    loop; the inner loop is left once the number of samples yielded reaches
    the number of rows in ``train_df``.
    """
    while True:
        datagen = ImageDataGenerator()

        batches = datagen.flow_from_dataframe(
            train_df,
            directory=os.path.join(dataset_dir, "train"),
            x_col="Image",
            y_col="Class_enc",
            shuffle=False,
            target_size=image_dim3[:2],
            batch_size=mini_batch_size,
            class_mode="categorical",
            interpolation="bicubic",
        )

        samples_seen = 0
        for batch in batches:
            images, labels = batch[0], batch[1]
            samples_seen += images.shape[0]

            yield [images] * 3, labels

            # One full pass over train_df is done: rebuild the iterator.
            if samples_seen >= train_df.shape[0]:
                break

In [8]:
def train_generator4():
    """Endlessly yield training batches for a model with two image inputs.

    Each yielded item is ``([images, images], labels)``: the same image batch
    (resized to ``image_dim4``) repeated twice, plus the categorical labels
    taken from the ``Class_enc`` column of ``train_df``.

    A new, unshuffled dataframe iterator is built on every pass of the outer
    loop; the inner loop is left once the number of samples yielded reaches
    the number of rows in ``train_df``.
    """
    while True:
        datagen = ImageDataGenerator()

        batches = datagen.flow_from_dataframe(
            train_df,
            directory=os.path.join(dataset_dir, "train"),
            x_col="Image",
            y_col="Class_enc",
            shuffle=False,
            target_size=image_dim4[:2],
            batch_size=mini_batch_size,
            class_mode="categorical",
            interpolation="bicubic",
        )

        samples_seen = 0
        for batch in batches:
            images, labels = batch[0], batch[1]
            samples_seen += images.shape[0]

            yield [images] * 2, labels

            # One full pass over train_df is done: rebuild the iterator.
            if samples_seen >= train_df.shape[0]:
                break

In [9]:
# No augmentation or rescaling is configured for the test images.
test_datagen = ImageDataGenerator()


def _make_test_flow(image_dim):
    """Build an unshuffled test-set iterator resized to image_dim's first two entries."""
    return test_datagen.flow_from_dataframe(
        test_df,
        directory=os.path.join(dataset_dir, "test"),
        x_col="Image",
        y_col="Class",
        shuffle=False,
        target_size=(image_dim[0], image_dim[1]),
        batch_size=mini_batch_size,
        class_mode="categorical",
        interpolation="bicubic",
    )


# Test images resized to image_dim1 and image_dim2 respectively.
# y_col points at the placeholder 'Class' column of test_df.
test_generator1 = _make_test_flow(image_dim1)
test_generator2 = _make_test_flow(image_dim2)

Found 3489 validated image filenames belonging to 1 classes.
Found 3489 validated image filenames belonging to 1 classes.


In [10]:
def test_generator3(tta=False):
    """Endlessly yield test batches for a model with three image inputs.

    Parameters
    ----------
    tta : bool, default False
        When truthy, images are randomly augmented (rotation, zoom, flips,
        shear, shifts) before being yielded; otherwise they are only resized.
        Any falsy value (``False``, ``None``, ``0``, ``np.False_``) disables
        augmentation. The previous ``tta is False`` identity check enabled
        augmentation for every value other than the ``False`` singleton.

    Yields
    ------
    tuple
        ``([images, images, images], labels)``: the same image batch (resized
        to ``image_dim3``) repeated three times, plus the labels produced
        from the ``Class`` column of ``test_df``.

    A new, unshuffled dataframe iterator is built on every pass of the outer
    loop; the inner loop is left once the number of samples yielded reaches
    the number of rows in ``test_df``.
    """
    # The augmentation settings depend only on `tta`, so build them once
    # instead of on every pass of the loop below.
    if tta:
        test_datagen = ImageDataGenerator(rotation_range = 45,
                                          zoom_range = 0.2,
                                          horizontal_flip = True,
                                          vertical_flip = True,
                                          fill_mode = 'nearest',
                                          shear_range = 0.1,
                                          height_shift_range = 0.1,
                                          width_shift_range = 0.1)
    else:
        test_datagen = ImageDataGenerator()

    while True:
        batches = test_datagen.flow_from_dataframe(test_df,
                                                   directory = os.path.join(dataset_dir,"test"),
                                                   x_col = "Image",
                                                   y_col = "Class",
                                                   shuffle=False,
                                                   target_size = (image_dim3[0], image_dim3[1]),
                                                   batch_size = mini_batch_size,
                                                   class_mode = "categorical",
                                                   interpolation="bicubic")

        samples_seen = 0
        for batch in batches:
            samples_seen += batch[0].shape[0]

            yield [batch[0], batch[0], batch[0]], batch[1]

            # One full pass over test_df is done: rebuild the iterator.
            if samples_seen >= test_df.shape[0]:
                break

In [11]:
def test_generator4():
    """Endlessly yield test batches for a model with two image inputs.

    Each yielded item is ``([images, images], labels)``: the same image batch
    (resized to ``image_dim4``) repeated twice, plus the labels produced from
    the ``Class`` column of ``test_df``.

    A new, unshuffled dataframe iterator is built on every pass of the outer
    loop; the inner loop is left once the number of samples yielded reaches
    the number of rows in ``test_df``.
    """
    while True:
        datagen = ImageDataGenerator()

        batches = datagen.flow_from_dataframe(
            test_df,
            directory=os.path.join(dataset_dir, "test"),
            x_col="Image",
            y_col="Class",
            shuffle=False,
            target_size=image_dim4[:2],
            batch_size=mini_batch_size,
            class_mode="categorical",
            interpolation="bicubic",
        )

        samples_seen = 0
        for batch in batches:
            images, labels = batch[0], batch[1]
            samples_seen += images.shape[0]

            yield [images] * 2, labels

            # One full pass over test_df is done: rebuild the iterator.
            if samples_seen >= test_df.shape[0]:
                break

## Blend the models

In [12]:
# Load the five pre-trained base models from their saved .h5 files.
# The list order below determines which file ends up in which base_modelN.
model_paths = [
    '../input/hackerearth-holiday-season-v4/hackerearth_holiday_season_model.h5',
    '../input/holiday-season-enet5-v4/hackerearth_holiday_season_model.h5',
    '../input/fork-of-hackerearth-holiday-season-enet57/hackerearth_holiday_season_model.h5',
    '../input/holiday-season-enet6-v4/hackerearth_holiday_season_model.h5',
    '../input/hackerearth-holiday-season-enet-b567/hackerearth_holiday_season_model.h5',
]

base_model1, base_model2, base_model3, base_model4, base_model5 = [
    load_model(path) for path in model_paths
]

In [13]:
# Run every base model over the training images. The resulting arrays are
# saved further down as meta-features (presumably for a stacking model).
# Models 1 and 2 both read train_generator1 (image_dim1 sized images).
# NOTE(review): confirm model 2 was trained at the same resolution as model 1.
y_meta_pred1 = base_model1.predict(train_generator1, verbose=1)
y_meta_pred2 = base_model2.predict(train_generator1, verbose=1)
# train_generator4() never stops on its own (while True), so `steps` bounds it
# to one pass; it feeds the same image batch to two inputs.
y_meta_pred3 = base_model3.predict(train_generator4(), steps=train_steps, verbose=1)
# Model 4 reads the image_dim2 sized images.
y_meta_pred4 = base_model4.predict(train_generator2, verbose=1)
# train_generator3() is also endless and feeds the same image batch to three inputs.
y_meta_pred5 = base_model5.predict(train_generator3(), steps=train_steps, verbose=1)

Found 6469 validated image filenames belonging to 6 classes.
Found 6469 validated image filenames belonging to 6 classes.
Found 6469 validated image filenames belonging to 6 classes.
Found 6469 validated image filenames belonging to 6 classes.


In [14]:
# Run every base model over the test images, pairing each model with the
# test generator of the same size as the one it was given for the training set.
# Models 1 and 2 both read test_generator1 (image_dim1 sized images).
y_pred1 = base_model1.predict(test_generator1, verbose=1)
y_pred2 = base_model2.predict(test_generator1, verbose=1)
# test_generator4() never stops on its own (while True), so `steps` bounds it
# to one pass; it feeds the same image batch to two inputs.
y_pred3 = base_model3.predict(test_generator4(), steps=test_steps, verbose=1)
# Model 4 reads the image_dim2 sized images.
y_pred4 = base_model4.predict(test_generator2, verbose=1)
# test_generator3() is called with its default tta=False (no augmentation)
# and feeds the same image batch to three inputs.
y_pred5 = base_model5.predict(test_generator3(), steps=test_steps, verbose=1)

Found 3489 validated image filenames belonging to 1 classes.
Found 3489 validated image filenames belonging to 1 classes.


In [15]:
# Persist every model's train-set and test-set predictions in one compressed
# archive; each array is stored under the name of the variable it came from.
meta_features = {
    'y_meta_pred1': y_meta_pred1,
    'y_pred1': y_pred1,
    'y_meta_pred2': y_meta_pred2,
    'y_pred2': y_pred2,
    'y_meta_pred3': y_meta_pred3,
    'y_pred3': y_pred3,
    'y_meta_pred4': y_meta_pred4,
    'y_pred4': y_pred4,
    'y_meta_pred5': y_meta_pred5,
    'y_pred5': y_pred5,
}
np.savez_compressed('Holiday_Season_Meta_Features_dataset.npz', **meta_features)

## Create submission file

In [16]:
# Weighted blend of the test predictions of models 1, 2, 3 and 5
# (weights 0.15 / 0.15 / 0.35 / 0.35). y_pred4 is not part of the blend.
blend_terms = [
    (y_pred1, 0.15),
    (y_pred2, 0.15),
    (y_pred3, 0.35),
    (y_pred5, 0.35),
]
y_pred_final = sum(preds * weight for preds, weight in blend_terms)

# Pick the highest-scoring column per row and translate it back to a class name.
predicted_codes = np.argmax(y_pred_final, axis=1)
test_df['Class'] = pd.Series(predicted_codes, index=test_df.index).map(rev_class_map)

# Write the submission file and preview the first rows.
test_df.to_csv("/kaggle/working/submission.csv", index=False)
test_df.head()

Unnamed: 0,Image,Class
0,image7761.jpg,Airplane
1,image3202.jpg,Miscellaneous
2,image688.jpg,Snowman
3,image233.jpg,Candle
4,image4332.jpg,Christmas_Tree
