# Image Classification

### The main idea of this Kaggle notebook is to take a base model pre-trained on ImageNet and retrain it on our data to solve an image classification problem

In [None]:
# Show the GPU attached to this session (shell command run through the `!` escape)
!nvidia-smi

In [None]:
!pip install -q efficientnet

In [None]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import pickle
import zipfile
import csv
import cv2
import sys
import os


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers

import albumentations as A

from tensorflow.keras.models import Model
from keras.models import Sequential

from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras import *
from tensorflow.keras.activations import *
from tensorflow.keras.applications import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *
from tensorflow.keras.layers.experimental.preprocessing import *
from tensorflow.keras.losses import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.optimizers.schedules import *


from tensorflow.keras.layers import *

from sklearn.model_selection import train_test_split, StratifiedKFold

import PIL
from PIL import ImageOps, ImageFilter

#increasing default size of the plots
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5

#графики в svg выглядят более четкими
%config InlineBackend.figure_format = 'svg' 
%matplotlib inline

print(os.listdir("../input"))
print('Python       :', sys.version.split('\n')[0])
print('Numpy        :', np.__version__)
print('Tensorflow   :', tf.__version__)
print('Keras        :', tf.keras.__version__)

# Main settings

In [None]:
# Main Settings

# --- training hyper-parameters ---
EPOCHS = 5        # found empirically: 5 epochs are enough for learning
BATCH_SIZE = 32   # decrease for bigger networks, otherwise we run out of memory
LR = 1e-4         # found empirically to be the best learning rate
VAL_SPLIT = 0.2   # share of the data used as the validation dataset

# --- data description ---
CLASS_NUM = 10    # number of classes in our problem
IMG_SIZE = 224    # default image size (images are square)
IMG_CHANNELS = 3  # RGB has 3 channels
input_shape = (IMG_SIZE,) * 2 + (IMG_CHANNELS,)

# --- locations ---
DATA_PATH = '../input/'
PATH = "../working/car/"  # working directory

In [None]:
# Create the working directory (no error if it already exists)
os.makedirs(PATH, exist_ok=True)

# Seed the random number generators so that runs are repeatable
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)      # NumPy global generator
tf.random.set_seed(RANDOM_SEED)  # TensorFlow global seed (was not set before)

# A plain Python variable named PYTHONHASHSEED does not influence hashing: the
# setting is read from the process environment. Exporting it here only reaches
# child processes - the running interpreter fixed its hash seed at start-up.
PYTHONHASHSEED = 0  # name kept for backward compatibility with the original cell
os.environ['PYTHONHASHSEED'] = str(PYTHONHASHSEED)

# EDA

In [None]:
# Load the training labels and the submission template from the input folder
train_df, sample_submission = (
    pd.read_csv(DATA_PATH + csv_name)
    for csv_name in ("train.csv", "sample-submission.csv")
)
train_df.head()

In [None]:
# Print the column dtypes and non-null counts of the training table
train_df.info()

#### Let's look at the classes distributions in the training dataset

In [None]:
# Number of training images per class
train_df['Category'].value_counts()

#### The training dataset looks pretty balanced

In [None]:
# Histogram (100 bins, 5x5 inch figure) of the numeric columns of train_df
train_df.hist(bins=100, figsize=(5,5))

In [None]:
# Unzipping the files into the working directory so that you can see them
print('Unpacking images')

for data_zip in ['train.zip', 'test.zip']:
    # Build the archive path from DATA_PATH instead of repeating the literal
    # "../input/", so changing the setting relocates every input at once.
    with zipfile.ZipFile(DATA_PATH + data_zip, "r") as z:
        z.extractall(PATH)

print(os.listdir(PATH))

In [None]:
print('Images examples (random sample)')
plt.figure(figsize=(12,8))

# Draw 9 random rows of the training table and show them on a 3x3 grid,
# each picture titled with its class
random_image = train_df.sample(n=9)
random_image_paths = random_image['Id'].values
random_image_cat = random_image['Category'].values

for position, (file_name, category) in enumerate(zip(random_image_paths, random_image_cat), start=1):
    plt.subplot(3, 3, position)
    plt.imshow(PIL.Image.open(f'{PATH}train/{category}/{file_name}'))
    plt.title(f'Class: {category}')
    plt.axis('off')
plt.show()

In [None]:
# Single image: display one training picture and output its size.
# The variable is deliberately not called `image`, so it does not shadow the
# `tensorflow.keras.preprocessing.image` module imported at the top, and the
# path has no leading slash because PATH already ends with one.
sample_image = PIL.Image.open(PATH+'train/0/100380.jpg')
imgplot = plt.imshow(sample_image)
plt.show()
sample_image.size

# Data Preparation

In [None]:
# Training images: light random augmentation, VAL_SPLIT of the data held out
train_datagen = ImageDataGenerator(
    validation_split=VAL_SPLIT,
    # augmentation parameters:
    horizontal_flip=True,
    rotation_range=10,
    shear_range=0.2,
    brightness_range=(0.8, 1.2),
)

# Validation and submission images are not augmented
val_datagen = ImageDataGenerator(validation_split=VAL_SPLIT)
sub_datagen = ImageDataGenerator()

# Options shared by the two labelled, directory-based iterators
labelled_flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=RANDOM_SEED,
)

train_generator = train_datagen.flow_from_directory(
    PATH+'train/', subset='training', **labelled_flow_options
)

val_generator = val_datagen.flow_from_directory(
    PATH+'train/', subset='validation', **labelled_flow_options
)

# Unlabelled test images, listed in the order of the sample submission
sub_generator = sub_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Showing few examples of the images from our dataset
def show_first_images(generator, count=6, labels=True, figsize=(20, 5), normalized=False):
  """Plot the first image of each of the first `count` batches of `generator`.

  Args:
    generator: iterable of batches. With `labels=True` every batch is unpacked
      as `(images, one_hot_labels)`; otherwise the batch itself is the images.
    count: number of batches to read, and number of columns in the figure.
    labels: whether batches carry labels; if so each picture is titled with
      the argmax of its label vector.
    figsize: size of the whole figure, passed to `plt.subplots`.
    normalized: when False the image is cast to uint8 before being shown.
  """
  batches = itertools.islice(generator, count)
  # squeeze=False keeps `axes` two-dimensional even for count == 1, where
  # plt.subplots would otherwise return a single Axes that cannot be zipped.
  fig, axes = plt.subplots(nrows=1, ncols=count, figsize=figsize, squeeze=False)
  for batch, ax in zip(batches, axes[0]):
    if labels:
      img_batch, labels_batch = batch
      img, label = img_batch[0], np.argmax(labels_batch[0])
    else:
      img = batch[0]
    if not normalized:
      img = img.astype(np.uint8)
    ax.imshow(img)

    if labels:
      ax.set_title(f'Class: {label}')
  plt.show()

# Preview every generator; the submission generator yields images without labels
for caption, preview_generator, has_labels in (
    ('Train:', train_generator, True),
    ('Val:', val_generator, True),
    ('Sub:', sub_generator, False),
):
    print(caption)
    show_first_images(preview_generator, labels=has_labels)

# Building a base model

#### Let's start with EfficientNetB0 as the base model

In [None]:
# ImageNet-pretrained EfficientNetB0 without its classification top
base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=input_shape)

In [None]:
# building a model
model = Sequential([
  base_model, #pre-trained neural network from keras.applications module
  GlobalMaxPool2D(),
  Dropout(0.5),
  Dense(CLASS_NUM, activation='softmax')
])

# The last layer already applies softmax, so the loss has to treat the model
# output as probabilities (from_logits=False), not as raw logits.
model.compile(
    loss=CategoricalCrossentropy(from_logits=False),
    optimizer=Adam(ExponentialDecay(LR, 100, 0.9)),  # LR decays by 0.9 every 100 steps
    metrics=['accuracy']
)

In [None]:
# training model
# `history` has to exist before it is passed as a callback; it collects the
# per-epoch metrics that the plotting cell reads from `history.history`.
history = History()
model.fit(train_generator, validation_data=val_generator, epochs=EPOCHS, callbacks=[history])

In [None]:
# Plot the learning curves recorded for the EfficientNetB0-based model
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(len(acc))

panels = [
    (acc, 'Training acc', val_acc, 'Validation acc', 'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss', 'Training and validation loss'),
]
for panel_no, (train_curve, train_label, val_curve, val_label, panel_title) in enumerate(panels):
    if panel_no > 0:
        plt.figure()  # every metric after the first gets its own figure
    plt.plot(epochs, train_curve, 'b', label=train_label)
    plt.plot(epochs, val_curve, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

### The original model showed 84% accuracy, which resulted in a slightly worse score of 83% in the Kaggle competition. Since this result was far from the top, I proceeded with further experiments

# Data Augmentation

### I tried 5 different augmentation settings in ImageDataGenerator and the one below performed the best

In [None]:
# Data Augmentation settings
train_datagen2 = ImageDataGenerator(
        # Same hold-out fraction as val_datagen. Without it the generator's split
        # fraction is 0, so subset='training' below would select every image in
        # the folder - including the ones served by val_generator - and the
        # validation metrics would be measured on data the model was trained on.
        validation_split=VAL_SPLIT,
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range = 30,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.2, # Randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip = True,  # randomly flip images horizontally
        vertical_flip=False)  # no vertical flips


train_generator2 = train_datagen2.flow_from_directory(
    PATH+'train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=0,
    subset='training'
)

In [None]:
# Train the model for one epoch on train_generator2, validating on val_generator
model.fit(train_generator2, validation_data=val_generator, epochs=1, callbacks=[history])

In [None]:
# Train the same model for EPOCHS more epochs on train_generator2
model.fit(train_generator2, validation_data=val_generator, epochs=EPOCHS, callbacks=[history])

### The current model showed a good result compared to the initial model without augmentation: 83% accuracy in the Kaggle competition. However, it's not enough, so we have to continue experimenting

### Augmentation using albumentations library

#### I must mention that I tried up to 10 different augmentations with the Albumentations library, both from best practices and from my own experiments. None of them worked well.

In [None]:
#installing Albumentations library
!pip install albumentations -q -U

In [None]:
# The pipeline does not depend on the input image, so it is built once here
# instead of being re-created on every call of augment().
# The shorter crop side is derived from IMG_SIZE (224 -> 200) so that the crops
# always fit inside the IMG_SIZE x IMG_SIZE images produced by the generator.
_CROP_SHORT_SIDE = IMG_SIZE * 200 // 224
_AUGMENT_PIPELINE = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, interpolation=1, border_mode=4, value=None, mask_value=None, always_apply=False, p=0.5),
    # crop away either the sides or the top/bottom of the picture
    A.OneOf([
        A.CenterCrop(height=IMG_SIZE, width=_CROP_SHORT_SIDE),
        A.CenterCrop(height=_CROP_SHORT_SIDE, width=IMG_SIZE),
    ],p=0.5),
    # strong or mild brightness/contrast jitter
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1)
    ],p=0.5),
    A.GaussianBlur(p=0.05),
    A.HueSaturationValue(p=0.5),
    A.RGBShift(p=0.5),
    A.FancyPCA(alpha=0.1, always_apply=False, p=0.5),
    # bring the (possibly cropped) picture back to the network input size
    A.Resize(IMG_SIZE, IMG_SIZE)
])


def augment(image):
    """Apply the Albumentations pipeline to one image.

    Args:
        image: NumPy array holding a single image; it is cast to uint8
            before the transforms are applied.

    Returns:
        The augmented image, resized to IMG_SIZE x IMG_SIZE.
    """
    return _AUGMENT_PIPELINE(image=image.astype(np.uint8))['image']
    

# Keras generator that passes every loaded image through augment()
album_datagen = ImageDataGenerator(preprocessing_function=augment)

# Preview three rounds of augmented samples; each batch holds a single image
for preview_round in range(3):
    album_generator = album_datagen.flow_from_directory(
        PATH+'train/',
        seed=RANDOM_SEED,
        shuffle=True,
        batch_size=1,
        target_size=(IMG_SIZE, IMG_SIZE),
    )
    show_first_images(album_generator)


In [None]:
# training a model
# The preview generator from the previous cell yields batches of one image
# drawn from the whole train folder, validation images included. Train on a
# dedicated generator instead: regular batch size, training subset only.
album_train_generator = ImageDataGenerator(
    preprocessing_function=augment,
    validation_split=VAL_SPLIT,
).flow_from_directory(
    PATH+'train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=RANDOM_SEED,
    subset='training'
)
model.fit(album_train_generator, validation_data=val_generator, epochs=1, callbacks=[history])

#### In general, the Albumentations library not only failed to improve the metrics, it did not show any good results at all. My assumption is that this is because of the specifics of the images.

## Fine-tuning

#### Let's try to apply the fine-tuning technique to improve the result of the initial model

In [None]:
# How many layers does the base model have?
base_layer_count = len(base_model.layers)
print("Number of layers in the base model: ", base_layer_count)

In [None]:
base_model.trainable = True

# training only half of base model layers: freeze the first half
fine_tune_at = len(base_model.layers)//2

for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False

# The model has already been compiled and trained; Keras documents that a change
# of `trainable` is only taken into account after compile() is called again.
# Recompiling also starts a fresh optimizer and learning-rate schedule.
# The loss expects probabilities because the last layer applies softmax.
model.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(ExponentialDecay(LR, 100, 0.9)),
    metrics=['accuracy']
)

In [None]:
# Check the number of trainable variables (weight tensors, not layers) left in the base model
len(base_model.trainable_variables)

In [None]:
# Train the partially frozen model for EPOCHS epochs on train_generator2
model.fit(train_generator2, validation_data=val_generator, epochs=EPOCHS, callbacks=[history])

#### Overall, the fine-tuned model showed a slightly better result in the Kaggle competition - around 85% - which is still not enough.

### Let's try EfficientNetB3 model with different parameters

In [None]:
# ImageNet-pretrained EfficientNetB3 without its classification top
base_model_enb3 = EfficientNetB3(input_shape=input_shape, include_top=False, weights='imagenet')

In [None]:
# Building a model. This stack of layers is the result of extended experiments:
# backbone -> average pooling -> batch norm -> dropout -> dense(256) -> batch norm -> softmax
model_enb3 = Sequential()
for stacked_layer in (
    base_model_enb3,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dropout(0.25),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(CLASS_NUM, activation='softmax'),
):
    model_enb3.add(stacked_layer)

# Adam with a learning rate that starts at 0.0009 and decays by 0.9 every 100 steps
enb3_lr_schedule = ExponentialDecay(0.0009, decay_steps=100, decay_rate=0.9)
model_enb3.compile(
    optimizer=optimizers.Adam(learning_rate=enb3_lr_schedule),
    loss='categorical_crossentropy',
    metrics='accuracy',
)

In [None]:
# Adding callback for saving the model with the best accuracy.
# `monitor` takes one metric name as a string (not a list), and
# save_best_only=True is what restricts saving to epochs that improve it.
checkpoint = ModelCheckpoint('best_model_enb3.hdf5', monitor='val_accuracy', verbose=1,
                             mode='max', save_best_only=True)
# Stop on validation accuracy - the metric the checkpoint tracks - rather than
# on training accuracy. Note: with patience=5 and EPOCHS=5 it cannot trigger
# within a single EPOCHS-long run.
earlystop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]

In [None]:
# An object for storing the model learning metrics; it is passed to fit() as a
# callback and read back through `history_enb3.history` when plotting
history_enb3 = History()

In [None]:
# Making all the layers of the base model trainable. The model showed its best results with such settings
# NOTE(review): the flag is set after model_enb3.compile(); Keras layers are
# presumably trainable by default, in which case this line changes nothing - confirm.
base_model_enb3.trainable = True

In [None]:
# Training a model: metrics go to history_enb3, with checkpointing and early stopping enabled
enb3_fit_callbacks = [history_enb3, checkpoint, earlystop]
model_enb3.fit(
    train_generator2,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=enb3_fit_callbacks,
)

In [None]:
# Plot the learning curves recorded for the EfficientNetB3-based model
recorded = history_enb3.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(len(acc))

panels = [
    (acc, 'Training acc', val_acc, 'Validation acc', 'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss', 'Training and validation loss'),
]
for panel_no, (train_curve, train_label, val_curve, val_label, panel_title) in enumerate(panels):
    if panel_no > 0:
        plt.figure()  # every metric after the first gets its own figure
    plt.plot(epochs, train_curve, 'b', label=train_label)
    plt.plot(epochs, val_curve, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Saving the trained EfficientNetB3-based model to the working directory
model_enb3.save('../working/model_enb3.hdf5')

### Let's try TTA improvement for this model

In [None]:
# Applying augmentations for the test data (test-time augmentation).
# The transform values mirror those of train_datagen2.
tta_settings_enb3 = dict(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.2,                       # randomly zoom image
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,                 # randomly flip images horizontally
    vertical_flip=False,                  # no vertical flips
)
sub_datagen_enb3 = ImageDataGenerator(**tta_settings_enb3)

# Unshuffled, so predictions stay aligned with the rows of sample_submission
sub_generator_enb3 = sub_datagen_enb3.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Collect 5 prediction passes over the augmented test images (averaged in the next cell)
enb3_passes = []
for _ in range(5):
    pass_scores = model_enb3.predict(sub_generator_enb3, verbose=1)
    sub_generator_enb3.reset()  # rewind the generator before the next pass
    enb3_passes.append(pass_scores)
predictions_enb3 = np.stack(enb3_passes)
predictions_enb3.shape

In [None]:
# Average the class scores over the passes, then pick the best class per image
mean_scores_enb3 = np.mean(predictions_enb3, axis=0)
final_predictions_enb3 = np.argmax(mean_scores_enb3, axis=-1)
final_predictions_enb3

In [None]:
# Submitting predictions: one row per test file with its predicted class index
submission_enb3 = pd.DataFrame(
    {'Id': sub_generator_enb3.filenames, 'Category': final_predictions_enb3},
    columns=['Id', 'Category'],
)
submission_enb3.to_csv('submission_enb3.csv', index=False)

#### The EfficientNetB3 model with all necessary optimizations showed the best accuracy so far. I got 95.695% accuracy in the Kaggle competition; however, this is not even a top-100 result. So, let's proceed with the experiments

### Let's try another model - EfficientNetB7

#### First we need to make some modifications for the model parameters before running this model

In [None]:
# Generators for EfficientNetB7 use a batch size of 16 instead of BATCH_SIZE
# (presumably so that the bigger network fits into memory)
enb7_labelled_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=16,
    class_mode='categorical',
    shuffle=True,
    seed=RANDOM_SEED,
)

train_generator_enb7 = train_datagen2.flow_from_directory(
    PATH+'train/', subset='training', **enb7_labelled_options
)

val_generator_enb7 = val_datagen.flow_from_directory(
    PATH+'train/', subset='validation', **enb7_labelled_options
)

# Unlabelled test images, listed in the order of the sample submission
sub_generator_enb7 = sub_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    batch_size=16,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# ImageNet-pretrained EfficientNetB7 without its classification top
base_model_enb7 = EfficientNetB7(input_shape=input_shape, include_top=False, weights='imagenet')

In [None]:
# Building a model: the same classification head as for EfficientNetB3,
# stacked on top of the EfficientNetB7 backbone
model_enb7 = Sequential()
for stacked_layer in (
    base_model_enb7,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dropout(0.25),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(CLASS_NUM, activation='softmax'),
):
    model_enb7.add(stacked_layer)

# Adam with a learning rate that starts at 0.0009 and decays by 0.9 every 100 steps
enb7_lr_schedule = ExponentialDecay(0.0009, decay_steps=100, decay_rate=0.9)
model_enb7.compile(
    optimizer=optimizers.Adam(learning_rate=enb7_lr_schedule),
    loss='categorical_crossentropy',
    metrics='accuracy',
)

In [None]:
# Adding callback for saving the model with the best accuracy.
# `monitor` takes one metric name as a string (not a list), and
# save_best_only=True is what restricts saving to epochs that improve it.
checkpoint = ModelCheckpoint('best_model_enb7.hdf5', monitor='val_accuracy', verbose=1,
                             mode='max', save_best_only=True)
# Stop on validation accuracy - the metric the checkpoint tracks - rather than
# on training accuracy. Note: with patience=5 and EPOCHS=5 it cannot trigger
# within a single EPOCHS-long run.
earlystop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]

In [None]:
# The experiments showed that the model performs best if we disable the base model training at all.
# NOTE(review): this runs after model_enb7.compile(); the Keras documentation
# recommends compiling again after changing `trainable` - confirm the freeze is in effect.
base_model_enb7.trainable = False

In [None]:
# Training a model
# `history_enb7` has to exist before it is passed as a callback; it collects the
# per-epoch metrics that the plotting cell reads from `history_enb7.history`.
history_enb7 = History()
model_enb7.fit(train_generator_enb7, validation_data=val_generator_enb7, epochs=EPOCHS, callbacks=[history_enb7, checkpoint, earlystop])

In [None]:
# Plot the learning curves recorded for the EfficientNetB7-based model
recorded = history_enb7.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(len(acc))

panels = [
    (acc, 'Training acc', val_acc, 'Validation acc', 'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss', 'Training and validation loss'),
]
for panel_no, (train_curve, train_label, val_curve, val_label, panel_title) in enumerate(panels):
    if panel_no > 0:
        plt.figure()  # every metric after the first gets its own figure
    plt.plot(epochs, train_curve, 'b', label=train_label)
    plt.plot(epochs, val_curve, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

In [None]:
# Saving the trained EfficientNetB7-based model to the working directory
model_enb7.save('../working/model_enb7.hdf5')

### Let's try TTA improvement for EfficientNetB7 model

In [None]:
# Test-time augmentation for EfficientNetB7.
# The transform values mirror those of train_datagen2.
tta_settings_enb7 = dict(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.2,                       # randomly zoom image
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,                 # randomly flip images horizontally
    vertical_flip=False,                  # no vertical flips
)
sub_datagen_enb7 = ImageDataGenerator(**tta_settings_enb7)

# Unshuffled, so predictions stay aligned with the rows of sample_submission
sub_generator_enb7 = sub_datagen_enb7.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    seed=RANDOM_SEED,
    batch_size=16,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Collect 5 prediction passes over the augmented test images (averaged in the next cell)
enb7_passes = []
for _ in range(5):
    pass_scores = model_enb7.predict(sub_generator_enb7, verbose=1)
    sub_generator_enb7.reset()  # rewind the generator before the next pass
    enb7_passes.append(pass_scores)
predictions_enb7 = np.stack(enb7_passes)
predictions_enb7.shape

In [None]:
# Average the class scores over the passes, then pick the best class per image
mean_scores_enb7 = np.mean(predictions_enb7, axis=0)
final_predictions_enb7 = np.argmax(mean_scores_enb7, axis=-1)
final_predictions_enb7

In [None]:
# Submitting predictions: one row per test file with its predicted class index
submission_enb7 = pd.DataFrame(
    {'Id': sub_generator_enb7.filenames, 'Category': final_predictions_enb7},
    columns=['Id', 'Category'],
)
submission_enb7.to_csv('submission_enb7.csv', index=False)

#### The EfficientNetB7 model with all necessary optimizations showed the best accuracy so far. I got 95.9% accuracy in the Kaggle competition; however, this is not even a top-100 result. So, let's proceed with the experiments

### Let's try an ensemble of EfficientNetB3 and EfficientNetB7 models

In [None]:
# Empirically I came out with the ideal coefficients of 0.5 for both models.
# The blend uses the class scores each model already produced with test-time
# augmentation (mean over the passes stored in predictions_enb3 and
# predictions_enb7) instead of running one more single, randomly augmented
# pass per model. Both generators are unshuffled over sample_submission, so
# the rows of the two score matrices refer to the same files.
predictions_ans = 0.5*predictions_enb3.mean(axis=0) + 0.5*predictions_enb7.mean(axis=0)
predictions_ans = predictions_ans.argmax(axis=1)
predictions_ans

In [None]:
# Submitting predictions of the two-model ensemble
submission_ans = pd.DataFrame(
    {'Id': sub_generator.filenames, 'Category': predictions_ans},
    columns=['Id', 'Category'],
)
submission_ans.to_csv('submission_ans2.csv', index=False)

### The ensemble of two base models EfficientNetB3 and EfficientNetB7 gave the best score that I achieved on Kaggle Competition - 0.96554

## Let's try another base model, from different family - Xception

In [None]:
# ImageNet-pretrained Xception without its classification top
base_model_x = Xception(input_shape=input_shape, include_top=False, weights='imagenet')

In [None]:
# Building a model: the same classification head as for the EfficientNet models,
# stacked on top of the Xception backbone
# NOTE(review): the generators feed raw pixel values; Keras documents
# `xception.preprocess_input` for this backbone - confirm it was skipped on purpose.
model_x = Sequential()
for stacked_layer in (
    base_model_x,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dropout(0.25),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(CLASS_NUM, activation='softmax'),
):
    model_x.add(stacked_layer)

# Adam with a learning rate that starts at 0.0009 and decays by 0.9 every 100 steps
x_lr_schedule = ExponentialDecay(0.0009, decay_steps=100, decay_rate=0.9)
model_x.compile(
    optimizer=optimizers.Adam(learning_rate=x_lr_schedule),
    loss='categorical_crossentropy',
    metrics='accuracy',
)

In [None]:
# Adding callback for saving the model with the best accuracy.
# `monitor` takes one metric name as a string (not a list), and
# save_best_only=True is what restricts saving to epochs that improve it.
checkpoint = ModelCheckpoint('best_model_x.hdf5', monitor='val_accuracy', verbose=1,
                             mode='max', save_best_only=True)
# Stop on validation accuracy - the metric the checkpoint tracks - rather than
# on training accuracy. Note: with patience=5 and EPOCHS=5 it cannot trigger
# within a single EPOCHS-long run.
earlystop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
callbacks_list = [checkpoint, earlystop]

In [None]:
# An object for storing learning metrics; it is passed to fit() as a callback
# and read back through `history_x.history` when plotting
history_x = History()

In [None]:
# Training a model: metrics go to history_x, with checkpointing and early stopping enabled
x_fit_callbacks = [history_x, checkpoint, earlystop]
model_x.fit(
    train_generator2,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=x_fit_callbacks,
)

In [None]:
# Plot the learning curves recorded for the Xception-based model
recorded = history_x.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(len(acc))

panels = [
    (acc, 'Training acc', val_acc, 'Validation acc', 'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss', 'Training and validation loss'),
]
for panel_no, (train_curve, train_label, val_curve, val_label, panel_title) in enumerate(panels):
    if panel_no > 0:
        plt.figure()  # every metric after the first gets its own figure
    plt.plot(epochs, train_curve, 'b', label=train_label)
    plt.plot(epochs, val_curve, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

### Let's try TTA improvement for Xception model

In [None]:
# Test-time augmentation for Xception.
# The transform values mirror those of train_datagen2.
tta_settings_x = dict(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.2,                       # randomly zoom image
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,                 # randomly flip images horizontally
    vertical_flip=False,                  # no vertical flips
)
sub_datagen_x = ImageDataGenerator(**tta_settings_x)

# Unshuffled, so predictions stay aligned with the rows of sample_submission
sub_generator_x = sub_datagen_x.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Collect 5 prediction passes over the augmented test images (averaged in the next cell)
x_passes = []
for _ in range(5):
    pass_scores = model_x.predict(sub_generator_x, verbose=1)
    sub_generator_x.reset()  # rewind the generator before the next pass
    x_passes.append(pass_scores)
predictions_x = np.stack(x_passes)
predictions_x.shape

In [None]:
# Average the class scores over the passes, then pick the best class per image
mean_scores_x = np.mean(predictions_x, axis=0)
final_predictions_x = np.argmax(mean_scores_x, axis=-1)
final_predictions_x

In [None]:
# Submission file for the Xception model: one row per test file with its predicted class index
submission_x = pd.DataFrame(
    {'Id': sub_generator_x.filenames, 'Category': final_predictions_x},
    columns=['Id', 'Category'],
)
submission_x.to_csv('submission_x.csv', index=False)

### The Xception base model also showed a fairly good result on the Kaggle competition page - 0.95220. It loses to the EfficientNet models, but it is very close. Let's try to use it in an ensemble as well

In [None]:
# Empirically I came out with the ideal coefficients for all three models.
# The blend uses the class scores each model already produced with test-time
# augmentation (mean over the passes stored in predictions_enb3,
# predictions_enb7 and predictions_x) instead of running one more single,
# randomly augmented pass per model. All three generators are unshuffled over
# sample_submission, so the rows of the score matrices refer to the same files.
predictions_ens = (
    0.4*predictions_enb3.mean(axis=0)
    + 0.3*predictions_enb7.mean(axis=0)
    + 0.3*predictions_x.mean(axis=0)
)
predictions_ens = predictions_ens.argmax(axis=1)
predictions_ens

In [None]:
# Submitting predictions of the three-model ensemble
submission_ens = pd.DataFrame(
    {'Id': sub_generator.filenames, 'Category': predictions_ens},
    columns=['Id', 'Category'],
)
submission_ens.to_csv('submission_ens.csv', index=False)

### The ensemble of three models showed a good result in the Kaggle competition with a score of 0.96220; however, it couldn't beat the best result of the ensemble of two EfficientNet models

**Conclusion:** To get the best score I tried different approaches:
*  I experimented with different augmentations, using both ImageDataGenerator and the Albumentations library. Unfortunately, the Albumentations library didn't show any good results
* I experimented with the learning rate, different base models and different layer architectures. The best single-model results were shown by the EfficientNetB3 model; however, the very best score of 0.96554 was achieved with an ensemble of the EfficientNetB3 and EfficientNetB7 models
* TTA helped a lot to improve the score of the models