![](https://storage.googleapis.com/kaggle-competitions/kaggle/29762/logos/header.png)

# Context

The aim of this competition is to classify images into more than 81K landmark classes.

For each test image, we have to predict one landmark label and a corresponding confidence score. 

# Libraries

We use standard Python packages together with scikit-learn and Keras.

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

from sklearn.model_selection import train_test_split

from keras.utils import to_categorical, Sequence
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.optimizers import RMSprop,Adam
from keras.applications import VGG19, VGG16, ResNet50, ResNet152V2, NASNetMobile
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.layers.normalization import BatchNormalization

import warnings
warnings.filterwarnings("ignore")

# Path

In [2]:
# Root folder of the competition data; every later cell builds file paths by
# string concatenation on this value, so it must end with a '/'.
path = '../../landmark-recognition-2021/'
# Alternative location when running inside a Kaggle kernel:
#path = '/kaggle/input/landmark-recognition-2021/'
# List the folder content as a quick check that the path is valid.
os.listdir(path)

# Data

## Load

In [None]:
# Read the training labels and the sample submission from the data folder.
train_csv = os.path.join(path, 'train.csv')
submission_csv = os.path.join(path, 'sample_submission.csv')
train_data = pd.read_csv(train_csv)
samp_subm = pd.read_csv(submission_csv)

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# Preview the first rows of the sample submission.
samp_subm.head()

## Analyse

In [None]:
# Average number of training images available per landmark class.
images_per_class = train_data["landmark_id"].value_counts()
print("Mean picture per class : ", np.mean(images_per_class))

In [None]:
# Horizontal box plot of the number of images per landmark class.
images_per_class = train_data["landmark_id"].value_counts()

fig = plt.figure(figsize=(30, 15))
plt.boxplot(images_per_class, vert=0)
plt.title("BoxPlot du nombre d'image par catégorie", fontsize=30)
plt.xlabel("Nbr Image", fontsize=20)
plt.xticks(size=20)
plt.show()

In [None]:
# NOTE(review): the printed label mentions "10 choosen classes", but the
# statistic and the plot below are computed on the full `train_data`, exactly
# like the two previous cells. Presumably a 10-class subset was meant to be
# selected first — confirm the intent.
print("Mean picture per class (10 choosen classes): ",np.mean(train_data["landmark_id"].value_counts()))

# Horizontal box plot of the number of images per landmark class.
fig = plt.figure(figsize=(30, 15))
plt.boxplot(train_data["landmark_id"].value_counts(), vert=0)
plt.title("BoxPlot du nombre d'image par catégorie",fontsize=30)
plt.xticks(size=20)
plt.xlabel("Nbr Image",fontsize=20)
plt.show()

# Functions

In [None]:
def plot_examples(landmark_id=1):
    """Plot up to 5 example images that share the same landmark_id.

    Images are looked up in the module-level ``train_data`` table and read
    from ``path + 'train/'``, where the first three characters of the image
    id are used as nested sub-directories.

    Args:
        landmark_id: Value of the ``landmark_id`` column to show examples for.
    """
    n_examples = 5
    fig, axs = plt.subplots(1, n_examples, figsize=(25, 12))
    fig.subplots_adjust(hspace=.2, wspace=.2)
    axs = axs.ravel()

    # A class may contain fewer than 5 images; take only what exists instead
    # of indexing past the end of the selection.
    selected = train_data.loc[train_data['landmark_id'] == landmark_id, 'id']
    image_ids = list(selected.head(n_examples))

    for ax, image_id in zip(axs, image_ids):
        subpath = '/'.join(image_id[:3])
        img = cv2.imread(path + 'train/' + subpath + '/' + image_id + '.jpg')
        if img is not None:
            # cv2.imread returns BGR while matplotlib expects RGB.
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title('landmark_id: ' + str(landmark_id))
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Hide the axes that have no image to show.
    for ax in axs[len(image_ids):]:
        ax.axis('off')

In [None]:
def ImgToTreat(img):
    """Equalize the luma histogram of an image, then lightly blur it.

    Args:
        img: 3-channel image in OpenCV's BGR channel order, as returned by
            ``cv2.imread`` (assumed to be 8-bit, which ``cv2.equalizeHist``
            requires).

    Returns:
        The processed image in RGB channel order. If OpenCV rejects the
        input, the error is printed and ``img`` is returned unchanged (and
        therefore still in its original channel order).
    """
    try:
        # Equalize only the luma (Y) channel so colours are not distorted.
        ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
        ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
        equalized = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2RGB)
        # Small box filter to smooth out noise.
        return cv2.blur(equalized, (2, 2))
    except (cv2.error, TypeError) as err:
        # Best effort: report the problem and fall back to the raw input.
        print(err)
        return img

# Overview
First, we look at the size of the dataset:

In [None]:
# Row counts of the training table and of the sample submission
# (the latter is used here as the number of test samples).
print('Samples train:', len(train_data))
print('Samples test:', len(samp_subm))

In [None]:
# Number of distinct landmark ids; later used as the size of the softmax layer.
num_classes = train_data['landmark_id'].nunique(dropna=False)
print("There is %d unique classes." % num_classes)

In [None]:
# Preview the first rows of the training table again.
train_data.head()

# Image

In [None]:
# Build the file name and nested folder of the first training image:
# the first three characters of the id become three directory levels.
image_id = train_data.loc[0, 'id']
subpath = '/'.join(image_id[:3])
file = image_id + '.jpg'

In [None]:
# Show the image file name built above.
file

In [None]:
# Show the nested sub-directory derived from the image id.
subpath

Is the file located in the subpath?

In [None]:
# True when the image file is listed in its expected sub-directory.
file in os.listdir(path+'train/'+subpath)

Plot the image:

In [None]:
# Read the image from disk and display it.
img = cv2.imread(path+'train/'+subpath+'/'+file)
# cv2.imread returns BGR while matplotlib expects RGB, so convert for display;
# otherwise the red and blue channels appear swapped.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

# Plot An Example
We plot example images that share the same **landmark_id** in a row.

In [None]:
# Show example images for one specific landmark.
plot_examples(landmark_id = 83145)

# Image processing and data creation

In [None]:
# Side length, in pixels, that every training image is resized to.
img_size = 224
# Number of colour channels — presumably meant to match the model input shape.
img_channel = 3
# NOTE(review): the training generator is created with a literal batch size
# of 16, so this value appears to be unused — confirm which one is intended.
batch_size = 64

In [None]:
# Load, resize and pre-process every training image into memory.
# NOTE(review): all images are kept in RAM at img_size x img_size; this is
# presumably only feasible on a subset of the training table — confirm.
# NOTE(review): labels are the raw landmark_id values. The model is compiled
# with a sparse categorical loss over `num_classes` outputs, which presumably
# needs labels in [0, num_classes) — confirm the ids are contiguous or add an
# encoding step.
labels = []
image_path = []
images_pixels = []
for row in train_data.itertuples():
    image_id = row.id
    file = image_id + '.jpg'
    subpath = '/'.join(image_id[:3])
    # `path` already ends with '/', so no leading slash before 'train/'.
    finalpath = path + 'train/' + subpath + '/' + file

    img = cv2.imread(finalpath)
    if img is None:
        # cv2.imread returns None for a missing or unreadable file; skip it
        # rather than crashing in cv2.resize.
        print("Skipping unreadable image:", finalpath)
        continue
    img = cv2.resize(img, (img_size, img_size))
    img = ImgToTreat(img)

    images_pixels.append(img)
    image_path.append(finalpath)
    labels.append(row.landmark_id)

# Split Data
We split the data into training and validation sets.

In [None]:
# Hold out 30% of the samples for validation; the fixed seed keeps the split
# reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(images_pixels,
                                                  labels, test_size=0.3,
                                                  random_state=101)
# Each label now names the variable whose length is printed.
print("X train data : ", len(X_train))
print("X val data : ", len(X_val))
print("Y train data : ", len(Y_train))
print("Y val data : ", len(Y_val))

# Data Augmentation

In [None]:
# Random geometric augmentations applied to the training images; pixel values
# are also rescaled by 1/255.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

# Batches of augmented images with their labels.
train_images = np.array(X_train)
train_labels = np.array(Y_train)
training_set = datagen.flow(train_images, train_labels, batch_size=16)

# Model

Load pretrained model:

In [None]:
# Pretrained convolutional backbone without its classification head.
weights = 'imagenet'
# Derive the input shape from the image hyper-parameters so the backbone
# always matches the size the images were resized to.
# NOTE(review): with ImageNet weights NASNetMobile presumably expects
# 224x224x3 inputs — confirm before changing img_size.
conv_base = NASNetMobile(weights=weights,
                         include_top=False,
                         input_shape=(img_size, img_size, img_channel))
# Allow the backbone to be fine-tuned.
conv_base.trainable = True

Define Model

In [None]:
# Do not train the first 5 (lowest) layers of the backbone.
for frozen_layer in conv_base.layers[:5]:
    frozen_layer.trainable = False

# Classification head stacked on top of the pretrained backbone.
model = Sequential([
    conv_base,
    Flatten(),
    Dropout(0.3),
    BatchNormalization(),
    Dense(32, kernel_initializer='he_uniform'),
    BatchNormalization(),
    Activation('relu'),
    Dense(num_classes, activation='softmax'),
])

# Integer labels are used, hence the sparse loss and the sparse accuracy.
model.compile(
    optimizer=Adam(lr=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=['sparse_categorical_accuracy'],
)

model.summary()

In [None]:
# Number of passes over the training generator.
epochs = 10

In [None]:
# Keep only the weights of the epoch with the best validation accuracy.
# The model is compiled with the 'sparse_categorical_accuracy' metric, so the
# validation key is 'val_sparse_categorical_accuracy'; monitoring 'val_acc'
# would never find a value and no checkpoint would be written.
h5_path = "model.h5"
checkpoint = ModelCheckpoint(h5_path, monitor='val_sparse_categorical_accuracy',
                             verbose=1, save_best_only=True, mode='max')

# The training generator rescales pixels by 1/255, so the validation images
# must be rescaled the same way (without augmentation or shuffling).
validation_set = ImageDataGenerator(rescale=1./255).flow(
    np.array(X_val), np.array(Y_val), batch_size=16, shuffle=False)

# NOTE(review): fit_generator is deprecated in recent Keras releases in favour
# of model.fit; it is kept here to match the Keras version this notebook uses.
history = model.fit_generator(
    training_set,
    validation_data=validation_set,
    epochs=epochs, verbose=1,
    callbacks=[checkpoint])

In [None]:
# Training and validation accuracy per epoch.
metrics_log = history.history
fig = plt.figure(figsize=(30, 15))
for metric_name in ("sparse_categorical_accuracy",
                    "val_sparse_categorical_accuracy"):
    plt.plot(metrics_log[metric_name])
plt.title("Précisions des modèles", fontsize=30)
plt.xlabel("Epoch", fontsize=20)
plt.ylabel("Précisions", fontsize=20)
plt.yticks(size=20)
plt.legend(["Learning Accuracy", "Validation Accuracy"], fontsize=15)
plt.show()

# Predict Test Data

In [None]:
# Run the model on the test images; the later cells read one row of class
# scores per test sample from y_pred.
# NOTE(review): `test_generator` is not defined anywhere in the visible
# notebook, so this cell raises NameError as written — a generator over the
# test images (rescaled like the training data) presumably has to be created
# first.
y_pred = model.predict_generator(test_generator, verbose=1)

In [None]:
# Show the shape of the prediction array.
y_pred.shape

In [None]:
# Build the "<class> <score>" submission strings for all rows at once instead
# of writing one DataFrame cell per loop iteration.
# NOTE(review): the class written here is the argmax index of the model
# output, which is not necessarily a landmark_id value — confirm whether a
# mapping back to landmark ids is needed.
probs = y_pred[:len(samp_subm.index)]
best_class = np.argmax(probs, axis=1)
best_score = probs[np.arange(len(probs)), best_class].round(2)
samp_subm['landmarks'] = [
    str(category) + ' ' + str(score)
    for category, score in zip(best_class, best_score)
]

In [None]:
# Preview the filled-in submission table.
samp_subm.head()

# Export

In [None]:
# Write the submission file without the DataFrame index column.
samp_subm.to_csv('submission.csv', index=False)