# Load Libraries & Preprocess Data

In [1]:
import os
# Restrict this process to a single physical GPU. The variable is set ahead of
# the TensorFlow import further down so the restriction is already in place
# when the framework looks for devices.
GPU = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU
import random
from glob import glob
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import (
    ReduceLROnPlateau,
    EarlyStopping,
    ModelCheckpoint,
    TensorBoard
)
from tensorflow.keras.applications import (
    MobileNet,
    MobileNetV2,
    EfficientNetB7
)
from tensorflow.keras.layers import (
    GlobalAveragePooling2D,
    Dense
)
from sklearn.model_selection import train_test_split

#### Set Seed 

In [2]:
SEED = 42


def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Covers Python's `random` module, NumPy's legacy global generator and
    TensorFlow's global generator, and exports the value as PYTHONHASHSEED.

    Args:
        seed: Integer seed applied to all of the generators above.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


seed_everything(SEED)

#### Config 

In [3]:
# Images per batch handed to the data generator. The extraction loop further
# down flattens each batch into a single feature row, so it relies on this
# staying at 1.
batch_size = 1

# Not referenced by any cell visible in this notebook.
# NOTE(review): presumably the number of landmark classes - confirm.
classes = 1049
# Side length in pixels: images are resized to (size, size) and the backbone is
# built with input_shape=(size, size, 3).
size = 600

#### Load data 

In [4]:
# Read the list of test images; the preview below shows an `id` column holding
# the image file paths.
df = pd.read_csv('test.csv')
df.head()

Unnamed: 0,id
0,../../uiui/datasets/public/test/x/xlf1tgh2ih.JPG
1,../../uiui/datasets/public/test/6/68a3ot4osk.JPG
2,../../uiui/datasets/public/test/s/si2lek4u0a.JPG
3,../../uiui/datasets/public/test/r/rmtqxhipnv.JPG
4,../../uiui/datasets/public/test/2/2flmjdud0e.JPG


In [5]:
# Normalise `df` to the two columns consumed by `flow_from_dataframe` below:
# 'id' (image path) and 'landmark_id' (class label as a string).
#
# The decision is made on the column *name* rather than on the column count,
# so a frame that carries extra columns (for example a saved index column)
# no longer fails with a KeyError or a column-length mismatch.
if len(df.columns) == 1:
    # A lone column is taken to be the image path, whatever its header says.
    df.columns = ['id']
if 'landmark_id' not in df.columns:
    # Unlabelled split: attach a constant dummy label so the generator has a
    # label column to read.
    df['landmark_id'] = 0
# The generator is given the labels as strings.
df['landmark_id'] = df['landmark_id'].astype(str)

In [6]:
# Build the image loader that streams the files listed in `df` to the model.
datagen = ImageDataGenerator(
    # Multiplies every pixel value by 1/255.
    # NOTE(review): the tf.keras EfficientNet models are documented as
    # including their own input rescaling and expecting pixel values in
    # [0, 255]; this factor may therefore scale the input twice. Confirm how
    # the training features were extracted before changing it, since both
    # sides must use the same preprocessing.
    rescale=1./255,
)
generator = datagen.flow_from_dataframe(
    df,
    x_col='id',             # column with the image file paths
    y_col='landmark_id',    # column with the (string) class labels
    target_size=(size,size),
    batch_size=batch_size,
    # Keep the file order so that extracted rows line up with the rows of `df`.
    shuffle=False
)

Found 37964 validated image filenames belonging to 1 classes.


# Load Model

In [7]:
# Frozen EfficientNetB7 backbone followed by global average pooling, used as a
# fixed feature extractor (no classification head, no trainable weights).
#
# CUDA_VISIBLE_DEVICES (set in the first cell) hides every other card, so the
# one selected GPU is always enumerated as GPU:0 inside this process,
# whatever its physical index is. Using the physical index here would name a
# device that does not exist as soon as a GPU other than '0' is selected.
with tf.device('/device:GPU:0'):
    base_model = EfficientNetB7(
        input_shape=(size, size, 3),
        include_top=False,      # drop the ImageNet classifier head
        weights='imagenet',
    )
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        # Collapse the spatial feature map into one vector per image.
        GlobalAveragePooling2D(),
    ])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 19, 19, 2560)      64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,687
Trainable params: 0
Non-trainable params: 64,097,687
_________________________________________________________________


In [None]:
# Run every image through the frozen backbone and collect one feature vector
# (as a plain list of floats) and one integer label index per image.
features = []
targets = []

# Index the generator instead of iterating it: the iterator never stops on its
# own, and indexing makes the cell independent of the iterator's internal
# position when it is re-run. Passing a sized range also gives tqdm a total.
num_batches = len(generator)
for batch_idx in tqdm(range(num_batches), total=num_batches):
    images, labels = generator[batch_idx]
    embeddings = model(images, training=False).numpy()
    # One row per image, so the result stays correct for any batch size.
    features.extend(embeddings.reshape(len(images), -1).tolist())
    # Labels arrive one-hot encoded; store the class index.
    targets.extend(np.argmax(labels, axis=1).tolist())

11051it [43:52,  4.23it/s]

In [None]:
# Sanity check: the two lists are appended to together, so the lengths should match.
len(features), len(targets)

In [None]:
# One row per extracted feature vector; columns are the feature positions.
feature_df = pd.DataFrame(features)
# Single-column frame holding the label index of each row.
target_df = pd.DataFrame(targets)
target_df.columns = ['label']

In [None]:
# Place the label column to the right of the feature columns (joined on the row index).
final = pd.concat([feature_df, target_df], axis=1)

In [None]:
# Persist the features and labels; the row index is not written to the file.
final.to_csv('test_features.csv', index=False)

In [None]:
# Read the file back to check that it was written as expected.
pd.read_csv('test_features.csv')