### Download single fold weights

In [1]:
# Fetch the fold-0 checkpoint. The URL is quoted so the shell never treats
# '?' as a glob character; --fail stops curl from saving an HTTP error page
# as the .h5 file.
!curl --fail -L -o ef1b7-fold0.h5 'https://www.dropbox.com/s/e9xi8cpjk152npb/ef1b7-fold0.h5?dl=0'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   139    0   139    0     0    431      0 --:--:-- --:--:-- --:--:--   431
100   340  100   340    0     0    537      0 --:--:-- --:--:-- --:--:--   537
100   534    0   534    0     0    398      0 --:--:--  0:00:01 --:--:--     0
100  267M  100  267M    0     0  22.8M      0  0:00:11  0:00:11 --:--:-- 27.8M


### Import

In [2]:
import os
import sys
sys.path.append('../lib')
import warnings
warnings.simplefilter('ignore', UserWarning)
import numpy as np
import pandas as pd
import tensorflow as tf
print('tf:', tf.__version__)
from sklearn.preprocessing import LabelEncoder
import efficientnet.tfkeras as efn
from vecxoz_utils import init_tpu
from vecxoz_utils import create_cv_split

tf: 2.8.0


### Settings

In [3]:
class args:
    """Notebook-wide settings, read as plain class attributes (never instantiated here)."""
    # Root data directory: 'train.csv' and the 'images' folder are resolved
    # under it, and it is also passed to create_cv_split.
    data_dir       = '../data'
    # NOTE(review): not referenced anywhere in the visible cells — presumably
    # an output directory for saved predictions; confirm.
    data_preds_dir = 'preds'
    # Forwarded to init_tpu; None in this notebook.
    tpu_ip_or_name = None
    # Channel count used for JPEG decoding and for the model input shape.
    n_channels     = 3
    # Images are resized to dim x dim before being fed to the model.
    dim            = 512
    # Learning rate given to the Adam optimizer when the model is compiled.
    lr             = 5e-4
    # Passed as `weights=` to EfficientNetB7; the fold checkpoint is loaded
    # on top of the model afterwards.
    weights        = 'imagenet'
    # Number of units in the final softmax layer.
    n_classes      = 2265

### Definitions

In [4]:
def image_to_tensor(file):
    """Load a JPEG file as a scaled, batched 4D tensor.

    The image is decoded with ``args.n_channels`` channels, resized to
    ``args.dim`` x ``args.dim``, given a leading batch axis of size 1 and
    divided by 255.
    """
    side, depth = args.dim, args.n_channels
    raw_bytes = tf.io.read_file(file)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=depth, dct_method='INTEGER_ACCURATE')
    pixels = tf.image.resize(pixels, [side, side])
    # Reshape pins the static shape after the resize.
    pixels = tf.reshape(pixels, [side, side, depth])
    batch = tf.expand_dims(pixels, axis=0)
    return batch / 255.0


def init_model(print_summary=True):
    """Build and compile the EfficientNet-B7 classifier.

    The network is an EfficientNetB7 backbone without its top, followed by
    global average pooling and a softmax layer with ``args.n_classes`` units.

    Args:
        print_summary: when truthy, print the Keras model summary.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # Layers are created in stacking order so auto-generated layer names
    # stay the same as when they are built inline.
    backbone = efn.EfficientNetB7(
        input_shape=(args.dim, args.dim, args.n_channels),
        weights=args.weights,
        include_top=False,
    )
    pooling = tf.keras.layers.GlobalAveragePooling2D()
    classifier = tf.keras.layers.Dense(args.n_classes, activation='softmax')

    model = tf.keras.Sequential([backbone, pooling, classifier], name='model')
    model.compile(
        optimizer=tf.keras.optimizers.Adam(args.lr),
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    if print_summary:
        model.summary()
    return model


def decode_prediction(probas, top_k=5):
    """Decode the model output for a single example into ``top_k`` labels.

    Relies on two module-level names: ``le`` (the fitted label encoder) and
    ``turtle_ids_orig`` (the ids found in the original training table).

    Args:
        probas: class scores for one example; expected shape
            ``(1, n_classes)`` because the ranking is squeezed to 1D.
        top_k: number of labels to return (default 5).

    Returns:
        List of ``top_k`` distinct labels ordered from most to least
        probable, containing at most one ``'new_turtle'`` entry.

    Raises:
        IndexError: if there are not enough known ids to fill ``top_k`` slots.
    """
    # 0) Class indices ordered from most to least probable
    order = np.argsort(probas, axis=1)[:, ::-1]

    # 1) Transform integer labels into string labels
    ranked_ids = le.inverse_transform(np.squeeze(order))

    # 2) Replace every id outside the original training ids with "new_turtle"
    #    (set lookup instead of scanning the id list for each label)
    known_ids = set(turtle_ids_orig)
    ranked = [t_id if t_id in known_ids else 'new_turtle' for t_id in ranked_ids]

    # 3) Keep only the first occurrence of each label within the top_k slots;
    #    in practice only "new_turtle" can repeat
    head = ranked[:top_k]
    decoded = []
    for t_id in head:
        if t_id not in decoded:
            decoded.append(t_id)

    # 4) Refill the freed slots with the most probable known ids that are
    #    not already part of the top_k slots
    missing = top_k - len(decoded)
    if missing > 0:
        spare = [t_id for t_id in ranked if t_id != 'new_turtle' and t_id not in head]
        if len(spare) < missing:
            raise IndexError(
                'not enough known turtle ids to fill %d prediction slots' % top_k)
        decoded.extend(spare[:missing])
    return decoded

### Read data

In [5]:
# Ids present in the original training table, in sorted order (100 unique).
train_orig_df = pd.read_csv(os.path.join(args.data_dir, 'train.csv'))
turtle_ids_orig = list(train_orig_df['turtle_id'].unique())
turtle_ids_orig.sort()

# Project helper that returns the train/test frames.
# NOTE(review): the second argument is presumably the number of folds — confirm.
train_df, test_df = create_cv_split(args.data_dir, 5)

# Encoder between turtle ids and integer class indices, fitted on the
# training frame returned by the split.
le = LabelEncoder().fit(train_df['turtle_id'])

### Init accelerator and model

In [6]:
# init_tpu is a project-local helper (vecxoz_utils); only its third return
# value is used here — presumably a tf.distribute strategy, since its
# .scope() wraps the model creation below.
_, _, strategy = init_tpu(args.tpu_ip_or_name)
# Build and compile the model inside the strategy scope.
with strategy.scope():
    model = init_model()
# Load the fold-0 checkpoint fetched by the download cell.
model.load_weights('ef1b7-fold0.h5')

--> TPU was not found!


2022-04-25 10:29:06.371020: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 10:29:07.219113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 10:29:07.219857: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 10:29:07.276061: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
--> Num replicas: 1
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localho

### Predict test examples one-by-one

In [7]:
# Run the model on each test image individually and decode its ranked labels.
preds_list = []
# Only the image_id column is needed, so iterate it directly instead of
# materialising every row with iterrows().
for counter, image_id in enumerate(test_df['image_id']):
    file = os.path.join(args.data_dir, 'images', image_id + '.JPG')
    image = image_to_tensor(file)
    # training=False makes inference mode explicit for layers that behave
    # differently during training (e.g. batch normalization).
    probas = model(image, training=False)
    preds = decode_prediction(probas)
    preds_list.append(preds)
    # Carriage return keeps the progress counter on a single output line.
    print(counter, end='\r')

2022-04-25 10:29:43.606809: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8101


489

### Create submission

In [8]:
# Attach the five ranked predictions to the sample-submission template and save.
# NOTE(review): predictions are assigned by position, which assumes test_df
# rows are in the same order as the template rows — confirm.
preds_list = np.array(preds_list)
# Read the template from the configured data directory (as train.csv and the
# images are) instead of a machine-specific absolute path.
subm_df = pd.read_csv(os.path.join(args.data_dir, 'sample_submission.csv'))
# Column k of the prediction array holds the k-th most probable label.
for rank in range(5):
    subm_df[f'prediction{rank + 1}'] = preds_list[:, rank]
subm_df.to_csv('submission-single-model-single-fold.csv', index=False)
subm_df.head()

Unnamed: 0,image_id,prediction1,prediction2,prediction3,prediction4,prediction5
0,ID_6NEDKOYZ,t_id_4ZfTUmwL,t_id_IlO9BOKc,t_id_uJXT7dGu,new_turtle,t_id_uIlC9Gfo
1,ID_57QZ4S9N,t_id_Kf73l69A,t_id_fjHGjp1w,t_id_Ts5LyVQz,new_turtle,t_id_NW7wn8TC
2,ID_OCGGJS5X,t_id_YjXYTCGC,new_turtle,t_id_AMnriNb5,t_id_pCO59rOk,t_id_ROFhVsy2
3,ID_R2993S3S,t_id_VP2NW7aV,t_id_pCO59rOk,t_id_uJXT7dGu,t_id_9GFmcOd5,new_turtle
4,ID_2E011NB0,t_id_dVQ4x3wz,new_turtle,t_id_QqeoI5F3,t_id_ksTLswDT,t_id_EEbWq5Pj
