Taking the [public solution by ks2019](https://www.kaggle.com/code/ks2019/happywhale-arcface-baseline-tpu) from a private lb score of 0.470 to 0.804.

A summary of all key changes taken from [my discussion thread](https://www.kaggle.com/competitions/happy-whale-and-dolphin/discussion/319787).

- Image size: 864x864.
- Pretrained model: EfficientNet b5 with noisy student.
- Pooling: concat pooling (from fastai)

    ```
    avg_pool = tf.keras.layers.GlobalAveragePooling2D()(x)
    max_pool = tf.keras.layers.GlobalMaxPooling2D()(x)
    pretrained_out = tf.keras.layers.Concatenate()([avg_pool, max_pool])
    ```

- Head:
  - Dual-head model: Species classification output and metric.
  - Add [Multi-Sample Dropout](https://arxiv.org/abs/1905.09788) (idea thanks to @dhakshiin) before metric.

- Metric function: [Elastic Margin Loss](https://arxiv.org/abs/2109.09416) (margin mean m=0.3, std=0.025, s=30) (minor improvement over ArcFace).
- Embed size: 1024.
- Data: entire dataset with 12157 extra pseudo labels. Pseudo label algorithm by @dhakshiin: Use multiple models, find top-1 examples that the majority agree on, ensuring 0.2 separation between confidence of 1st class and 2nd class.
- Crops: One of 7 crop variants is randomly selected per image at training time: full body crops, detic crops, original yolov5 crops, 3x TokenCut crops and full-sized images ([See my TFRecords notebook here](https://www.kaggle.com/lextoumbourou/happywhale-generate-tfrecords-with-pseudo))
- Augmentations: `random_flip_left_right`, `random_hue`, `random_saturation`, `random_contrast`, `random_brightness` and `random_rgb_to_gray` (parameters tuned slightly from original kernel)
- Optimiser: Adam (unchanged from original)
- Learning Rate: exponential decay with 4 epochs warm up (unchanged from original)
- Epochs: 30

- Inference:
  - Generated embeddings for each crop with a horizontal flip.
  - Take a weighted mean of all crop embeddings to create final embeddings.
  - Inference: standard KNN inference (use KNN=150 when added to final ensemble)

# Install Drivers

This setup code was shared in [this discussion thread](https://www.kaggle.com/competitions/happy-whale-and-dolphin/discussion/315363) by [Andrij](https://www.kaggle.com/aikhmelnytskyy).

In [None]:
import os
# Raw value of KAGGLE_KERNEL_RUN_TYPE ('' when the variable is unset). It is used
# as a truthy "running on Kaggle" flag and, later in the notebook, compared against
# the literal 'Interactive'.
is_kaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if is_kaggle:
    print('Running in Kaggle Kernels')
    # The W&B API key is read from the Kaggle secret named "wandb".
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    wandb_creds = user_secrets.get_secret("wandb")
    
    # Pin the notebook's TensorFlow to 2.7.
    !pip3 install -Uq tensorflow==2.7
    print("update TPU server tensorflow version…")

    !pip install -q cloud-tpu-client
    import tensorflow as tf
    from cloud_tpu_client import Client
    # NOTE(review): tf.version is the version module, so this prints the module
    # object; tf.__version__ is the version string - confirm which was intended.
    print(tf.version)
    # Request version 2.7 on the TPU side; restart_type='ifNeeded' is passed through
    # to the client.
    Client().configure_tpu_version(2.7, restart_type='ifNeeded')

    !pip install -Uq tensorflow-gcs-config==2.7

    # Remaining third-party packages used by the notebook (install output discarded).
    !pip install wandb > /dev/null
    !pip install -q efficientnet > /dev/null
    !pip install tensorflow_addons > /dev/null
    
    
    # Log in to Weights & Biases with the secret fetched above.
    !wandb login {wandb_creds}

# Imports

In [None]:
import re
import math
import random
import pickle
import json
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import efficientnet.tfkeras as efn
from sklearn import metrics
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import wandb

# Setup TPU

In [None]:
def get_strategy(tpu_arg):
    """Return the distribution strategy to build and train the model under.

    Parameters
    ----------
    tpu_arg : str or None
        Forwarded as the ``tpu`` argument of ``TPUClusterResolver``.

    Returns
    -------
    A ``tf.distribute.TPUStrategy`` when a TPU cluster could be resolved,
    otherwise TensorFlow's default strategy.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_arg)
        print('Running on TPU ', resolver.master())
    except ValueError as err:
        # The resolver signals "no TPU available" with a ValueError.
        print(f'No TPU: {err}')
        resolver = None

    if not resolver:
        # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
        return tf.distribute.get_strategy()

    # The TPU system has to be connected and initialised before the strategy is built.
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.TPUStrategy(resolver)

In [None]:
# On Kaggle the resolver is called with tpu=None; anywhere else 'local' is passed.
tpu_arg = None
if not is_kaggle:
    tpu_arg = 'local'
    
# Module-level strategy used for the batch size and for building the model.
strategy = get_strategy(tpu_arg)
# Shorthand used for num_parallel_calls / prefetch in the tf.data pipelines.
AUTO = tf.data.experimental.AUTOTUNE
print("REPLICAS: ", strategy.num_replicas_in_sync)

# Config and WandB

I used [Weights & Biases](https://wandb.ai/site) for tracking experiment runs.

In [None]:
class config:
    """Namespace of experiment settings; attributes are read (and a few are set) by later cells."""
    
    # Seed passed to seed_everything() and to the seeded random ops.
    SEED = 420
    # Trained on the full dataset.
    # With FOLD_TO_RUN = None no validation split is held out during training.
    FOLD_TO_RUN = None
    # Fold held out when computing train/val embeddings after training.
    VAL_FOLD_TO_RUN = 0
    FOLDS = 5
    DEBUG = False
    EVALUATE = True
    # When RESUME is set, the LR schedule offsets the epoch by RESUME_EPOCH.
    RESUME = False
    RESUME_EPOCH = None

    # Global batch size: 16 samples per replica.
    BATCH_SIZE = 16 * strategy.num_replicas_in_sync
    # Images are resized to IMAGE_SIZE x IMAGE_SIZE.
    IMAGE_SIZE = 864
    # Number of classes of the metric (margin) head.
    N_CLASSES = 15587

    # Backbone family ('effnetv1' or 'effnetv2') and the index into EFNS (B0..B7).
    model_type = 'effnetv1'  
    EFF_NET = 5

    # Only used to name the margin layer ('head/<head>').
    head = 'arcface'

    EPOCHS = 30
    EPOCHS_STAGE_2 = 0

    # Initial Adam learning rate; a LearningRateScheduler callback is also installed.
    LR = 0.001

    # Probability of converting a training image to grayscale.
    RGB_TO_GRAY_PROB = 0.15

    # Loss weights of the two model outputs.
    WEIGHT_METRIC = 0.5
    WEIGHT_SPECIES = 0.5
    
    save_dir = './output'
    
    KNN = 100
    
    MIN_JPEG_QUAL_AUG = 75
    MAX_JPEG_QUAL_AUG = 100
    
    # Sampling probabilities of the crop variants when crop_method == 'random'
    # (the seven *_CROP_PROB values below are fed to tf.random.categorical).
    FULLBODY_CROP_PROB = 0.2

    DETIC_CROP_PROB = 0.14
    YOLOV5_CROP_PROB = 0.14
    
    VIT_BASE_CROP_PROB = 0.14
    VIT_SMALL_CROP_PROB = 0.14
    VIT_MOCO_CROP_PROB = 0.14
    
    # Colour augmentation ranges used by data_augment().
    RANDOM_HUE_MAX_DELTA = 0.1

    RANDOM_SATURATION_LOWER = 0.75
    RANDOM_SATURATION_UPPER = 1.25
    
    RANDOM_CONTRAST_LOWER = 0.75
    RANDOM_CONTRAST_UPPER = 1.25
    
    RANDOM_BRIGHTNESS_MAX_DELTA = 0.1

    # Probability of using the uncropped image.
    NONE_CROP_PROB = 0.1
    
    # Passed to ElasticArcFace as the mean of the sampled margin.
    ARC_FACE_M = 0.3
    
    # Size of the embedding produced before the margin head.
    EMB_DIM = 1024

    LOAD_WEIGHTS_STAGE_2 = None
    # Optional weights file loaded into the model before training.
    LOAD_WEIGHTS = None
    INF_LOAD_WEIGHTS = None
    
    # Kaggle dataset names; on Kaggle they are replaced by their GCS paths.
    GCS_DS_PATH = 'happywhale-tfrecords-private2'
    GCS_PSEUDO_DS_PATH = 'happywhale-tfrecords-pseudo-dhak'
    
    INPUT_DATA_PATH = Path('../input/happy-whale-and-dolphin/')
    # Directory holding species.json and individual_ids.json.
    LABEL_INFO_PATH = Path('../input/happywhale-generate-tfrecords-with-pseudo/')
    
    GCS_UPLOAD_BUCKET = None
    
    # Relative weight of each crop variant when merging inference embeddings
    # (normalised to sum to 1 in prepare_all_embeddings). 'no_crop' matches none of
    # the named crop branches in read_labeled_tfrecord, so it selects the uncropped image.
    CROP_WEIGHTS = {
        'fullbody': 10,
        'detic': 1,
        'yolov5': 1,
        'vit_base': 1,
        'vit_small': 1,
        'vit_moco': 1,
        'no_crop': 2,
    }

    
# Compiled once: TFRecord names end in "-<number of records>.<extension>".
_TFREC_COUNT_PATTERN = re.compile(r"-([0-9]*)\.")


def count_data_items(filenames):
    """Return the total number of records encoded in TFRecord file names.

    Each file name is expected to contain ``-<count>.`` (for example
    ``happywhale-2022-train00-1234.tfrec``); the first such match is used.

    Parameters
    ----------
    filenames : iterable of str
        TFRecord paths.

    Returns
    -------
    numpy.int64
        Sum of the per-file counts; 0 for an empty input.

    Raises
    ------
    ValueError
        If a file name does not contain a ``-<count>.`` component.
    """
    counts = []
    for filename in filenames:
        match = _TFREC_COUNT_PATTERN.search(filename)
        if match is None:
            raise ValueError(f'No record count found in file name: {filename!r}')
        counts.append(int(match.group(1)))
    # An explicit integer dtype keeps the empty case an integer (np.sum([]) is 0.0).
    return np.sum(counts, dtype=np.int64)

def seed_everything(seed):
    """Seed Python's, NumPy's and TensorFlow's RNGs and set PYTHONHASHSEED to ``seed``."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
    
def is_interactive():
    """Return True when the IPython kernel's connection file path contains 'runtime'."""
    connection_file = get_ipython().config.IPKernelApp.connection_file
    return 'runtime' in connection_file
IS_INTERACTIVE = is_interactive()
print(IS_INTERACTIVE)

# Derive a model name from the configured backbone; it stays None for any other
# model_type.
MODEL_NAME = None
if config.model_type == 'effnetv1':
    MODEL_NAME = f'effnetv1_b{config.EFF_NET}'
elif config.model_type == 'effnetv2':
    # NOTE(review): config.EFF_NETV2 is not defined in the config class above.
    MODEL_NAME = f'effnetv2_{config.EFF_NETV2}'

# Stored on config because later code builds weight file names from it.
config.MODEL_NAME = MODEL_NAME
print(MODEL_NAME)

if is_kaggle:
    print('Loading GCS path from KaggleDatasets')
    from kaggle_secrets import UserSecretsClient
    from kaggle_datasets import KaggleDatasets

    # Hand the user's gcloud credential to TensorFlow before resolving GCS paths.
    user_secrets = UserSecretsClient()
    user_credential = user_secrets.get_gcloud_credential()
    user_secrets.set_tensorflow_credential(user_credential)

    # Replace the dataset names with the GCS paths returned by KaggleDatasets.
    config.GCS_DS_PATH = KaggleDatasets().get_gcs_path(config.GCS_DS_PATH)
    if config.GCS_PSEUDO_DS_PATH:
        config.GCS_PSEUDO_DS_PATH = KaggleDatasets().get_gcs_path(config.GCS_PSEUDO_DS_PATH)

In [None]:
print(f'Doing fold {config.FOLD_TO_RUN} (total fold: {config.FOLDS})')

In [None]:
# Every run writes into its own timestamped sub-directory of config.save_dir.
OUTPUT_DIR = Path(config.save_dir) / datetime.now().strftime('%Y%m%d-%H%M%S')

In [None]:
print(OUTPUT_DIR)

In [None]:
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# is_kaggle holds the KAGGLE_KERNEL_RUN_TYPE value, so this branch only runs when
# that value is exactly 'Interactive'.
if is_kaggle == 'Interactive':
    print('Wandb in offline mode.')
    os.environ['WANDB_MODE'] = 'offline'

In [None]:
wandb.config = {
    'seed': config.SEED,
    'image_size': config.IMAGE_SIZE,
    'model_type': config.model_type,
    'lr': config.LR,
    'epochs': config.EPOCHS,
    'knn': 100,
    'metric_out': config.WEIGHT_METRIC,
    'specie_out': config.WEIGHT_SPECIES,
    'rgb_to_gray_prob': config.RGB_TO_GRAY_PROB,
    'min_jpeg_quality': config.MIN_JPEG_QUAL_AUG,
    'max_jpeg_quality': config.MAX_JPEG_QUAL_AUG,
    'fullbody_crop_prob': config.FULLBODY_CROP_PROB,
    'detic_crop_prob': config.DETIC_CROP_PROB,
    'yolov5_crop_prob': config.YOLOV5_CROP_PROB,
    'vit_base_crop_prob': config.VIT_BASE_CROP_PROB,
    'vit_small_crop_prob': config.VIT_SMALL_CROP_PROB,
    'vit_moco_crop_prob': config.VIT_MOCO_CROP_PROB,
    'non_crop_prob': config.NONE_CROP_PROB,
    'arcface_m': config.ARC_FACE_M,
    'emb_dim': config.EMB_DIM,
    'gcs_ds_path': config.GCS_DS_PATH
}

In [None]:
wandb.init(
    project="happy-whale-and-dolphin",
    entity="lexkaggle",
    name=f'effnet-b{config.EFF_NET}-img-{config.IMAGE_SIZE}-fullbody-f-{config.FOLD_TO_RUN}')

In [None]:
config.GCS_DS_PATH

In [None]:
# Sorted TFRecord shard lists; the sort order fixes which shard index lands in which fold.
train_files = np.sort(np.array(tf.io.gfile.glob(config.GCS_DS_PATH + '/happywhale-2022-train*.tfrec')))
test_files = np.sort(np.array(tf.io.gfile.glob(config.GCS_DS_PATH + '/happywhale-2022-test*.tfrec')))
print(config.GCS_DS_PATH)
# Number of shards and number of records (parsed from the file names) per split.
print(len(train_files),len(test_files),count_data_items(train_files),count_data_items(test_files))

# Data

In [None]:
def random_rgb_to_gray(image, probability, seed=config.SEED):
    """Randomly replace ``image`` by its grayscale version (kept as 3 channels).

    A uniform sample in [0, 1) is drawn; the image is returned unchanged when
    the sample is greater than ``probability`` and converted otherwise.

    Parameters
    ----------
    image : RGB image tensor.
    probability : threshold compared against the uniform sample.
    seed : seed passed to ``tf.random.uniform``.
    """
    def _keep():
        return image

    def _to_gray():
        return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))

    with tf.name_scope('RandomRGBtoGray'):
        draw = tf.random.uniform([], seed=seed)
        result = tf.cond(tf.greater(draw, probability), _keep, _to_gray)

    return result

In [None]:
# Crop names in the same order as the probabilities passed to tf.random.categorical
# in read_labeled_tfrecord; the sampled index is looked up in this tensor.
crops = tf.convert_to_tensor(['fullbody', 'detic', 'yolov5', 'vit_base', 'vit_small', 'vit_moco', 'none'])

def arcface_format(posting_id, image, label_group, species):
    """Pack a parsed example into the model's named inputs.

    Returns ``(posting_id, inputs, label_group, species)`` where ``inputs`` maps
    'inp1' to the image, 'inp2' to the individual label and 'inp3' to the species.
    """
    model_inputs = {'inp1': image, 'inp2': label_group, 'inp3': species}
    return posting_id, model_inputs, label_group, species

def arcface_inference_format(posting_id, image, label_group, species):
    """Reduce a parsed example to ``(image, posting_id)``; the labels are dropped."""
    pair = (image, posting_id)
    return pair

def arcface_eval_format(posting_id, image, label_group, species):
    """Reduce a parsed example to ``(image, label_group)`` for evaluation."""
    pair = (image, label_group)
    return pair

def data_augment(posting_id, image, label_group, species):
    """Apply the training-time augmentations to ``image``.

    In order: random horizontal flip, hue, saturation, contrast, brightness and
    random grayscale conversion, with ranges taken from ``config``. The id and
    labels are passed through unchanged.
    """
    augmented = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_hue(augmented, max_delta=config.RANDOM_HUE_MAX_DELTA)
    augmented = tf.image.random_saturation(
        augmented,
        lower=config.RANDOM_SATURATION_LOWER,
        upper=config.RANDOM_SATURATION_UPPER)
    augmented = tf.image.random_contrast(
        augmented,
        lower=config.RANDOM_CONTRAST_LOWER,
        upper=config.RANDOM_CONTRAST_UPPER)
    augmented = tf.image.random_brightness(augmented, max_delta=config.RANDOM_BRIGHTNESS_MAX_DELTA)
    augmented = random_rgb_to_gray(augmented, config.RGB_TO_GRAY_PROB)
    return posting_id, augmented, label_group, species

def decode_image(image_data, bbs):
    """Decode a JPEG, optionally crop it to a bounding box, and resize it.

    Parameters
    ----------
    image_data : encoded JPEG bytes.
    bbs : None, or a 4-element box unpacked as (left, top, right, bottom).
        A first element of -1 means "no crop".

    Returns
    -------
    float32 image resized to IMAGE_SIZE x IMAGE_SIZE with values scaled to [0, 1].
    """
    if bbs is not None and bbs[0] != -1:
        left, top, right, bottom = bbs[0], bbs[1], bbs[2], bbs[3]
        # Crop window passed to decode_and_crop_jpeg: [top, left, height, width].
        bbs = tf.convert_to_tensor([top, left, bottom - top, right - left])
        image = tf.io.decode_and_crop_jpeg(image_data, bbs, channels=3)
    else:
        image = tf.image.decode_jpeg(image_data, channels = 3)

    image = tf.image.resize(image, [config.IMAGE_SIZE,config.IMAGE_SIZE])
    # Clamp to [0, 255] after resizing, then scale to [0, 1].
    image = tf.clip_by_value(image, 0, 255)
    image = tf.cast(image, tf.float32) / 255.0
    return image

def read_labeled_tfrecord(example, crop_method, flip=False):
    """Parse one serialized example and return a decoded, cropped image with labels.

    Parameters
    ----------
    example : serialized tf.train.Example.
    crop_method : 'random' to sample a crop variant using the config probabilities,
        or one of 'detic', 'yolov5', 'vit_base', 'vit_small', 'vit_moco',
        'fullbody'. Any other value selects the uncropped image.
    flip : when True the decoded image is flipped left-right.

    Returns
    -------
    (posting_id, image, label_group, species), with both labels cast to int32.
    """
    # Every record carries the image, its labels and one 4-int box per crop variant.
    LABELED_TFREC_FORMAT = {
        "image_name": tf.io.FixedLenFeature([], tf.string),
        "image": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
        "species": tf.io.FixedLenFeature([], tf.int64),
        'detic_box': tf.io.FixedLenFeature([4], tf.int64),
        'yolov5_box': tf.io.FixedLenFeature([4], tf.int64),
        'tc_vitbase': tf.io.FixedLenFeature([4], tf.int64),
        'tc_vitsmall': tf.io.FixedLenFeature([4], tf.int64),
        'tc_mocovit': tf.io.FixedLenFeature([4], tf.int64),
        'fullbody': tf.io.FixedLenFeature([4], tf.int64)
    }

    example = tf.io.parse_single_example(example, LABELED_TFREC_FORMAT)
    posting_id = example['image_name']
    
    if crop_method == 'random':
        # Sample one crop index; the probabilities are listed in the same order as `crops`.
        rand = tf.random.categorical(tf.math.log([[
            config.FULLBODY_CROP_PROB,
            config.DETIC_CROP_PROB,
            config.YOLOV5_CROP_PROB,
            config.VIT_BASE_CROP_PROB,
            config.VIT_SMALL_CROP_PROB,
            config.VIT_MOCO_CROP_PROB,
            config.NONE_CROP_PROB
        ]]), 1, seed=config.SEED)
        
        # From here on crop_method is a string tensor rather than a Python str.
        crop_method = crops[rand[0][0]]
    
    if crop_method == 'detic':
        bbs = tf.cast(example['detic_box'], tf.int32)
    elif crop_method == 'yolov5':
        bbs = tf.cast(example['yolov5_box'], tf.int32)
    elif crop_method == 'vit_base':
        bbs = tf.cast(example['tc_vitbase'], tf.int32)
    elif crop_method == 'vit_small':
        bbs = tf.cast(example['tc_vitsmall'], tf.int32)
    elif crop_method == 'vit_moco':
        bbs = tf.cast(example['tc_mocovit'], tf.int32)
    elif crop_method == 'fullbody':
        bbs = tf.cast(example['fullbody'], tf.int32)
    else:
        # Sentinel box: decode_image() skips cropping when the first element is -1.
        bbs = tf.convert_to_tensor([-1, -1, -1, -1])

    image = decode_image(example['image'], bbs)
    
    if flip:
        image = tf.image.flip_left_right(image)
    
    label_group = tf.cast(example['target'], tf.int32)
    species = tf.cast(example['species'], tf.int32)
    
    return posting_id, image, label_group, species

def load_dataset(filenames, ordered = False, crop_method='random', flip=False):
    """Build a dataset of parsed examples from TFRecord files.

    Parameters
    ----------
    filenames : TFRecord paths.
    ordered : when False, deterministic element order is switched off.
    crop_method, flip : forwarded to ``read_labeled_tfrecord``.

    Returns
    -------
    Dataset yielding ``(posting_id, image, label_group, species)``.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    def _parse(serialized):
        return read_labeled_tfrecord(serialized, crop_method, flip)

    records = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    records = records.with_options(options)
    return records.map(_parse, num_parallel_calls = AUTO)

def get_training_dataset(filenames, pseudo=False):
    """Return the augmented, repeated, shuffled and batched training dataset.

    Parameters
    ----------
    filenames : training TFRecord paths.
    pseudo : when True the pseudo-label TFRecord under
        ``config.GCS_PSEUDO_DS_PATH`` is added to the inputs.

    Returns
    -------
    Dataset of ``(inputs, targets)`` batches; ``inputs`` is the dict built by
    ``arcface_format`` and ``targets`` has the keys 'metric_out' and 'specie_out'.
    """
    if pseudo:
        print('Loading pseudo labels')
        pseudo_file = config.GCS_PSEUDO_DS_PATH + '/happywhale-2022-pseudo--12157.tfrec'
        filenames = np.array([*filenames, pseudo_file])

    def _to_model_io(posting_id, image, label_group, species):
        # After arcface_format, `image` is the dict of named model inputs.
        return image, {'metric_out': label_group, 'specie_out': species}

    return (
        load_dataset(filenames, ordered = False)
        .map(data_augment, num_parallel_calls = AUTO)
        .map(arcface_format, num_parallel_calls = AUTO)
        .map(_to_model_io)
        .repeat()
        .shuffle(1024)
        .batch(config.BATCH_SIZE)
        .prefetch(AUTO)
    )

def get_val_dataset(filenames):
    """Return the ordered, un-augmented validation dataset built from 'detic' crops.

    Batches have the same ``(inputs, targets)`` structure as the training dataset.
    """
    def _to_model_io(posting_id, image, label_group, species):
        # After arcface_format, `image` is the dict of named model inputs.
        return image, {'metric_out': label_group, 'specie_out': species}

    return (
        load_dataset(filenames, ordered = True, crop_method='detic')
        .map(arcface_format, num_parallel_calls = AUTO)
        .map(_to_model_io)
        .batch(config.BATCH_SIZE)
        .prefetch(AUTO)
    )

def get_eval_dataset(filenames, get_targets = True):
    """Return an ordered, batched dataset of 'detic' crops for evaluation.

    Elements are ``(image, label_group)``, or just ``image`` when
    ``get_targets`` is False.
    """
    pairs = load_dataset(filenames, ordered = True, crop_method='detic')
    pairs = pairs.map(arcface_eval_format, num_parallel_calls = AUTO)
    if get_targets:
        selected = pairs
    else:
        selected = pairs.map(lambda image, target: image)
    return selected.batch(config.BATCH_SIZE).prefetch(AUTO)

def get_test_dataset(filenames, get_names = True, crop_method='random', flip=False):
    """Return an ordered, batched inference dataset.

    Parameters
    ----------
    filenames : TFRecord paths.
    get_names : when True elements are ``(image, posting_id)``, otherwise ``image``.
    crop_method, flip : forwarded to ``load_dataset``.
    """
    pairs = load_dataset(filenames, ordered = True, crop_method=crop_method, flip=flip)
    pairs = pairs.map(arcface_inference_format, num_parallel_calls = AUTO)
    if get_names:
        selected = pairs
    else:
        selected = pairs.map(lambda image, posting_id: image)
    return selected.batch(config.BATCH_SIZE).prefetch(AUTO)

In [None]:
# Label files are read through Path.read_text() so the file handles are closed
# right away (json.load(open(...)) left them open).
species = json.loads((config.LABEL_INFO_PATH / 'species.json').read_text())
individual_ids = json.loads((config.LABEL_INFO_PATH / 'individual_ids.json').read_text())

# Inverted lookups: value stored in the JSON file -> its key.
id2species = {s: i for i, s in species.items()}
id2individual_ids = {s: i for i, s in individual_ids.items()}

In [None]:
id2species

In [None]:
# Size of the species classification output layer.
NUM_SPECIES = len(id2species)

In [None]:
# Visual sanity check: plot a row x col grid from the first training batch.
row = 5; col = 4;
# Never plot more images than one batch holds.
row = min(row,config.BATCH_SIZE//col)
# N_TRAIN counts the regular training records only; the dataset below also
# includes the pseudo-label file (pseudo=True).
N_TRAIN = count_data_items(train_files)
print(N_TRAIN)
ds = get_training_dataset(train_files, pseudo=True)

for (sample,label) in ds:
    img = sample['inp1']
    plt.figure(figsize=(25,int(25*row/col)))
    for j in range(row*col):
        plt.subplot(row,col,j+1)
        # Title: individual id - species, decoded through the inverted label maps.
        plt.title(f"{id2individual_ids[label['metric_out'][j].numpy()]} - {id2species[label['specie_out'][j].numpy()]}")
        plt.axis('off')
        plt.imshow(img[j,])
    plt.show()
    # Only the first batch is shown.
    break

In [None]:
# Visual sanity check: plot a row x col grid from the first test batch
# (get_test_dataset defaults to crop_method='random').
row = 3; col = 4;
row = min(row,config.BATCH_SIZE//col)
N_TEST = count_data_items(test_files)
print(N_TEST)
ds = get_test_dataset(test_files)

for (img,label) in ds:
    plt.figure(figsize=(25,int(25*row/col)))
    for j in range(row*col):
        plt.subplot(row,col,j+1)
        # For the test set the "label" is the image name.
        plt.title(label[j].numpy())
        plt.axis('off')
        plt.imshow(img[j,])
    plt.show()
    # Only the first batch is shown.
    break

In [None]:
# Same preview as the previous cell, with flip=True to check the horizontally
# flipped inference input.
row = 3; col = 4;
row = min(row,config.BATCH_SIZE//col)
N_TEST = count_data_items(test_files)
print(N_TEST)
ds = get_test_dataset(test_files, flip=True)

for (img,label) in ds:
    plt.figure(figsize=(25,int(25*row/col)))
    for j in range(row*col):
        plt.subplot(row,col,j+1)
        plt.title(label[j].numpy())
        plt.axis('off')
        plt.imshow(img[j,])
    plt.show()
    # Only the first batch is shown.
    break

# Model Architecture

In [None]:
# Modified version of ArcFace from original kernel to create this. Based on paper ElasticFace: Elastic Margin Loss for Deep Face Recognition (https://arxiv.org/pdf/2109.09416.pdf).

class ElasticArcFace(tf.keras.layers.Layer):
    """ArcFace-style margin head whose margin is sampled per example.

    For every example a margin ``m`` is drawn from a normal distribution with
    the configured mean and standard deviation and applied to the cosine of the
    target class; all cosines are then scaled by ``s``.

    Parameters
    ----------
    n_classes : number of classes (columns of the weight matrix).
    s : scale applied to the output logits.
    mean, std : mean and standard deviation of the sampled margin.
    easy_margin : accepted for signature compatibility; not used by this layer.
    ls_eps : label-smoothing factor applied to the one-hot mask when > 0.
    **kwargs : forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
        self,
        n_classes,
        s=30,
        mean=0.50,
        std=0.025,
        easy_margin=False,
        ls_eps=0.0,
        **kwargs
    ):

        super(ElasticArcFace, self).__init__(**kwargs)

        print(f'ElasticArcFace mean: {mean}, std: {std}')

        self.n_classes = n_classes
        self.s = s
        self.mean = mean
        self.std = std
        self.ls_eps = ls_eps

    def get_config(self):
        """Return the layer config including the margin hyper-parameters."""
        # Named base_config so the module-level `config` class is not shadowed.
        base_config = super().get_config().copy()
        base_config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'mean': self.mean,
            'std': self.std,
            'ls_eps': self.ls_eps
        })
        return base_config

    def build(self, input_shape):
        """Create the class-centre matrix W of shape (embedding_dim, n_classes).

        ``input_shape`` is the pair of shapes for ``[embeddings, labels]``.
        """
        super(ElasticArcFace, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        """Return scaled logits for ``inputs = [X, y]`` (embeddings, integer labels)."""
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Floating-point rounding can push |cosine| slightly above 1, which makes
        # 1 - cos^2 negative and sqrt() NaN. A NaN in `phi` would reach every logit
        # through `one_hot * phi` (0 * NaN is NaN), so the argument is floored at a
        # small epsilon; this also keeps the sqrt gradient finite.
        sine = tf.math.sqrt(
            tf.maximum(1.0 - tf.math.square(cosine), tf.keras.backend.epsilon()))

        # One margin per example in the batch.
        m = tf.random.normal((tf.shape(y)[0], 1), mean=self.mean, stddev=self.std, seed=config.SEED)

        cos_m = tf.math.cos(m)
        sin_m = tf.math.sin(m)
        th = tf.math.cos(math.pi - m)
        mm = tf.math.sin(math.pi - m) * m

        # cos(theta + m) expanded with the angle-addition formula.
        phi = cosine * cos_m - sine * sin_m

        # Where cosine <= th, use the linear penalty cosine - mm instead.
        phi = tf.where(cosine > th, phi, cosine - mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
            )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin-adjusted cosine for the target class, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

In [None]:
# EfficientNet constructors indexed by variant number (EFNS[5] is EfficientNetB5);
# get_model() selects one with config.EFF_NET.
EFNS = [efn.EfficientNetB0, efn.EfficientNetB1, efn.EfficientNetB2, efn.EfficientNetB3, 
        efn.EfficientNetB4, efn.EfficientNetB5, efn.EfficientNetB6, efn.EfficientNetB7]

def freeze_BN(model):
    """Make every layer of ``model`` trainable except BatchNormalization layers.

    Only the layers directly listed in ``model.layers`` are visited.
    """
    for layer in model.layers:
        is_batch_norm = isinstance(layer, tf.keras.layers.BatchNormalization)
        layer.trainable = not is_batch_norm

def get_model():
    """Build and compile the dual-head training model and the embedding model.

    The backbone output feeds a species classification head and, through
    multi-sample dropout and a shared dense layer, an embedding that is passed
    to the ElasticArcFace margin head. Everything is created inside
    ``strategy.scope()``.

    Returns
    -------
    (model, embed_model)
        ``model`` takes the inputs 'inp1' (image), 'inp2' (individual label) and
        'inp3' (species) and outputs 'metric_out' and 'specie_out';
        ``embed_model`` maps an image to its L2-normalised embedding.

    Raises
    ------
    ValueError
        If ``config.model_type`` is neither 'effnetv1' nor 'effnetv2'.
    """
    with strategy.scope():
        margin = ElasticArcFace(
            n_classes = config.N_CLASSES,
            s = 30,
            mean = config.ARC_FACE_M,
            std=0.025,
            name=f'head/{config.head}',
            dtype='float32'
        )

        inp = tf.keras.layers.Input(shape = [config.IMAGE_SIZE, config.IMAGE_SIZE, 3], name = 'inp1')
        label = tf.keras.layers.Input(shape = (), name = 'inp2')

        # The species input is part of the model signature but is not connected
        # to any layer; species only acts as a training target.
        species = tf.keras.layers.Input(shape = (), name = 'inp3')

        if config.model_type == 'effnetv1':
            x = EFNS[config.EFF_NET](weights = 'noisy-student', include_top = False)(inp)
            # Concat pooling
            avg_pool = tf.keras.layers.GlobalAveragePooling2D()(x)
            max_pool = tf.keras.layers.GlobalMaxPooling2D()(x)
            pretrained_out = tf.keras.layers.Concatenate()([avg_pool, max_pool])
        elif config.model_type == 'effnetv2':
            # NOTE(review): EFFNETV2_ROOT, tfhub and config.EFF_NETV2 are not defined
            # in the visible part of this notebook - confirm before using this branch.
            FEATURE_VECTOR = f'{EFFNETV2_ROOT}/tfhub_models/efficientnetv2-{config.EFF_NETV2}/feature_vector'
            # Assigned to pretrained_out (previously `embed`) so both heads below
            # receive the backbone features.
            pretrained_out = tfhub.KerasLayer(FEATURE_VECTOR, trainable=True)(inp)
        else:
            raise ValueError(f'Unsupported config.model_type: {config.model_type!r}')

        # Do specie classification.
        specie_in = tf.keras.layers.Dropout(0.2)(pretrained_out)
        specie_out = tf.keras.layers.Dense(2048)(specie_in)
        specie_out = tf.keras.layers.Dropout(0.2)(specie_out)
        specie_out = tf.keras.layers.Dense(NUM_SPECIES, activation='softmax', name='specie_out')(specie_out)

        # Do metric
        print(f'Size of embed {config.EMB_DIM}')
        pre_margin_dense_layer = tf.keras.layers.Dense(config.EMB_DIM)

        # Multiple-sample dropout https://arxiv.org/abs/1905.09788
        # The same dense layer is applied to differently dropped-out copies of the
        # features and the results are averaged.
        dropout_base = 0.17
        drop_ls = [tf.keras.layers.Dropout((dropout_base + 0.01), seed=420),
                   tf.keras.layers.Dropout((dropout_base + 0.02), seed=4200),
                   tf.keras.layers.Dropout((dropout_base + 0.03), seed=42000),
                   tf.keras.layers.Dropout((dropout_base + 0.04), seed=420000),
                   tf.keras.layers.Dropout((dropout_base + 0.05), seed=4200000)]

        n_samples = float(len(drop_ls))
        embed = None
        for drop in drop_ls:
            sample = pre_margin_dense_layer(drop(pretrained_out)) / n_samples
            embed = sample if embed is None else embed + sample

        embed = tf.keras.layers.BatchNormalization()(embed)
        embed = tf.math.l2_normalize(embed, axis=1)

        x = margin([embed, label])
        # The margin head returns logits; softmax turns them into probabilities,
        # matching the default (from_logits=False) cross-entropy loss below.
        output = tf.keras.layers.Softmax(dtype='float32', name='metric_out')(x)

        model = tf.keras.models.Model(inputs = [inp, label, species], outputs = [output, specie_out])
        embed_model = tf.keras.models.Model(inputs = inp, outputs = embed)

        opt = tf.keras.optimizers.Adam(learning_rate = config.LR)

        model.compile(
            optimizer = opt,
            loss = {
                'metric_out': tf.keras.losses.SparseCategoricalCrossentropy(),
                'specie_out': tf.keras.losses.SparseCategoricalCrossentropy()
            },
            loss_weights = {
                'metric_out': config.WEIGHT_METRIC,
                'specie_out': config.WEIGHT_SPECIES
            },
            metrics = {
                'metric_out': [tf.keras.metrics.SparseCategoricalAccuracy(), tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5)],
                'specie_out': tf.keras.metrics.SparseCategoricalAccuracy()
            }
        )

        return model,embed_model

# Training

In [None]:
def get_lr_callback(plot=False):
    """Return a LearningRateScheduler with linear warm-up and exponential decay.

    The rate rises linearly from ``lr_start`` to ``lr_max`` over ``lr_ramp_ep``
    epochs, stays at ``lr_max`` for ``lr_sus_ep`` epochs and then decays
    towards ``lr_min`` by a factor of ``lr_decay`` per epoch. ``lr_max`` scales
    with ``config.BATCH_SIZE``.

    Parameters
    ----------
    plot : when True, print and scatter-plot the schedule for ``config.EPOCHS`` epochs.
    """
    lr_start   = 0.000001
    lr_max     = 0.000005 * config.BATCH_SIZE
    lr_min     = 0.000001
    lr_ramp_ep = 4
    lr_sus_ep  = 0
    lr_decay   = 0.9

    def lrfn(epoch):
        if config.RESUME:
            # Continue the schedule where the previous run stopped. RESUME_EPOCH
            # defaults to None, which is treated as 0 instead of raising TypeError.
            epoch = epoch + (config.RESUME_EPOCH or 0)
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start

        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max

        else:
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min

        return lr

    if plot:
        epochs = list(range(config.EPOCHS))
        learning_rates = [lrfn(x) for x in epochs]
        print([f'{i:.20f}' for i in learning_rates])
        plt.scatter(epochs,learning_rates)
        plt.show()

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
    return lr_callback

# Show the schedule once for inspection.
get_lr_callback(plot=True)

In [None]:
class Snapshot(tf.keras.callbacks.Callback):
    """Callback that saves the model weights at the end of every epoch.

    The weights are written to ``<config.save_dir>/<config.MODEL_NAME>_last.h5``,
    so each epoch replaces the previous file.

    Parameters
    ----------
    fold : fold identifier, stored on the instance.
    snapshot_epochs : optional list of epochs, stored on the instance (it is not
        consulted in ``on_epoch_end``). Defaults to a new empty list.
    """

    def __init__(self, fold, snapshot_epochs=None):
        super(Snapshot, self).__init__()
        # None instead of a mutable default, so instances never share one list.
        self.snapshot_epochs = snapshot_epochs if snapshot_epochs is not None else []
        self.fold = fold

    def on_epoch_end(self, epoch, logs=None):
        self.model.save_weights(config.save_dir+f"/{config.MODEL_NAME}_last.h5")

In [None]:
# Split shards by index modulo FOLDS. With FOLD_TO_RUN = None the comparison
# `i % FOLDS != None` is always True, so every shard is used for training and the
# validation list is empty.
TRAINING_FILENAMES = [x for i,x in enumerate(train_files) if i%config.FOLDS!=config.FOLD_TO_RUN]
VALIDATION_FILENAMES = [x for i,x in enumerate(train_files) if i%config.FOLDS==config.FOLD_TO_RUN]
print(len(TRAINING_FILENAMES),len(VALIDATION_FILENAMES),count_data_items(TRAINING_FILENAMES),count_data_items(VALIDATION_FILENAMES))

In [None]:
seed_everything(config.SEED)
VERBOSE = 1
train_dataset = get_training_dataset(TRAINING_FILENAMES, pseudo=True)
# A validation dataset only exists when a fold is held out.
val_dataset = None
if config.FOLD_TO_RUN is not None:
    val_dataset = get_val_dataset(VALIDATION_FILENAMES)
else:
    print('No val dataset')

# Record count of the pseudo-label file; it matches the number in the file name
# appended by get_training_dataset(pseudo=True).
num_pseudo = 12157
# The training dataset repeats forever, so an epoch is defined by this step count.
STEPS_PER_EPOCH = (count_data_items(TRAINING_FILENAMES) + num_pseudo) // config.BATCH_SIZE

# SAVE BEST MODEL EACH FOLD        
sv_loss = tf.keras.callbacks.ModelCheckpoint(
    OUTPUT_DIR / f'{config.MODEL_NAME}_loss.h5',
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    save_freq='epoch')

# BUILD MODEL
K.clear_session()
model,embed_model = get_model()
snap = Snapshot(fold=config.FOLD_TO_RUN)
model.summary()

# The val_loss checkpoint is only registered when a validation fold exists.
callbacks = [get_lr_callback(), snap]
if config.FOLD_TO_RUN is not None:
    callbacks.append(sv_loss)

# Optionally start from previously saved weights.
if config.LOAD_WEIGHTS:
    print('Loading weights  ' + config.LOAD_WEIGHTS)
    model.load_weights(config.LOAD_WEIGHTS)

In [None]:
# Remove this line to train. It should train in Kaggle just fine.
# With EPOCHS = 0 the training cell below only prints 'No training.'.
config.EPOCHS = 0

In [None]:
# Train only when at least one epoch is configured.
if config.EPOCHS:
    print('#### Image Size %i with EfficientNet B%i and batch_size %i'%
          (config.IMAGE_SIZE,config.EFF_NET,config.BATCH_SIZE))

    history = model.fit(
        train_dataset,
        validation_data = val_dataset,
        steps_per_epoch = STEPS_PER_EPOCH,
        epochs = config.EPOCHS,
        callbacks = callbacks,
        verbose = VERBOSE)

    # With a validation fold, continue from the best (lowest val_loss) checkpoint.
    if config.FOLD_TO_RUN is not None:
        model.load_weights(OUTPUT_DIR / f'{config.MODEL_NAME}_loss.h5')

    # Plotting stays best-effort, but the expected case "no validation data"
    # (no 'val_loss' key) is handled explicitly and other failures are reported
    # instead of being silently swallowed.
    try:
        plt.plot(history.history['loss'])
        legend_labels = ['train']
        if 'val_loss' in history.history:
            plt.plot(history.history['val_loss'])
            legend_labels.append('test')
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(legend_labels, loc='upper left')
        plt.show()
    except Exception as err:
        print(f'Could not plot training history: {err}')
else:
    print('No training.')

# Val Inference

In [None]:
def get_ids(filename):
    """Return all image names stored in one TFRecord file as a unicode array.

    The number of records is parsed from the file name and used as the batch
    size, so the whole file is read as a single batch.
    """
    names = get_test_dataset([filename],get_names=True)
    names = names.map(lambda image, image_name: image_name).unbatch()
    total = count_data_items([filename])
    whole_file = next(iter(names.batch(total)))
    return whole_file.numpy().astype('U')

def get_targets(filename):
    """Return all individual labels stored in one TFRecord file as an array.

    The number of records is parsed from the file name and used as the batch
    size, so the whole file is read as a single batch.
    """
    labels = get_eval_dataset([filename],get_targets=True)
    labels = labels.map(lambda image, target: target).unbatch()
    total = count_data_items([filename])
    whole_file = next(iter(labels.batch(total)))
    return whole_file.numpy()

def get_embeddings(filename, crop_method, flip=False):
    """Return the embeddings ``embed_model`` predicts for one TFRecord file.

    ``crop_method`` and ``flip`` select the image variant fed to the model.
    """
    images = get_test_dataset(
        [filename],
        get_names=False,
        crop_method=crop_method,
        flip=flip)
    return embed_model.predict(images,verbose=0)

def get_predictions(test_df,threshold=0.2):
    """Turn ranked neighbour rows into up to five predictions per image.

    Parameters
    ----------
    test_df : DataFrame with the columns ``image``, ``target`` and ``confidence``,
        iterated in row order. For each image, the first row decides where
        'new_individual' is placed.
    threshold : if the first row's confidence is above this value the target is
        ranked first and 'new_individual' second; otherwise the order is swapped.

    Returns
    -------
    dict mapping image name to a list of at most five predictions. Lists shorter
    than five are padded with entries of ``sample_list`` that are not already
    predicted for that image.
    """
    predictions = {}
    for i,row in tqdm(test_df.iterrows()):
        if row.image in predictions:
            if len(predictions[row.image])==5:
                continue
            predictions[row.image].append(row.target)
        elif row.confidence>threshold:
            predictions[row.image] = [row.target,'new_individual']
        else:
            predictions[row.image] = ['new_individual',row.target]

    for x in tqdm(predictions):
        if len(predictions[x])<5:
            # Exclude ids already predicted for this image. The check used to run
            # against the `predictions` dict (keyed by image name), which never
            # filtered anything and could produce duplicate ids.
            remaining = [y for y in sample_list if y not in predictions[x]]
            predictions[x] = (predictions[x] + remaining)[:5]

    return predictions

def map_per_image(label, predictions):
    """Return the precision score of a single image.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            Predicted labels in rank order; only the first 5 are considered

    Returns
    -------
    score : double
            1 / rank of the first prediction equal to ``label``, or 0.0 when
            ``label`` is not among the first 5 predictions
    """
    top_five = predictions[:5]
    if label not in top_five:
        return 0.0
    rank = top_five.index(label) + 1
    return 1 / rank
    
# Inverted individual-id map (value stored in individual_ids.json -> its key).
# The file is opened in a `with` block so the handle is closed after reading.
with open(config.LABEL_INFO_PATH / 'individual_ids.json', "r") as f:
    target_encodings = json.loads(f.read())
target_encodings = {target_encodings[x]:x for x in target_encodings}

# Fallback ids used by get_predictions() to pad prediction lists shorter than five.
sample_list = [
    '938b7e931166', 'ca69a5d7c122', '18efa8d0b472', '91ed5caeb0d3', '7362d7a01d00',
    'ae4343270756', '8e5253662392', '0b180ad0afa2', '6a6fa3ec3810', '6a3af6e0c55c'
]

In [None]:
def get_embeddings_for_filenames(filenames, crop_method, flip=False):
    """Return the embeddings of all ``filenames`` concatenated in input order.

    ``crop_method`` and ``flip`` are forwarded to ``get_embeddings`` for every file.
    """
    per_file = [
        get_embeddings(filename, crop_method, flip=flip)
        for filename in tqdm(filenames)
    ]
    return np.concatenate(per_file)

In [None]:
def prepare_all_embeddings(filenames, prefix):
    """Compute, merge and save the embeddings of ``filenames`` over all crop variants.

    For every crop in ``config.CROP_WEIGHTS`` the plain and horizontally flipped
    embeddings are averaged; the per-crop results are scaled by their normalised
    weight and merged.

    Parameters
    ----------
    filenames : TFRecord paths to embed.
    prefix : prefix of the output file name (e.g. 'train', 'val', 'test').

    Returns
    -------
    numpy.ndarray
        The merged embeddings, also saved with ``np.save`` to
        ``OUTPUT_DIR / '<prefix>_mean_embeddings_fold<fold>.npz'``.
    """
    # This will be lazily set by the first generated embedding, as only then will we know the dimensions.
    all_embeddings = None

    crop_weights = config.CROP_WEIGHTS
    total_weight = sum(crop_weights.values())
    crop_weights_adjusted = {k: (v / total_weight) for (k, v) in crop_weights.items()}

    for i, (crop_method, weight) in enumerate(crop_weights_adjusted.items()):
        print(f'Do {crop_method} no flip')
        train_embeddings_no_flip = get_embeddings_for_filenames(filenames, crop_method)

        print(f'Do {crop_method} flip')
        train_embeddings_flip = get_embeddings_for_filenames(filenames, crop_method, flip=True)

        # Flip test-time augmentation: average the two views. Per-crop embeddings
        # are not persisted; only the merged array below is saved.
        emb_arr = np.mean(np.stack([train_embeddings_no_flip, train_embeddings_flip]), axis=0)

        if all_embeddings is None:
            all_embeddings = np.zeros((len(crop_weights_adjusted), emb_arr.shape[0], emb_arr.shape[1]))
            print(all_embeddings.shape)

        all_embeddings[i] = emb_arr * weight

    # Try different style of merging embeddings in future.
    # NOTE(review): the weights already sum to 1, so this mean yields the weighted
    # average divided by the number of crops. The scaling is left as is so results
    # stay comparable with embeddings saved earlier - confirm before changing it.
    mean = np.mean(all_embeddings, axis=0)
    # NOTE(review): `FOLD_TO_RUN or ""` yields an empty suffix for fold 0 as well
    # as for None.
    # The context manager flushes and closes the file once the array is written.
    with open(OUTPUT_DIR / f'{prefix}_mean_embeddings_fold{config.FOLD_TO_RUN or ""}.npz', 'wb') as fh:
        np.save(fh, mean)

    return mean

In [None]:
def save_ids(filenames, prefix, include_targets=False):
    """Collect image ids (and optionally targets) of ``filenames`` and save them.

    Parameters
    ----------
    filenames : TFRecord paths, read one by one.
    prefix : prefix of the output file names (e.g. 'train', 'val', 'test').
    include_targets : when True the targets are collected and saved as well.

    Returns
    -------
    (ids, targets)
        ``targets`` is None unless ``include_targets`` is True. The arrays are
        also written with ``np.save`` to ``<prefix>_ids_fold<fold>.npz`` and
        ``<prefix>_targets_fold<fold>.npz`` inside ``OUTPUT_DIR``.

    Notes
    -----
    ``get_ids`` and ``get_targets`` are looked up when this function runs and are
    called with a single TFRecord file name; the definitions in scope at that
    point must accept that call form.
    """
    train_targets = []
    train_ids = []
    for filename in tqdm(filenames):
        if include_targets:
            train_targets.append(get_targets(filename))

        train_ids.append(get_ids(filename))

    # NOTE(review): `FOLD_TO_RUN or ""` yields an empty suffix for fold 0 as well
    # as for None.
    fold_suffix = config.FOLD_TO_RUN or ""

    if include_targets:
        train_targets = np.concatenate(train_targets)
        # `with` guarantees the data is flushed and the handle closed.
        with open(OUTPUT_DIR / f'{prefix}_targets_fold{fold_suffix}.npz', 'wb') as targets_fh:
            np.save(targets_fh, train_targets)

    train_ids = np.concatenate(train_ids)
    with open(OUTPUT_DIR / f'{prefix}_ids_fold{fold_suffix}.npz', 'wb') as ids_fh:
        np.save(ids_fh, train_ids)

    return train_ids, (train_targets if include_targets else None)

def load_all_embeddings(path, embed_name):
    """Load the array stored in ``{embed_name}_mean_embeddings_fold.npz`` under ``path``.

    Args:
        path: directory object supporting the ``/`` operator (e.g. ``pathlib.Path``).
        embed_name: file-name prefix, e.g. ``'train'``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    # Give np.load the path itself so NumPy opens and closes the file; the
    # previous bare open() left the handle open for the life of the process.
    return np.load(path / f'{embed_name}_mean_embeddings_fold.npz')

def get_ids(path, prefix):
    """Load the array stored in ``{prefix}_ids_fold.npz`` under ``path``.

    Args:
        path: directory object supporting the ``/`` operator (e.g. ``pathlib.Path``).
        prefix: file-name prefix, e.g. ``'train'``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    # Let np.load own the file handle so it is closed after reading instead of
    # being leaked by a bare open().
    return np.load(path / f'{prefix}_ids_fold.npz')

def get_targets(path, prefix):
    """Load the array stored in ``{prefix}_targets_fold.npz`` under ``path``.

    Args:
        path: directory object supporting the ``/`` operator (e.g. ``pathlib.Path``).
        prefix: file-name prefix, e.g. ``'train'``.

    Returns:
        Whatever ``np.load`` returns for that file.
    """
    # Let np.load own the file handle so it is closed after reading instead of
    # being leaked by a bare open().
    return np.load(path / f'{prefix}_targets_fold.npz')

In [None]:
# Make sure the output directory exists before anything is written into it:
# missing parent directories are created and an existing directory is not an error.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Set is_training to True to compute and save embeddings, ids and targets in
# this session (e.g. when training in Kaggle). When it is False, previously
# saved arrays are loaded from embed_path instead.
is_training = False
if is_training:
    # Split train_files by position: files whose index modulo config.FOLDS equals
    # config.VAL_FOLD_TO_RUN form the validation set, all others the training set.
    TRAINING_FILENAMES = [x for i,x in enumerate(train_files) if i%config.FOLDS!=config.VAL_FOLD_TO_RUN]
    VALIDATION_FILENAMES = [x for i,x in enumerate(train_files) if i%config.FOLDS==config.VAL_FOLD_TO_RUN]

    print(TRAINING_FILENAMES, VALIDATION_FILENAMES)

    # Compute embeddings for each split; the second argument is the prefix used
    # in the saved file names.
    train_embeddings = prepare_all_embeddings(TRAINING_FILENAMES, 'train')
    val_embeddings = prepare_all_embeddings(VALIDATION_FILENAMES, 'val')
    test_embeddings = prepare_all_embeddings(test_files, 'test')

    # Save ids (and targets for the labelled splits) alongside the embeddings.
    train_ids, train_targets = save_ids(TRAINING_FILENAMES, 'train', include_targets=True)
    print(train_targets.shape)

    val_ids, val_targets = save_ids(VALIDATION_FILENAMES, 'val', include_targets=True)
    print(val_targets.shape)

    # The test split has no targets, so the second return value is discarded.
    test_ids, _ = save_ids(test_files, 'test')
else:
    # Directory holding the precomputed arrays.
    embed_path = Path('../input/happywhale-download-b5-with-dhak-pseudo-multisam/b5-with-dhak-pseudo-multisampledropout')

    train_embeddings = load_all_embeddings(embed_path, 'train')
    val_embeddings = load_all_embeddings(embed_path, 'val')
    test_embeddings = load_all_embeddings(embed_path, 'test')

    # get_ids / get_targets here are the (path, prefix) loaders that read
    # '{prefix}_ids_fold.npz' / '{prefix}_targets_fold.npz'.
    train_ids = get_ids(embed_path, 'train')
    train_targets = get_targets(embed_path, 'train')

    val_ids = get_ids(embed_path, 'val')
    val_targets = get_targets(embed_path, 'val')

    test_ids = get_ids(embed_path, 'test')

# KNN

In [None]:
from sklearn.neighbors import NearestNeighbors

Note: validation is just to ensure there are no bugs. The model was trained on the whole dataset, so validation scores can't be trusted.

In [None]:
# Default threshold; replaced further down by the threshold with the best
# adjusted CV score.
best_threshold_adjusted = 0.6

# Fit a cosine-distance KNN index on the training embeddings and look up the
# config.KNN nearest training neighbours of every validation embedding.
neigh = NearestNeighbors(n_neighbors=config.KNN,metric='cosine')
neigh.fit(train_embeddings)
val_nn_distances, val_nn_idxs = neigh.kneighbors(val_embeddings, config.KNN, return_distance=True)

# Validation targets whose mapped label never occurs among the training targets
# are relabelled 'new_individual'.
allowed_targets = {target_encodings[x] for x in np.unique(train_targets)}
val_targets_df = pd.DataFrame(np.stack([val_ids,val_targets],axis=1),columns=['image','target'])
val_targets_df['target'] = val_targets_df['target'].astype(int).map(target_encodings)
val_targets_df.loc[~val_targets_df.target.isin(allowed_targets),'target'] = 'new_individual'
val_targets_df.target.value_counts()

# One small frame per validation image: the neighbours' targets and distances.
val_df = []
for i in tqdm(range(len(val_ids))):
    id_ = val_ids[i]
    targets = train_targets[val_nn_idxs[i]]
    distances = val_nn_distances[i]
    subset_preds = pd.DataFrame(np.stack([targets,distances],axis=1),columns=['target','distances'])
    subset_preds['image'] = id_
    val_df.append(subset_preds)
val_df = pd.concat(val_df).reset_index(drop=True)
# Turn cosine distance into a confidence and keep the best confidence per
# (image, target) pair, most confident rows first.
val_df['confidence'] = 1-val_df['distances']
val_df = val_df.groupby(['image','target']).confidence.max().reset_index()
val_df = val_df.sort_values('confidence',ascending=False).reset_index(drop=True)
val_df['target'] = val_df['target'].map(target_encodings)
val_df.to_csv('val_neighbors.csv')
val_df.image.value_counts().value_counts()

## Compute CV
th = 0.6
cv = 0

# Score every validation image at threshold th; the per-image score is stored
# in a column named after the threshold value.
all_preds = get_predictions(val_df,threshold=th)
for i,row in val_targets_df.iterrows():
    target = row.target
    preds = all_preds[row.image]
    val_targets_df.loc[i,th] = map_per_image(target,preds)
cv = val_targets_df[th].mean()
print(f"CV at threshold {th}: {cv}")

val_targets_df.describe()

if config.FOLD_TO_RUN == config.VAL_FOLD_TO_RUN:
    # Only a single threshold is evaluated in this section, so log the values
    # actually computed above. The names best_cv / best_th are not assigned
    # anywhere in this section and would raise NameError here.
    wandb.log({"best_cv": cv, "best_threshold": th})

## Adjustment: Since Public lb has nearly 10% 'new_individual' (Be Careful for private LB)
val_targets_df['is_new_individual'] = val_targets_df.target=='new_individual'
print(val_targets_df.is_new_individual.value_counts().to_dict())
# numeric_only=True restricts the mean to the score column(s); without it,
# recent pandas raises on the string 'image' / 'target' columns.
val_scores = val_targets_df.groupby('is_new_individual').mean(numeric_only=True).T
# Weight the two groups 10% / 90% to mimic the assumed share of new individuals.
val_scores['adjusted_cv'] = val_scores[True]*0.1+val_scores[False]*0.9
best_threshold_adjusted = val_scores['adjusted_cv'].idxmax()
print("best_threshold",best_threshold_adjusted)
val_scores

# Fold the validation data into the reference set used for the test-time KNN.
train_embeddings = np.concatenate([train_embeddings,val_embeddings])
train_targets = np.concatenate([train_targets,val_targets])
print(train_embeddings.shape,train_targets.shape)

# Test Submission

In [None]:
from sklearn.neighbors import NearestNeighbors
# Build a fresh cosine-distance KNN index with config.KNN neighbours and fit it
# on the current train_embeddings array.
neigh = NearestNeighbors(n_neighbors=config.KNN,metric='cosine')
neigh.fit(train_embeddings)

In [None]:
# For every test embedding, fetch the distances to and indices of its
# config.KNN nearest neighbours in the fitted index.
test_nn_distances, test_nn_idxs = neigh.kneighbors(test_embeddings, config.KNN, return_distance=True)

In [None]:
# The sample submission is read only to compare its length with the number of
# test ids as a quick sanity check.
sample_submission = pd.read_csv(config.INPUT_DATA_PATH / 'sample_submission.csv', index_col='image')
print(len(test_ids), len(sample_submission))

# Build one small frame per test image holding the targets of its nearest
# neighbours together with their distances.
test_df = []
for i, id_ in enumerate(tqdm(test_ids)):
    targets = train_targets[test_nn_idxs[i]]
    distances = test_nn_distances[i]
    subset_preds = pd.DataFrame(
        np.stack([targets, distances], axis=1),
        columns=['target', 'distances'],
    )
    subset_preds['image'] = id_
    test_df.append(subset_preds)

# Stack all frames, convert distance to confidence, keep the highest confidence
# per (image, target) pair and order rows from most to least confident.
test_df = pd.concat(test_df, ignore_index=True)
test_df['confidence'] = 1 - test_df['distances']
test_df = (
    test_df.groupby(['image', 'target'])['confidence']
    .max()
    .reset_index()
    .sort_values('confidence', ascending=False)
    .reset_index(drop=True)
)
test_df['target'] = test_df['target'].map(target_encodings)
test_df.to_csv('test_neighbors.csv')
test_df.image.value_counts().value_counts()

In [None]:
# Fallback ids used to pad a prediction list that ends up with fewer than five entries.
sample_list = ['938b7e931166', '5bf17305f073', '7593d2aee842', '7362d7a01d00','956562ff2888']

In [None]:
# Build up to five predictions per test image. test_df is ordered by descending
# confidence, so the first row seen for an image is its most confident match.
predictions = {}
for row in tqdm(test_df.itertuples(index=False), total=len(test_df)):
    if row.image in predictions:
        if len(predictions[row.image])==5:
            continue
        predictions[row.image].append(row.target)
    elif row.confidence>best_threshold_adjusted:
        # Confident top match: predict it first, 'new_individual' second.
        predictions[row.image] = [row.target,'new_individual']
    else:
        # Weak top match: lead with 'new_individual' instead.
        predictions[row.image] = ['new_individual',row.target]

for x in tqdm(predictions):
    if len(predictions[x])<5:
        # Pad with fallback ids not already predicted for THIS image. The check
        # used to be against the dict of image names, which never filtered
        # anything and could put the same id twice in one row.
        remaining = [y for y in sample_list if y not in predictions[x]]
        predictions[x] = (predictions[x]+remaining)[:5]
    predictions[x] = ' '.join(predictions[x])

# Convert to the two-column submission layout and write it out.
predictions = pd.Series(predictions).reset_index()
predictions.columns = ['image','predictions']
predictions.to_csv('submission.csv',index=False)
predictions.head()