In [None]:
# Install the extra wheels from local /kaggle/input paths.
# --no-deps tells pip not to install each package's dependencies.
!pip install /kaggle/input/tensorflow-text-260/tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl --no-deps
!pip install /kaggle/input/tensorflowranking/tensorflow_ranking-0.5.0-py2.py3-none-any.whl --no-deps
!pip install /kaggle/input/mojimoji/mojimoji-0.0.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl --no-deps

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sentencepiece as spm

import tensorflow as tf
import tensorflow_text as tf_text
import tensorflow_ranking as tfr

from tqdm.notebook import tqdm

import gc

In [None]:
# Read the train and test tables (train first, then test).
train_df, test_df = map(
    pd.read_csv,
    (
        "../input/foursquare-location-matching/train.csv",
        "../input/foursquare-location-matching/test.csv",
    ),
)

In [None]:
# "ix" is the row position of each record, looked up through its id.
train_id_map = dict(zip(train_df["id"].values, range(len(train_df))))
train_df["ix"] = train_df["id"].map(train_id_map)
train_df["categories"] = train_df["categories"].fillna("")
# "pid" numbers the distinct point_of_interest values in order of first appearance.
train_df["pid"] = train_df["point_of_interest"].map(
    {poi: code for code, poi in enumerate(train_df["point_of_interest"].unique())}
)

test_id_map = dict(zip(test_df["id"].values, range(len(test_df))))
test_df["ix"] = test_df["id"].map(test_id_map)
test_df["categories"] = test_df["categories"].fillna("")
# The test table gets the placeholder pid -1 for every row.
test_df["pid"] = -1

In [None]:
from sklearn.model_selection import KFold
# Assign every training row to one of 5 folds; rows sharing a pid always land in the same fold.
group = np.full(len(train_df), -1, dtype="int32")
g = 0
for g, (dev_p_ids, val_p_ids) in enumerate(
    KFold(n_splits=5, shuffle=True, random_state=42).split(train_df["pid"].unique())
):
    # Rows whose pid is in this fold's validation split.
    ix = train_df["pid"].isin(val_p_ids)
    group[ix] = g
train_df["group"] = group
# Test rows get the placeholder fold -1.
test_df["group"] = -1

In [None]:
import mojimoji
def zenhan_normalize(x):
    """Normalise character width in `x` with mojimoji.

    First converts full-width characters to half-width while leaving kana
    untouched (`kana=False`), then converts half-width characters back to
    full-width except for digits and ASCII (`digit=False, ascii=False`).
    """
    half_width = mojimoji.zen_to_han(x, kana=False)
    return mojimoji.han_to_zen(half_width, digit=False, ascii=False)


# Lower-case, fill missing names with "", and width-normalise both tables (train first).
for _frame in (train_df, test_df):
    _frame["name"] = np.vectorize(zenhan_normalize)(_frame["name"].fillna("").str.lower())

In [None]:
import sentencepiece as spm
# Write every normalised name (train, then test appended) to the SentencePiece training corpus.
# NOTE(review): to_csv applies CSV quoting, so names containing commas or quotes
# are presumably written with surrounding quote characters — confirm this is intended.
train_df[["name"]].to_csv("./name.txt", header=None, index=None, encoding="utf-8")
test_df[["name"]].to_csv("./name.txt", header=None, index=None, encoding="utf-8", mode="a")

# Train a unigram tokenizer; the model is written to ./name_sp.model.
spm.SentencePieceTrainer.train(
    input="./name.txt",
    model_prefix='./name_sp',
    model_type="unigram",
    vocab_size=32000,
    character_coverage=.9995,
    split_by_whitespace=True,
)

sp = spm.SentencePieceProcessor()
sp.load('./name_sp.model')


def _pieces_as_csv(text):
    """Tokenise `text` with `sp` and join the pieces with commas, dropping the "▁" marker."""
    return ",".join(piece.replace("▁", "") for piece in sp.encode_as_pieces(text))


# Human-readable tokenisation of the training names.
train_df["sp_name"] = np.vectorize(_pieces_as_csv)(train_df["name"])

In [None]:
# Spot-check 20 random names next to their tokenisation.
train_df.loc[:, ["name", "sp_name"]].sample(n=20)

In [None]:
import tensorflow as tf
from tensorflow_text import SentencepieceTokenizer

class SentencePieceEmbeddingLayer(tf.keras.layers.Layer):
    """Tokenise strings with a serialized SentencePiece model and embed the token ids.

    Args:
        vocab_size: Number of rows of the embedding table.
        out_dim: Width of each token embedding.
        sp_model_path: Path to the serialized SentencePiece model file.
    """

    def __init__(self, vocab_size, out_dim, sp_model_path):
        super(SentencePieceEmbeddingLayer, self).__init__()
        self.vocab_size = vocab_size
        self.out_dim = out_dim
        # Read the model inside a context manager so the file handle is closed
        # immediately instead of being left to the garbage collector.
        with open(sp_model_path, "rb") as model_file:
            model = model_file.read()
        self.tokenizer = SentencepieceTokenizer(model)
        self.embedding = tf.keras.layers.Embedding(vocab_size, out_dim)

    def call(self, X):
        """Return the embeddings of the tokens of each string in `X`.

        Callers in this notebook treat the result as a ragged tensor
        (one row of token embeddings per input string).
        """
        token = self.tokenizer.tokenize(X)
        X = self.embedding(token)
        return X
    
class SkipgramModel(tf.keras.Model):
    """Self-supervised objective that trains the token embeddings held by `spe`.

    Each token is asked to pick out the rest of its own name (the sum of the
    other tokens in the same row) against the summed embeddings of the rows in
    the batch.
    """
    
    def __init__(self, spe):
        super(SkipgramModel, self).__init__()
        # Embedding layer whose output is consumed as a ragged tensor below.
        self.spe = spe
        
    @tf.function(experimental_relax_shapes=True)
    def call(self, name):
        """Embed `name` with `self.spe` and return the loss from `skipgram_task`."""
        X = self.spe(name)
        name_skip_loss = self.skipgram_task(X)
        return name_skip_loss
    
    @tf.function(experimental_relax_shapes=True)
    def skipgram_task(self, X):
        """Return the mean cross-entropy of the token-vs-context classification.

        `X` must expose `values`, `value_rowids()` and `row_lengths()`
        (i.e. a ragged tensor of token embeddings, one row per name).
        """
        # Sum of the token embeddings of every row.
        X_sum = tf.reduce_sum(X, axis=1)
        # For every token: the sum of its row minus the token itself, L2-normalised.
        X_sum_other = tf.nn.l2_normalize(tf.gather(X_sum, X.value_rowids()) - X.values, axis=1)

        
        X_sum_norm = tf.nn.l2_normalize(X_sum, axis=1)
        # X_sum_other is already normalised above; this normalises it a second time.
        X_sum_other_norm = tf.nn.l2_normalize(X_sum_other, axis=1)
        X_value_norm = tf.nn.l2_normalize(X.values, axis=1)

        # Column 0: cosine between each token and the rest of its own row.
        correct_cossim = tf.expand_dims(tf.einsum("Vd,Vd->V", X_value_norm, X_sum_other_norm), axis=1)
        # Remaining columns: cosine between each token and every row sum in the batch.
        wrong_cossim = tf.einsum("Vd,Nd->VN", X_value_norm, X_sum_norm)
        cossim = tf.concat([correct_cossim, wrong_cossim], axis=1)

        # Only tokens whose row holds more than one token take part.
        rowwise_mask = tf.expand_dims(tf.gather(X.row_lengths(), X.value_rowids()) > 1, axis=1)
        # range(N + 1) - 1 gives [-1, 0, ..., N-1]: the -1 lines up with column 0 (never
        # equal to a row id, so it stays enabled) and the rest disable the token's own row.
        sameid_mask = tf.expand_dims(tf.cast(X.value_rowids(), "int32"), axis=1) != tf.expand_dims(tf.range(tf.shape(X.row_lengths())[0] + 1)-1, axis=0)

        mask = tf.math.logical_and(rowwise_mask, sameid_mask)
        # One-hot target on column 0.
        label = tf.concat([tf.ones(tf.shape(correct_cossim)), tf.zeros(tf.shape(wrong_cossim))], axis=1)
        
        # Fixed AdaCos parameter: sqrt(2) * log(number of rows - 1).
        scale = tf.sqrt(2.) * tf.math.log(tf.cast(tf.shape(X.row_lengths())[0], "float32") - 1.)

        # Masked positions get a zero label and the smallest float32 as logit.
        label = tf.where(mask, label, tf.zeros(tf.shape(mask)))
        pred = tf.where(mask, scale * cossim, tf.float32.min)
        loss = tf.math.reduce_mean(tf.keras.losses.categorical_crossentropy(label, pred, from_logits=True))
        return loss        

In [None]:
class DataContainer():
    """Holds the per-record tensors (name, position, pid, group) used for sampling.

    `positive_ix` is gathered by pid and used through `value_rowids()` / `values`,
    i.e. it is expected to be a ragged tensor whose row `p` lists the record
    indices that belong to pid `p`.
    """
    
    def __init__(self, df, positive_ix, skip_group=-1):
        self.positive_ix = positive_ix
        # Stored only; not read by any method of this class.
        self.skip_group = skip_group
        self.name = tf.constant(df["name"].values)
        # (latitude, longitude) converted to radians, with a leading axis of size 1.
        self.position = tf.expand_dims(tf.constant(np.deg2rad(df[["latitude", "longitude"]].astype(np.float32).values), dtype="float32"), axis=0)
        self.pid = tf.constant(df["pid"].astype(np.int32).values, dtype="int32")
        self.group = tf.constant(df["group"].astype(np.int32).values, dtype="int32")
        
    def get_position(self, ix):
        """Return the (lat, lon) radians of records `ix`, dropping the leading size-1 axis."""
        return tf.gather(tf.gather(self.position, ix, axis=1), 0)

    def call(self, ix):
        """Map a batch of record indices to `(ix, name, position)`; used with `Dataset.map`."""
        name = tf.gather(self.name, ix)
        position = self.get_position(ix)
        return ix, name, position
    
    @tf.function(experimental_relax_shapes=True)
    def log_haversine(self, X, Y):
        """Return log(haversine central angle + epsilon) between `X` and `Y`.

        The last axis of both inputs is read as (latitude, longitude); inputs
        built from `self.position` are in radians. The inputs are combined
        with `Y - X`, so they must be broadcastable against each other.
        """
        delta = Y - X
        x_lats = tf.gather(X, 0, axis=-1)
        y_lats = tf.gather(Y, 0, axis=-1)
        dlat = tf.gather(delta, 0, axis=-1)
        dlon = tf.gather(delta, 1, axis=-1)

        a = tf.sin(dlat/2) * tf.sin(dlat/2) + tf.cos(x_lats) * tf.cos(y_lats) * tf.sin(dlon/2) * tf.sin(dlon/2)
        # NOTE(review): `a` is not clipped; if rounding ever pushes it above 1,
        # sqrt(1-a) would be NaN — confirm whether that can occur for this data.
        # The epsilon keeps the log finite for identical points.
        c = tf.math.log(2 * tf.math.atan2(tf.sqrt(a), tf.sqrt(1-a)) + tf.keras.backend.epsilon())
        return c
    
    @tf.function(experimental_relax_shapes=True)
    def negative_sample_by_dist_top_k(self, ix, k=128):
        """Return the `k` geographically nearest records that do not share the query's pid.

        Returns `(dist, neighbor)`: log-haversine distances and record indices,
        ordered from the farthest of the k to the nearest.
        """
        X = tf.expand_dims(self.get_position(ix), axis=1)
        c = self.log_haversine(X, self.position)
        pos_ix = tf.gather(self.positive_ix, tf.gather(self.pid, ix))
        # Overwrite the distance to every same-pid record with a huge value so
        # it is never among the k smallest.
        c = tf.tensor_scatter_nd_update(c, tf.stack([tf.cast(pos_ix.value_rowids(), "int32"), pos_ix.values], axis=1), tf.ones([tf.shape(pos_ix.values)[0]])*tf.float32.max/10.)
        # top_k on the negated distances yields the k smallest, nearest first.
        dist, neighbor = tf.math.top_k(-c, k=k)

        # Reverse every row over its full length: nearest-first becomes farthest-first.
        dist = tf.reverse_sequence(-dist, 
                                  tf.ones([tf.shape(dist)[0]], dtype="int32") * tf.shape(dist)[1],
                                  seq_axis=1)
        neighbor = tf.reverse_sequence(neighbor, 
                                  tf.ones([tf.shape(neighbor)[0]], dtype="int32") * tf.shape(neighbor)[1],
                                  seq_axis=1)
        return dist, neighbor
    
    def get_positive_info(self, ix):
        """Return padded `(pos_dist, pos_ix)` for the same-pid records of each query.

        `pos_ix` is padded with -1 and the query's own index is also replaced
        by -1; `pos_dist` is padded with 0.
        """
        pos_ix = tf.gather(self.positive_ix, tf.gather(self.pid, ix))
        Y = self.get_position(pos_ix)
        # Repeat each query once per positive so X lines up with Y.values.
        X = self.get_position(tf.gather(ix, pos_ix.value_rowids()))
        pos_dist = tf.RaggedTensor.from_value_rowids(self.log_haversine(X, Y.values), Y.value_rowids())
        pos_ix = pos_ix.to_tensor(default_value=-1)
        pos_ix = tf.where(tf.expand_dims(ix, axis=1) != pos_ix, pos_ix, -1)
        pos_dist = pos_dist.to_tensor(default_value=0.)
        return pos_dist, pos_ix

In [None]:
# Embedding width shared by the tokenizer embedding and everything built on it.
DIMSIZE = 128
spe = SentencePieceEmbeddingLayer(
    vocab_size=32000,
    out_dim=DIMSIZE,
    sp_model_path="./name_sp.model",
)
skipgram_model = SkipgramModel(spe=spe)

In [None]:
container_cols = ["name", "latitude", "longitude", "pid", "group"]
# Train and test rows stacked (train first); the positive lists come from train only.
all_container = DataContainer(
    df=pd.concat([train_df.loc[:, container_cols], test_df.loc[:, container_cols]]),
    positive_ix=tf.cast(
        tf.ragged.constant(train_df.groupby("pid")["ix"].unique().tolist()),
        "int32",
    ),
)

In [None]:
# Skip-gram pre-training configuration.
NUM_EPOCH = 10
history = dict(skipgram_loss=[])
skipgram_loss = tf.keras.metrics.Mean(name='loss')
skipgram_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Reshuffled batches of 1024 record indices, mapped to (ix, name, position).
all_ixs = tf.range(len(all_container.name))
all_ds = (
    tf.data.Dataset.from_tensor_slices(all_ixs)
    .shuffle(len(all_ixs), reshuffle_each_iteration=True)
    .batch(1024, drop_remainder=True)
    .map(all_container.call)
)

@tf.function(experimental_relax_shapes=True)
def skipgram_forward_step(name):
    """Run one optimisation step of `skipgram_model` on a batch of names and return the loss."""
    with tf.GradientTape() as grad_tape:
        loss = skipgram_model(name, training=True)
    weights = skipgram_model.trainable_variables
    skipgram_optimizer.apply_gradients(zip(grad_tape.gradient(loss, weights), weights))
    return loss

# One progress-bar tick per epoch; the postfix shows the step counter and running mean loss.
with tqdm(total=NUM_EPOCH) as pbar:
    for _ in range(NUM_EPOCH):
        step = 0
        for ix, name, position in all_ds:
            sl = skipgram_forward_step(name)
            skipgram_loss.update_state(sl)
            learning_text = "[{}/{}] ".format(str(step).zfill(5), len(all_ds))
            progress_text = "skipgram_loss: {:.4f}".format(skipgram_loss.result().numpy())
            pbar.set_postfix_str("".join([learning_text, progress_text]))
            step += 1

        # Keep the epoch mean, then start the next epoch from a clean metric.
        history["skipgram_loss"].append(skipgram_loss.result().numpy())
        pbar.update(1)
        skipgram_loss.reset_states()

In [None]:
# Replace the combined container with a train-only one (both names point to it).
del all_container
train_container = all_container = DataContainer(
    df=train_df.loc[:, container_cols],
    positive_ix=tf.cast(
        tf.ragged.constant(train_df.groupby("pid")["ix"].unique().tolist()),
        "int32",
    ),
)

train_ds = (
    tf.data.Dataset.from_tensor_slices(tf.range(len(train_df)))
    .batch(1024)
    .map(train_container.call)
)

# Name embedding = L2-normalised sum of the token embeddings of the name.
embeddings = []
for ix, name, position in tqdm(train_ds):
    pooled = tf.reduce_sum(spe(name), axis=1)
    embeddings.append(tf.nn.l2_normalize(pooled, axis=1))
embeddings = tf.nn.l2_normalize(tf.concat(embeddings, axis=0), axis=1)

In [None]:
import tensorflow_ranking as tfr
# The token embeddings are frozen from here on.
spe.embedding.trainable = False


class LogisticModel(tf.keras.Model):
    """Linear scorer over `dim` pair features, trained with an ApproxNDCG ranking loss."""

    def __init__(self, dim):
        super().__init__()
        # One weight per feature plus a scalar bias, both starting at zero.
        self.scale = tf.Variable(tf.zeros([dim], dtype="float32"), trainable=True)
        self.bias = tf.Variable(0., trainable=True)
        # Previously tried: tf.keras.losses.BinaryCrossentropy(from_logits=True)
        self.loss_func = tfr.keras.losses.ApproxNDCGLoss()

    def call(self, X, Y):
        """Score features `X` (indexed as n, m, d) and return the ranking loss against labels `Y`."""
        logit = tf.einsum("nmd,d->nm", X, self.scale) + self.bias
        return self.loss_func(Y, logit)


logistic_model = LogisticModel(2)

In [None]:

# Setup for fitting the linear (distance, cosine) scorer.
history = dict(logistic_loss=[])
logistic_loss = tf.keras.metrics.Mean(name='loss')
logistic_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)

all_ixs = tf.range(len(all_container.name))
# Reshuffled batches of 128 training indices, mapped to (ix, name, position).
train_ds = (
    tf.data.Dataset.from_tensor_slices(tf.range(len(train_df)))
    .shuffle(len(all_ixs), reshuffle_each_iteration=True)
    .batch(128, drop_remainder=True)
    .map(all_container.call)
)

@tf.function(experimental_relax_shapes=True)
def logistic_forward_step(X, Y):
    """Compute the loss of `logistic_model` and apply the update only if every gradient is finite."""
    with tf.GradientTape() as grad_tape:
        loss = logistic_model(X, Y, training=True)
    weights = logistic_model.trainable_variables
    gradients = grad_tape.gradient(loss, weights)
    # Skip the step entirely when any gradient contains NaN/Inf.
    all_finite = tf.reduce_all([tf.reduce_all(tf.math.is_finite(g)) for g in gradients])
    if all_finite:
        logistic_optimizer.apply_gradients(zip(gradients, weights))
    return loss

# Fit the linear scorer: every query is ranked against 1024 geographic neighbours
# into which its same-pid records are written as the positives.
with tqdm(total=len(train_ds)) as pbar:
    step = 0
    for ix, name, position in train_ds:
        neg_dist, neg_ixs = all_container.negative_sample_by_dist_top_k(ix, k=1024)
        pos_dist, pos_ixs = all_container.get_positive_info(ix)
        # Order the positives by ascending distance and keep at most 64 columns.
        pos_order = tf.slice(tf.argsort(pos_dist, axis=1), [0, 0], [-1, min(tf.shape(pos_dist)[1], 64)])
        pos_dist = tf.gather(pos_dist, pos_order, batch_dims=1)
        pos_ixs = tf.gather(pos_ixs, pos_order, batch_dims=1)
        # The query itself is marked -1 so it is not used as a positive.
        pos_ixs = tf.where(tf.expand_dims(ix, axis=1) != pos_ixs, pos_ixs, -1)

        # (row, column) slots holding a real positive; those slots of the
        # negative arrays are overwritten with the positive's index/label/distance.
        update_ixs = tf.where(pos_ixs >= 0)
        target_ixs = tf.tensor_scatter_nd_update(neg_ixs, update_ixs, tf.gather_nd(pos_ixs, update_ixs))
        target_label = tf.tensor_scatter_nd_update(tf.zeros(tf.shape(neg_dist)), update_ixs, tf.ones([tf.shape(update_ixs)[0]]))
        target_dist = tf.tensor_scatter_nd_update(neg_dist, update_ixs, tf.gather_nd(pos_dist, update_ixs))
        # Dot product between the query embedding and every target embedding.
        target_cossim = tf.einsum("nd,nmd->nm", tf.gather(embeddings, ix), tf.gather(embeddings, target_ixs))
        target_feat = tf.stack([target_dist, target_cossim], axis=-1)
        loss = logistic_forward_step(target_feat, target_label)
        # Non-finite losses are left out of the running mean.
        if tf.math.is_finite(loss):
            logistic_loss(loss)
        progress_text = "logistic_loss: {:.4f}".format(logistic_loss.result().numpy())
        pbar.set_postfix_str(progress_text)
        pbar.update(1)
        step += 1
        # Training stops after 1000 batches.
        if step == 1000:
            break

In [None]:
# Number of rows sharing each row's pid.
train_df["true_count"] = (
    train_df
    .groupby("pid")["ix"]
    .transform("count")
)

# Freeze the fitted scorer and show its learned feature weights.
logistic_model.trainable = False
logistic_model.compile()
logistic_model.scale


In [None]:
# Retrieval baseline: embedding similarity only.
# The evaluation code is wrapped in a string literal, so it is not executed;
# the print below emits a hard-coded number (presumably recorded from an
# earlier run — TODO confirm it still matches the current model).
"""
emb_ds = tf.data.Dataset.from_tensor_slices(embeddings)\
                .batch(128)

neighbors = []
for emb in tqdm(emb_ds):
    score, indices = tf.math.top_k(tf.einsum("md,nd->mn", emb, embeddings), k=16)
    neighbors.append(indices)
neighbors = tf.minimum(tf.concat(neighbors, axis=0).numpy()[:len(train_df)], len(train_df)-1)
print("precision@16", ((train_df["pid"].values.reshape(-1, 1) == train_df["pid"].values[neighbors]).sum(axis=1) / train_df["true_count"]).mean())
"""
print("precision@16", 0.7658628703149348)

In [None]:
# Retrieval baseline: haversine distance only.
# The evaluation code is wrapped in a string literal, so it is not executed;
# the print below emits a hard-coded number (presumably recorded from an
# earlier run — TODO confirm it still matches the current model).
"""
train_ixs_ds = tf.data.Dataset.from_tensor_slices(tf.range(len(train_df)))\
                .batch(128)

neighbors = []
for ix in tqdm(train_ixs_ds):
    emb = tf.gather(embeddings, ix)
    X = tf.expand_dims(train_container.get_position(ix), axis=1)
    dist = train_container.log_haversine(X, train_container.position)
    _, indices = tf.math.top_k(-dist, k=16)
    neighbors.append(indices)
neighbors = tf.minimum(tf.concat(neighbors, axis=0).numpy()[:len(train_df)], len(train_df)-1)
print("precision@16", ((train_df["pid"].values.reshape(-1, 1) == train_df["pid"].values[neighbors]).sum(axis=1) / train_df["true_count"]).mean())
"""
print("precision@16", 0.8945505050306388)

In [None]:
# Retrieval with the fitted linear scorer: embedding similarity + haversine distance.
# The evaluation code is wrapped in a string literal, so it is not executed;
# the print below emits a hard-coded number (presumably recorded from an
# earlier run — TODO confirm it still matches the current model).
"""
train_ixs_ds = tf.data.Dataset.from_tensor_slices(tf.range(len(train_df)))\
                .batch(128)

neighbors = []
for ix in tqdm(train_ixs_ds):
    emb = tf.gather(embeddings, ix)
    X = tf.expand_dims(train_container.get_position(ix), axis=1)
    dist = train_container.log_haversine(X, train_container.position)
    cossim = tf.einsum("nd,md->nm", emb, embeddings)
    score = tf.einsum("d,nmd->nm", logistic_model.scale, tf.stack([dist, cossim], axis=-1))
    _, indices = tf.math.top_k(score, k=16)
    neighbors.append(indices)
neighbors = tf.minimum(tf.concat(neighbors, axis=0).numpy()[:len(train_df)], len(train_df)-1)
print("precision@16", ((train_df["pid"].values.reshape(-1, 1) == train_df["pid"].values[neighbors]).sum(axis=1) / train_df["true_count"]).mean())
"""
print("precision@16", 0.9550629119862758)

In [None]:
class DCNV2Classifier(tf.keras.models.Model):
    """Stack of low-rank cross layers followed by dense layers and a 1-unit output.

    Args:
        num_cross: Number of cross layers (0 disables the cross part).
        num_dense: Number of dense (GELU) layers.
        hidden_dim: Width of every dense layer.
        shrink_size: The cross layers project through
            `hidden_dim // shrink_size` units before expanding back to the
            input width.
    """

    def __init__(self, num_cross, num_dense, hidden_dim, shrink_size=4):
        super(DCNV2Classifier, self).__init__()
        self.num_cross = num_cross
        self.num_dense = num_dense
        self.hidden_dim = hidden_dim
        # Kept on the instance because build() needs it.
        self.shrink_size = shrink_size

        # One LayerNormalization per cross layer, then one per dense layer.
        self.norm_layers = [tf.keras.layers.LayerNormalization() for _ in range(self.num_cross+num_dense)]
        self.dense_layers = [tf.keras.layers.Dense(hidden_dim, activation="gelu") for _ in range(self.num_dense)]
        self.pred_layer = tf.keras.layers.Dense(1)
        self.built = False

    def build(self, input_shape):
        """Create the cross-layer projections once the input width is known."""
        last_dim = input_shape[-1]
        # Bottleneck width; at least 1 so Dense never receives 0 units.
        low_rank = max(1, self.hidden_dim // self.shrink_size)
        # Down-projection to the bottleneck ...
        self.filter_layers = [tf.keras.layers.Dense(low_rank) for _ in range(self.num_cross)]
        # ... and up-projection back to the input width, so the element-wise
        # product with X0 in call() has matching shapes.
        self.out_layers = [tf.keras.layers.Dense(last_dim) for _ in range(self.num_cross)]

    def call(self, X):
        """Return one logit per feature vector along the last axis of `X`."""
        X0 = tf.identity(X)
        for i in range(self.num_cross):
            # Cross layer with residual: X + X0 * (U(V(X))), then layer norm.
            X = self.norm_layers[i](X + X0 * self.out_layers[i](self.filter_layers[i](X)))

        for i in range(self.num_dense):
            X = self.norm_layers[i+self.num_cross](self.dense_layers[i](X))

        return self.pred_layer(X)


In [None]:
# No cross layers, two dense layers of width 128.
classify_model = DCNV2Classifier(num_cross=0, num_dense=2, hidden_dim=128)

In [None]:
# Fold 0 is held out for validation; every other fold is used for fitting.
dev_ix = np.flatnonzero(train_df["group"].to_numpy() != 0).astype(np.int32)
val_ix = np.flatnonzero(train_df["group"].to_numpy() == 0).astype(np.int32)

# Additive per-record offsets applied to retrieval scores:
# validation queries may retrieve any record ...
val_filter = tf.zeros(len(train_df))
# ... while dev queries get a hugely negative offset on every validation record.
dev_filter = tf.tensor_scatter_nd_update(
    tf.zeros(len(train_df)),
    val_ix[:, np.newaxis],
    tf.ones(len(val_ix)) * tf.float32.min / 10.,
)


In [None]:
# Number of candidates built per query by the functions below.
NUM_NEGATIVE = 32
neighbors = []

# Dev batches are reshuffled on every pass; validation batches keep the order of val_ix.
dev_ds = (
    tf.data.Dataset.from_tensor_slices(tf.constant(dev_ix))
    .shuffle(len(dev_ix), reshuffle_each_iteration=True)
    .batch(128)
)
val_ds = tf.data.Dataset.from_tensor_slices(tf.constant(val_ix)).batch(128)

@tf.function(experimental_relax_shapes=True)
def make_feat_target(ix, retrieval_filter):
    """Build NUM_NEGATIVE labelled candidates per query for classifier training.

    Args:
        ix: Batch of query record indices.
        retrieval_filter: Per-record offset added to the retrieval score of
            every query (the callers pass `dev_filter` or `val_filter`).

    Returns:
        `(target_ixs, target_feat, target_label)`: candidate indices, their
        features (query embedding, candidate embedding, distance, cosine) and
        1/0 labels marking same-pid candidates.
    """
    emb = tf.gather(embeddings, ix)
    query_position = tf.expand_dims(train_container.get_position(ix), axis=1)
    
    pos_ix = tf.gather(train_container.positive_ix, tf.gather(train_container.pid, ix))
    dist = train_container.log_haversine(query_position, train_container.position)
    cossim = tf.einsum("nd,md->nm", emb, embeddings)
    # Retrieval score from the fitted linear weights (the bias is not used here).
    score = tf.einsum("d,nmd->nm", logistic_model.scale, tf.stack([dist, cossim], axis=-1))
    # Same-pid records (the query included) get a hugely negative score so
    # they are never drawn as negatives.
    score = tf.tensor_scatter_nd_update(score,
                                        tf.stack([tf.cast(pos_ix.value_rowids(), "int32"), pos_ix.values], axis=1),
                                        tf.ones([tf.shape(pos_ix.values)[0]])*tf.float32.min/10.)
    score += retrieval_filter
    _, neg_indices = tf.math.top_k(score, k=NUM_NEGATIVE)
    # top_k is best-first; reversing each full row puts the best-scoring negatives last.
    neg_indices = tf.reverse_sequence(neg_indices, 
                              tf.ones([tf.shape(neg_indices)[0]], dtype="int32") * tf.shape(neg_indices)[1],
                              seq_axis=1)
    
    
    # Dense positives: padded with -1, cut to at most NUM_NEGATIVE columns,
    # and the query itself replaced by -1.
    pos_ix = pos_ix.to_tensor(default_value=-1)
    pos_ix = tf.slice(pos_ix, [0, 0], [-1, tf.minimum(NUM_NEGATIVE, tf.shape(pos_ix)[1])])
    pos_ix = tf.where(tf.expand_dims(ix, axis=1) != pos_ix, pos_ix, -1)
    
    
    # Slots holding a real positive overwrite the negative at the same (row, column).
    update_ixs = tf.where(pos_ix >= 0)
    target_ixs = tf.tensor_scatter_nd_update(neg_indices, update_ixs, tf.gather_nd(pos_ix, update_ixs))
    target_label = tf.tensor_scatter_nd_update(tf.zeros(tf.shape(target_ixs)), update_ixs, tf.ones([tf.shape(update_ixs)[0]]))
    target_cossim = tf.gather(cossim, target_ixs, batch_dims=1)
    target_dist = tf.gather(dist, target_ixs, batch_dims=1)
    # Repeat the query embedding once per candidate.
    query_embedding = tf.repeat(tf.expand_dims(emb, axis=1), NUM_NEGATIVE, axis=1)
    target_embedding = tf.gather(embeddings, target_ixs)
    target_feat = tf.concat([query_embedding, target_embedding, tf.stack([target_dist, target_cossim], axis=-1)], axis=-1)
    return target_ixs, target_feat, target_label
    
# Classifier training configuration.
NUM_EPOCH = 1
history = dict(classify_dev_loss=[], classify_val_loss=[])
classify_loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=True)
classify_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Running means of the dev and validation loss.
classify_dev_loss, classify_val_loss = (
    tf.keras.metrics.Mean(name='loss') for _ in range(2)
)

@tf.function(experimental_relax_shapes=True)
def classify_forward_step(X, y):
    """Run one optimisation step of `classify_model` on features `X` / labels `y`; return the loss."""
    with tf.GradientTape() as grad_tape:
        loss = classify_loss_func(y, classify_model(X, training=True))
    weights = classify_model.trainable_variables
    classify_optimizer.apply_gradients(zip(grad_tape.gradient(loss, weights), weights))
    return loss

@tf.function(experimental_relax_shapes=True)
def classify_eval_step(X, y):
    """Return the loss of `classify_model` on features `X` / labels `y` without updating weights.

    The model is called in inference mode (`training=False`), so any
    training-only behaviour in its layers stays off during evaluation.
    """
    pred = classify_model(X, training=False)
    loss = classify_loss_func(y, pred)
    return loss


# Train the pair classifier: one pass over the dev batches followed by one
# evaluation pass over the validation batches per epoch.
for i in range(NUM_EPOCH):
    print("epoch ", i+1)
    with tqdm(total=len(dev_ds)) as pbar:
        step = 0
        for ix in dev_ds:
            # dev_filter keeps validation records out of the retrieved negatives.
            tix, X, y = make_feat_target(ix, dev_filter)
            # Labels get a trailing axis to match the (…, 1) model output.
            y = tf.expand_dims(y, axis=-1)
            loss = classify_forward_step(X, y)
            classify_dev_loss(loss)
            learning_text = "[{}/{}] ".format(str(step).zfill(5), len(dev_ds))
            progress_text = "dev_loss: {:.4f} val_loss: {:.4f}".format(classify_dev_loss.result().numpy(),
                                                                       classify_val_loss.result().numpy(),
                                                                       )
            pbar.set_postfix_str(learning_text + progress_text)
            step += 1
            pbar.update(1)
    with tqdm(total=len(val_ds)) as pbar:
        step = 0
        for ix in val_ds:
            tix, X, y = make_feat_target(ix, val_filter)
            y = tf.expand_dims(y, axis=-1)
            loss = classify_eval_step(X, y)
            classify_val_loss(loss)
            learning_text = "[{}/{}] ".format(str(step).zfill(5), len(val_ds))
            progress_text = "dev_loss: {:.4f} val_loss: {:.4f}".format(classify_dev_loss.result().numpy(),
                                                                       classify_val_loss.result().numpy(),
                                                                       )
            pbar.set_postfix_str(learning_text + progress_text)
            step += 1
            pbar.update(1)
    # Store the epoch means before the metrics are cleared; otherwise the
    # `history` dict stays empty and the per-epoch losses are lost.
    history["classify_dev_loss"].append(classify_dev_loss.result().numpy())
    history["classify_val_loss"].append(classify_val_loss.result().numpy())
    classify_dev_loss.reset_states()
    classify_val_loss.reset_states()
    

In [None]:
@tf.function(experimental_relax_shapes=True)
def make_pred_feat(ix):
    """Retrieve the NUM_NEGATIVE best-scoring training records for each query in `ix`.

    Returns `(candidate_ixs, candidate_feat)`: int32 candidate indices and,
    per candidate, the concatenation of query embedding, candidate embedding,
    log-haversine distance and cosine similarity.
    """
    query_emb = tf.gather(embeddings, ix)
    query_position = tf.expand_dims(train_container.get_position(ix), axis=1)

    # Pairwise features between the queries and every training record.
    dist = train_container.log_haversine(query_position, train_container.position)
    cossim = tf.einsum("nd,md->nm", query_emb, embeddings)
    pair_feat = tf.stack([dist, cossim], axis=-1)

    # Rank with the fitted linear weights and keep the top NUM_NEGATIVE.
    score = tf.einsum("d,nmd->nm", logistic_model.scale, pair_feat)
    candidate_ixs = tf.cast(tf.math.top_k(score, k=NUM_NEGATIVE).indices, "int32")

    candidate_feat = tf.concat(
        [
            tf.repeat(tf.expand_dims(query_emb, axis=1), NUM_NEGATIVE, axis=1),
            tf.gather(embeddings, candidate_ixs),
            tf.stack(
                [
                    tf.gather(dist, candidate_ixs, batch_dims=1),
                    tf.gather(cossim, candidate_ixs, batch_dims=1),
                ],
                axis=-1,
            ),
        ],
        axis=-1,
    )
    return candidate_ixs, candidate_feat

# Per validation query: the true same-pid indices, the retrieved candidates and their scores.
pos_labels = []
candidate_ixs = []
candidate_scores = []

for ix in tqdm(val_ds):
    candidate_ix, candidate_feat = make_pred_feat(ix)
    candidate_score = tf.math.sigmoid(
        tf.reshape(classify_model(candidate_feat), tf.shape(candidate_ix))
    )

    # Same-pid record indices, padded with -2 and cut to at most NUM_NEGATIVE columns.
    pos_ix = tf.gather(train_container.positive_ix, tf.gather(train_container.pid, ix))
    pos_ix = pos_ix.to_tensor(default_value=-2)
    pos_ix = pos_ix[:, :NUM_NEGATIVE]
    update_ix = tf.where(pos_ix >= 0)
    # Write them onto a fixed-width canvas of -2 so every batch has NUM_NEGATIVE columns.
    target_ixs = tf.tensor_scatter_nd_update(
        tf.fill([len(pos_ix), NUM_NEGATIVE], tf.constant(-2, dtype="int32")),
        update_ix,
        tf.gather_nd(pos_ix, update_ix),
    )

    pos_labels.append(target_ixs)
    candidate_ixs.append(candidate_ix)
    candidate_scores.append(candidate_score)

In [None]:
# Stack the per-batch results into one tensor each (rows follow the order of val_ds).
pos_labels = tf.concat(values=pos_labels, axis=0)
candidate_ixs = tf.concat(values=candidate_ixs, axis=0)
candidate_scores = tf.concat(values=candidate_scores, axis=0)

In [None]:
# True where a retrieved candidate appears among the query's same-pid indices.
candidate_correct = tf.reduce_any(
    candidate_ixs[:, :, tf.newaxis] == pos_labels[:, tf.newaxis, :],
    axis=2,
)

In [None]:
# Pick the score threshold that maximises the mean per-query IoU between the
# predicted match set and the true same-pid set on the validation fold.

# The validation tensors were accumulated by iterating val_ds, which batches
# val_ix without shuffling, so row r belongs to query val_ix[r].
# The submission always lists the query's own id as a match, so the query is
# forced into the prediction here as well to keep the tuned threshold
# consistent with inference. (If the query is not among its own retrieved
# candidates it still cannot be counted; that case is not handled.)
is_self = candidate_ixs == tf.cast(tf.expand_dims(val_ix, axis=1), candidate_ixs.dtype)

# Size of the true set per query; it does not depend on the threshold, so it
# is computed once instead of on every iteration.
true_count = tf.cast(tf.maximum(1, tf.math.count_nonzero(pos_labels >= 0, axis=1)), "float32")

best_score = -1
best_threshold = 0.
for threshold in np.linspace(0, 1, 100):
    predict = tf.logical_or(candidate_scores > threshold, is_self)
    correct_count = tf.cast(tf.math.count_nonzero(tf.logical_and(candidate_correct, predict), axis=1), "float32")
    predict_count = tf.cast(tf.math.count_nonzero(predict, axis=1), "float32")

    # IoU = |true ∩ predicted| / |true ∪ predicted|.
    score = tf.math.reduce_mean(correct_count / (true_count + predict_count - correct_count))

    if best_score < score:
        best_score = score
        best_threshold = threshold

print(best_threshold, best_score)

In [None]:
# Release the training-side tensors before building the test-side ones.
del all_container, train_container, embeddings
gc.collect()

test_container = all_container = DataContainer(
    df=test_df.loc[:, container_cols],
    positive_ix=tf.cast(
        tf.ragged.constant(test_df.groupby("pid")["ix"].unique().tolist()),
        "int32",
    ),
)

test_ds = (
    tf.data.Dataset.from_tensor_slices(tf.range(len(test_df)))
    .batch(1024)
    .map(test_container.call)
)

# Name embedding = L2-normalised sum of the token embeddings of the name.
embeddings = []
for ix, name, position in tqdm(test_ds):
    pooled = tf.reduce_sum(spe(name), axis=1)
    embeddings.append(tf.nn.l2_normalize(pooled, axis=1))
embeddings = tf.nn.l2_normalize(tf.concat(embeddings, axis=0), axis=1)

In [None]:
# top_k cannot return more candidates than there are test records.
NUM_NEGATIVE = min(NUM_NEGATIVE, len(test_df))
test_ds = (
    tf.data.Dataset.from_tensor_slices(tf.constant(tf.range(len(test_df))))
    .batch(128)
)

@tf.function(experimental_relax_shapes=True)
def make_pred_feat(ix):
    """Retrieve the NUM_NEGATIVE best-scoring test records for each query in `ix`.

    Same construction as the validation version, but reading positions from
    `test_container` and the test-side `embeddings`.

    Returns `(candidate_ixs, candidate_feat)`: int32 candidate indices and,
    per candidate, the concatenation of query embedding, candidate embedding,
    log-haversine distance and cosine similarity.
    """
    query_emb = tf.gather(embeddings, ix)
    query_position = tf.expand_dims(test_container.get_position(ix), axis=1)

    # Pairwise features between the queries and every test record.
    dist = test_container.log_haversine(query_position, test_container.position)
    cossim = tf.einsum("nd,md->nm", query_emb, embeddings)
    pair_feat = tf.stack([dist, cossim], axis=-1)

    # Rank with the fitted linear weights and keep the top NUM_NEGATIVE.
    score = tf.einsum("d,nmd->nm", logistic_model.scale, pair_feat)
    candidate_ixs = tf.cast(tf.math.top_k(score, k=NUM_NEGATIVE).indices, "int32")

    tiled_query = tf.repeat(tf.expand_dims(query_emb, axis=1), NUM_NEGATIVE, axis=1)
    candidate_emb = tf.gather(embeddings, candidate_ixs)
    candidate_pair = tf.stack(
        [
            tf.gather(dist, candidate_ixs, batch_dims=1),
            tf.gather(cossim, candidate_ixs, batch_dims=1),
        ],
        axis=-1,
    )
    candidate_feat = tf.concat([tiled_query, candidate_emb, candidate_pair], axis=-1)
    return candidate_ixs, candidate_feat

test_ids = test_df["id"].values
results = []

for ix in tqdm(test_ds):
    candidate_ix, candidate_feat = make_pred_feat(ix)
    candidate_score = tf.math.sigmoid(
        tf.reshape(classify_model(candidate_feat), tf.shape(candidate_ix))
    )
    # The query itself is scored -1 so it is never appended a second time.
    candidate_score = tf.where(candidate_ix != tf.expand_dims(ix, axis=1), candidate_score, -1.)
    # (row, column) positions of the candidates accepted by the tuned threshold.
    pred_loc = tf.where(candidate_score > best_threshold)

    # Every record starts out matching itself; accepted candidates are appended.
    res = [{"id": test_ids[i], "matches": test_ids[i]} for i in ix.numpy()]
    accepted_rows = pred_loc[:, 0].numpy()
    accepted_ids = tf.gather_nd(candidate_ix, pred_loc).numpy()
    for i, cix in zip(accepted_rows, accepted_ids):
        res[i]["matches"] = res[i]["matches"] + " " + test_ids[cix]
    results.extend(res)

In [None]:
# Write one row per test record: its id and the space-separated ids it matches.
pd.DataFrame(results).to_csv(
    path_or_buf="submission.csv",
    index=None,
    mode="w",
)