In [162]:
import numpy as np
import pandas as pd

from typing import Dict, Optional, Text, Tuple, Union

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [179]:
def _take_along_axis(arr: tf.Tensor, indices: tf.Tensor) -> tf.Tensor:
    """Partial TF implementation of numpy.take_along_axis.
    See
    https://numpy.org/doc/stable/reference/generated/numpy.take_along_axis.html
    for details.
    Args:
    arr: 2D matrix of source values.
    indices: 2D matrix of indices.
    Returns:
    2D matrix of values selected from the input.
    """

    row_indices = tf.tile(
      tf.expand_dims(tf.range(tf.shape(indices)[0]), 1),
      [1, tf.shape(indices)[1]])
    gather_indices = tf.concat(
      [tf.reshape(row_indices, (-1, 1)),
       tf.reshape(indices, (-1, 1))], axis=1)

    return tf.reshape(tf.gather_nd(arr, gather_indices), tf.shape(indices))


# modified from tensorflow source code to prevent type error
def _exclude(scores: tf.Tensor, identifiers: tf.Tensor, exclude: tf.Tensor,
             k: int) -> Tuple[tf.Tensor, tf.Tensor]:
    """Removes a subset of candidates from top K candidates.
    For each row of inputs excludes those candidates whose identifiers match
    any of the identifiers present in the exclude matrix for that row.
    Args:
    scores: 2D matrix of candidate scores.
    identifiers: 2D matrix of candidate identifiers.
    exclude: 2D matrix of identifiers to exclude.
    k: Number of candidates to return.
    Returns:
    Tuple of (scores, indices) of candidates after exclusions.
    """

    idents = tf.expand_dims(identifiers, -1)
    exclude = tf.expand_dims(exclude, 1)

    isin = tf.math.reduce_any(tf.math.equal(idents, exclude), -1)

    # Set the scores of the excluded candidates to a very low value.
#     adjusted_scores = (scores - tf.cast(isin, tf.float32) * 1.0e5)
    
    ##### cast input scores to prevent type error
    adjusted_scores = (tf.cast(scores, tf.float64) - tf.cast(isin, tf.float64) * 1.0e5)

    k = tf.math.minimum(k, tf.shape(scores)[1])

    _, indices = tf.math.top_k(adjusted_scores, k=k)

    return _take_along_axis(scores,
                          indices), _take_along_axis(identifiers, indices)


def top_k_with_exclusions(topk, query, exclusions, k):
    """TopK class"""
    
    adjusted_k = k + exclusions.shape[1]
    x, y = topk.call(queries=query, k=adjusted_k)
    
    return _exclude(x, y, exclude=exclusions, k=k)

In [180]:
# Ratings data.
ratings = tfds.load('movielens/100k-ratings', split="train")
# Features of all the available movies.
movies = tfds.load('movielens/100k-movies', split="train")

In [181]:
# Select the basic features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])

In [182]:
data = {"drawing":[1,0,0,0,0],"print":[0,1,1,1,0],"sculpture":[0,0,0,0,1],}
df = pd.DataFrame(data)
df.index = df.index.rename("id")
features = df.values
df

Unnamed: 0_level_0,drawing,print,sculpture
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,0,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,0,1


In [183]:
embeddings = np.random.rand(5,10)
np.arange(0,10).tolist()
df.loc[:,np.arange(0,10).tolist()] = embeddings

In [184]:
df

Unnamed: 0_level_0,drawing,print,sculpture,0,1,2,3,4,5,6,7,8,9
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,1,0,0,0.771308,0.868213,0.905044,0.699536,0.455144,0.457043,0.029889,0.673939,0.210672,0.461085
1,0,1,0,0.543021,0.130124,0.641778,0.359605,0.395645,0.967943,0.207388,0.078284,0.134154,0.634955
2,0,1,0,0.259846,0.921892,0.652759,0.195649,0.797057,0.907131,0.794753,0.002908,0.849282,0.899346
3,0,1,0,0.161075,0.592141,0.881365,0.019499,0.916479,0.028102,0.649255,0.881642,0.350867,0.318975
4,0,0,1,0.265029,0.053065,0.765379,0.544815,0.977045,0.195734,0.845131,0.350207,0.726597,0.775345


In [186]:
df.values

array([[1.        , 0.        , 0.        , 0.77130775, 0.86821302,
        0.90504351, 0.69953559, 0.45514447, 0.45704345, 0.02988889,
        0.67393916, 0.21067237, 0.46108469],
       [0.        , 1.        , 0.        , 0.54302091, 0.13012379,
        0.64177825, 0.3596049 , 0.39564496, 0.96794346, 0.20738776,
        0.0782844 , 0.13415424, 0.6349547 ],
       [0.        , 1.        , 0.        , 0.25984567, 0.92189175,
        0.65275946, 0.19564906, 0.79705659, 0.90713067, 0.79475346,
        0.00290762, 0.84928166, 0.89934586],
       [0.        , 1.        , 0.        , 0.16107494, 0.59214063,
        0.88136548, 0.01949851, 0.91647924, 0.02810239, 0.64925487,
        0.88164226, 0.35086718, 0.31897475],
       [0.        , 0.        , 1.        , 0.26502925, 0.05306534,
        0.76537894, 0.54481485, 0.9770452 , 0.19573429, 0.84513094,
        0.35020703, 0.72659734, 0.77534496]])

In [187]:
# init the topk
topk = tfrs.layers.factorized_top_k.BruteForce(k=2)
# create the index for the topk NN model
topk = topk.index(df.values)

In [188]:
# create a query
is_drawing = 0
is_print = 1
is_sculpture = 0
handmade_features = [is_drawing, is_print, is_sculpture]
calculated_features = embeddings[1,:] *.7
query_1 = np.hstack((handmade_features, calculated_features)).astype(np.float64)
query_1 = tf.constant(np.array([query_1,]), dtype=np.float64)
k=2
query_1

<tf.Tensor: shape=(1, 13), dtype=float64, numpy=
array([[0.        , 1.        , 0.        , 0.38011464, 0.09108665,
        0.44924477, 0.25172343, 0.27695147, 0.67756042, 0.14517143,
        0.05479908, 0.09390797, 0.44446829]])>

In [189]:
# get the brute force top k results
topk.call(query_1, k=k)

(<tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[2.9556431 , 2.69172594]])>,
 <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 1]], dtype=int32)>)

In [190]:
# create a query with exclusions
exclusions=tf.constant([[1]])
top_k_with_exclusions(topk, query_1, exclusions, k)

(<tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[2.9556431 , 2.10617128]])>,
 <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>)