# Load data

In [38]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

In [39]:
# Interaction tables; later cells read their "dummyUserId" and "productId" columns.
train = pd.read_parquet(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_train_with_alphanumeric_dummy_ids.parquet"
)
valid = pd.read_parquet(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_valid_with_alphanumeric_dummy_ids.parquet"
)

# Header-less CSVs flattened into 1-D arrays: user ids as strings, product ids as integers.
dummy_users_frame = pd.read_csv(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_dummy_users_with_alphanumeric_dummy_ids.csv",
    header=None,
)
dummy_users = dummy_users_frame.values.flatten().astype(str)

products_frame = pd.read_csv(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_productIds.csv",
    header=None,
)
products = products_frame.values.flatten().astype(int)

In [40]:
# Display the loaded product id array as a quick sanity check.
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

# Define a Recommender model

An embedding layer maps each user and each product to a vector of numbers. The vectors start out random and are adjusted during training.

In [41]:
# Toy example: 5 possible indices, each mapped to an 8-dimensional vector.
embed1 = tf.keras.layers.Embedding(input_dim=5, output_dim=8)

In [42]:
# Stand-alone 6-dimensional embedding layers with one row per known user / product.
dummy_users_embedding = tf.keras.layers.Embedding(input_dim=len(dummy_users), output_dim=6)
products_embedding = tf.keras.layers.Embedding(input_dim=len(products), output_dim=6)

In [43]:
# Look up the vector stored for index 1; this layer is never trained, so these are its initial weights.
dummy_users_embedding(1)

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.02593962,  0.01064478, -0.01795601, -0.01741855, -0.030055  ,
        0.02075242], dtype=float32)>

In [44]:
class SimpleRecommender(tf.keras.Model):
    """Dot-product recommender over user and product embeddings.

    Raw user ids (strings) and product ids (integers) are translated into
    embedding row indices through static hash tables; ids that are not in a
    table are looked up as -1.
    """

    def __init__(self, dummy_users, products, length_of_embedding):
        """Create the id lookup tables and the embedding layers.

        Args:
            dummy_users: sequence of known user ids (stored as a tf.string constant).
            products: sequence of known product ids (stored as a tf.int32 constant).
            length_of_embedding: size of every user / product embedding vector.
        """
        super().__init__()
        n_users = len(dummy_users)
        n_products = len(products)

        self.products = tf.constant(products, dtype=tf.int32)
        self.dummy_users = tf.constant(dummy_users, dtype=tf.string)

        # Every id maps to its position in the input sequence; the table default is -1.
        user_initializer = tf.lookup.KeyValueTensorInitializer(self.dummy_users, range(n_users))
        self.dummy_user_table = tf.lookup.StaticHashTable(user_initializer, -1)
        product_initializer = tf.lookup.KeyValueTensorInitializer(self.products, range(n_products))
        self.product_table = tf.lookup.StaticHashTable(product_initializer, -1)

        self.user_embedding = tf.keras.layers.Embedding(n_users, length_of_embedding)
        self.product_embedding = tf.keras.layers.Embedding(n_products, length_of_embedding)
        self.dot = tf.keras.layers.Dot(axes=-1)

    def call(self, inputs):
        """Score candidate products against a user.

        Args:
            inputs: indexable pair; ``inputs[0]`` holds raw user ids and
                ``inputs[1]`` holds raw candidate product ids.

        Returns:
            The dot product of the user embedding with each candidate
            embedding, with axis 1 squeezed out.
        """
        user, products = inputs[0], inputs[1]

        user_rows = self.dummy_user_table.lookup(user)
        product_rows = self.product_table.lookup(products)

        user_vectors = self.user_embedding(user_rows)
        product_vectors = self.product_embedding(product_rows)

        scores = self.dot([user_vectors, product_vectors])
        return tf.squeeze(scores, 1)

    @tf.function
    def call_item_item(self, product):
        """Find the 100 products whose embeddings score highest against ``product``.

        Args:
            product: raw product id to compare against the whole catalogue.

        Returns:
            Tuple ``(top_ids, top_scores)``: the raw product ids picked by
            ``tf.math.top_k`` and their dot-product scores.
        """
        query_row = self.product_table.lookup(product)
        query_vector = self.product_embedding(query_row)
        query = tf.expand_dims(query_vector, 0)

        # Reading ``.embeddings`` only works once the layer has been built.
        catalogue = tf.expand_dims(self.product_embedding.embeddings, 0)
        flat_scores = tf.reshape(self.dot([query, catalogue]), [-1])

        best_scores, best_rows = tf.math.top_k(flat_scores, k=100)
        # Translate embedding row indices back into raw product ids.
        best_ids = tf.gather(self.products, best_rows)
        return best_ids, best_scores

# Creating a dataset

In [45]:
# Double-bracket selection keeps each id column 2-D, so every example is a length-1 vector.
dummy_user_tensor = tf.constant(train[["dummyUserId"]].values, dtype=tf.string)
product_tensor = tf.constant(train[["productId"]].values, dtype=tf.int32)

dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))

# Show only the first (user, product) example.
for x, y in dataset.take(1):
    print(x)
    print(y)

tf.Tensor([b'PIXcm7Ru5KmntCy0yA1K'], shape=(1,), dtype=string)
tf.Tensor([10524048], shape=(1,), dtype=int32)


In [46]:
class Mapper:
    """Dataset map function that adds random negative products to each positive example."""

    def __init__(self, possible_products, num_negative_products):
        """Store the product catalogue and pre-compute the label vector.

        Args:
            possible_products: sequence of product ids that negatives are drawn from.
            num_negative_products: number of negatives to append to each example.
        """
        self.num_negative_products = num_negative_products
        self.num_possible_products = len(possible_products)
        self.possible_products_tensor = tf.constant(possible_products, dtype=tf.int32)

        # The true product always sits first among the candidates, so the label is one-hot at 0.
        self.y = tf.one_hot(0, num_negative_products + 1)

    def __call__(self, user, product):
        """Return ``((user, candidates), label)`` for one (user, product) example.

        ``candidates`` is the positive product followed by the sampled negatives.
        Negatives are drawn uniformly over the whole catalogue, so a draw can
        repeat or coincide with the positive product.
        """
        sampled_positions = tf.random.uniform(
            (self.num_negative_products,),
            minval=0,
            maxval=self.num_possible_products,
            dtype=tf.int32,
        )
        negatives = tf.gather(self.possible_products_tensor, sampled_positions)
        candidates = tf.concat([product, negatives], axis=0)
        return (user, candidates), self.y

In [47]:
# Attach 10 random negatives to every (user, product) example.
negative_sampler = Mapper(products, 10)
dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor)).map(negative_sampler)

# Show only the first mapped example: user, candidates and label.
for (u, c), y in dataset.take(1):
    print(u)
    print(c)
    print(y)

tf.Tensor([b'PIXcm7Ru5KmntCy0yA1K'], shape=(1,), dtype=string)
tf.Tensor(
[10524048 11961825 10054279 11724200 12314896 10550278 11499192 10001471
 10664593 10282645 10421689], shape=(11,), dtype=int32)
tf.Tensor([1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(11,), dtype=float32)


In [48]:
def get_dataset(df, products, num_negative_products, batch_size=1024):
    """Build a batched dataset of ``((user, candidates), label)`` examples.

    Args:
        df: DataFrame with a ``dummyUserId`` column and a ``productId`` column.
        products: sequence of product ids that negatives are sampled from.
        num_negative_products: number of random negatives added per example.
        batch_size: number of examples per batch. Defaults to 1024, the value
            that was previously hard-coded.

    Returns:
        A ``tf.data.Dataset`` of the examples produced by ``Mapper``, grouped
        into batches of ``batch_size``.
    """
    # Double-bracket selection keeps each id column 2-D, so every example is a length-1 vector.
    dummy_user_tensor = tf.constant(df[['dummyUserId']].values, dtype=tf.string)
    product_tensor = tf.constant(df[['productId']].values, dtype=tf.int32)

    dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))
    dataset = dataset.map(Mapper(products, num_negative_products))
    return dataset.batch(batch_size)

In [49]:
# Show only the first batch, built with 4 negatives per example.
preview_dataset = get_dataset(train, products, 4)
for (u, c), y in preview_dataset.take(1):
    print(u)
    print(c)
    print(y)

tf.Tensor(
[[b'PIXcm7Ru5KmntCy0yA1K']
 [b'd0RILFB1hUzNSINMY4Ow']
 [b'Ebax7lyhnKRm4xeRlWW2']
 ...
 [b'xuX9n8PHfSR0AP3UZ8ar']
 [b'iNnxsPFfOa9884fMjVPJ']
 [b'aD8Mn12im8lFPzXAY41P']], shape=(1024, 1), dtype=string)
tf.Tensor(
[[10524048 12364545 12914871 12941694 11661284]
 [ 9137713 11298557  9053404 12072761 11974784]
 [ 5808602 12746059  8963887 13367577 10919704]
 ...
 [11541336 12731504 12267762 11586986 12667678]
 [ 7779232 12051681 10879976 12191357 11604158]
 [ 4941259 10412718  9987846 11140016  8880603]], shape=(1024, 5), dtype=int32)
tf.Tensor(
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]], shape=(1024, 5), dtype=float32)


# Train a model

In [50]:
model = SimpleRecommender(dummy_users, products, 15)

# The model returns raw dot products and the labels are one-hot, hence from_logits=True.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.SGD(learning_rate=100.)
accuracy = tf.keras.metrics.CategoricalAccuracy()
model.compile(loss=loss_fn, optimizer=optimizer, metrics=[accuracy])

# Training and validation both use 100 random negatives per positive example.
train_dataset = get_dataset(train, products, 100)
valid_dataset = get_dataset(valid, products, 100)
model.fit(train_dataset, validation_data=valid_dataset, epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fbe2a474760>

In [51]:
# Product id used to try out the item-to-item recommendations.
test_product = 11698965


In [52]:
# Query the trained model with a scalar int32 product id and print the (ids, scores) pair.
query_product = tf.constant(test_product, dtype=tf.int32)
item_recs = model.call_item_item(query_product)
print("Recs for item {}: {}".format(test_product, item_recs))


Recs for item 11698965: (<tf.Tensor: shape=(100,), dtype=int32, numpy=
array([10581393,  9109088, 10789497, 11375321, 12115410, 11409896,
       10273738, 10958640, 11888641,  8064059, 12601161, 10752331,
       11192864, 10960540, 11427852, 12261521, 10360535, 11639416,
       11201838, 11552277, 10614692, 11981872, 11839697,  9921096,
       11794569, 10274218, 10555920, 10476580, 10614497, 11537264,
       11179817, 12267286, 11689898,  9874655,  9629382, 10405861,
       10402696, 11937479, 10516086, 10373590, 10437686, 10143323,
       11522585, 11369456, 10333386, 12220153,  9172947, 12459671,
        9791231, 10490474, 10462170, 10256214, 10619532,  8972429,
       10939556, 10831008, 12316839, 12149913,  9340786, 12192724,
        6444747, 11086204,  8757856, 10274205, 11479035, 11502168,
       12172756,  9990741, 10442999, 10820548, 10571854, 12469599,
       10382644, 11439043, 12198948,  8291428, 12006313, 10177877,
       12842496, 10971643, 12175115, 12700616, 11084966,  

# Save the model

In [53]:
model_path = "models/recommender/1"

# Export call_item_item as a named signature that takes one scalar int32 product id.
inpute_signature = tf.TensorSpec(shape=(), dtype=tf.int32)
item_item_fn = model.call_item_item.get_concrete_function(inpute_signature)
signatures = {'call_item_item': item_item_fn}

tf.saved_model.save(model, model_path, signatures=signatures)

In [55]:
# Reload the exported model from disk and list the signatures it exposes.
PATH = 'models/recommender/1'
imported_model = tf.saved_model.load(PATH)

signature_names = list(imported_model.signatures.keys())
signature_names


['call_item_item']

In [56]:
from IPython.display import HTML  # documented public import location for HTML


def path_to_image_html(path):
    """Wrap an image path/URL string in an ``<img>`` tag that is 60 pixels wide."""
    return '<img src="' + path + '" width="60" >'


# The signature was exported with a scalar int32 TensorSpec (shape=()), so the
# query is passed as a scalar rather than a one-element vector.
result_tensor = imported_model.signatures['call_item_item'](tf.constant(11698965, dtype=tf.int32))

# 'output_0' is the first value returned by call_item_item (the recommended product ids).
# NOTE(review): the column is named 'ProductUrl' but holds product ids, not URLs.
result_df = pd.DataFrame(result_tensor['output_0'].numpy(), columns=['ProductUrl']).head(10)

# NOTE(review): the formatter key 'column_name_with_image_links' matches no column
# of result_df, so path_to_image_html is never applied and the ids render as plain
# numbers. Pointing it at a real column needs a product-id -> image-URL mapping,
# which is not available in this notebook.
HTML(result_df.to_html(escape=False, formatters=dict(column_name_with_image_links=path_to_image_html)))


Unnamed: 0,ProductUrl
0,10581393
1,9109088
2,10789497
3,11375321
4,12115410
5,11409896
6,10273738
7,10958640
8,11888641
9,8064059
