# Load data

In [33]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

In [34]:
# Interaction tables used for training and validation.
train = pd.read_parquet(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_train_with_alphanumeric_dummy_ids.parquet"
)
valid = pd.read_parquet(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_valid_with_alphanumeric_dummy_ids.parquet"
)

# Header-less CSVs: collapse each one into a flat 1-D array of ids.
dummy_users_frame = pd.read_csv(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_dummy_users_with_alphanumeric_dummy_ids.csv",
    header=None,
)
dummy_users = dummy_users_frame.to_numpy().ravel().astype(str)

products_frame = pd.read_csv(
    "https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_productIds.csv",
    header=None,
)
products = products_frame.to_numpy().ravel().astype(int)

In [35]:
# Peek at the product-id catalogue; NumPy elides the middle of long arrays.
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

# Define a Recommender model

Embedding layers give a list of (initially random) numbers for each user and each product.

In [36]:
# Toy layer: 5 possible indices, each mapped to an 8-dimensional vector.
embed1 = tf.keras.layers.Embedding(input_dim=5, output_dim=8)

In [37]:
# One 6-dimensional embedding row per known user and per known product.
dummy_users_embedding = tf.keras.layers.Embedding(
    input_dim=len(dummy_users),
    output_dim=6,
)
products_embedding = tf.keras.layers.Embedding(
    input_dim=len(products),
    output_dim=6,
)

In [38]:
# Look up the embedding row stored at index 1 (a vector of 6 floats).
dummy_users_embedding(1)

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([ 0.04615753, -0.03312473, -0.02007063, -0.04280996, -0.04440054,
       -0.03684244], dtype=float32)>

In [39]:
class SimpleRecommender(tf.keras.Model):
    """Dot-product recommender over learned user and product embeddings.

    Raw user ids (strings) and raw product ids (int32) are translated to
    embedding rows through static hash tables, so callers always pass raw ids.

    Args:
        dummy_users: sequence of user id strings; position ``i`` maps to
            user-embedding row ``i``.
        products: sequence of integer product ids; position ``i`` maps to
            product-embedding row ``i``.
        length_of_embedding: size of every embedding vector.
        num_recommendations: number of products returned by
            ``call_item_item``. Defaults to 100 and is capped at the
            catalogue size, because ``tf.math.top_k`` requires ``k`` to be
            no larger than the number of scored candidates.

    NOTE: ids missing from the lookup tables resolve to -1 (the table
    default), which is not a valid embedding row; only pass known ids.
    """

    def __init__(self, dummy_users, products, length_of_embedding, num_recommendations=100):
        super().__init__()
        num_users = len(dummy_users)
        num_products = len(products)

        self.products = tf.constant(products, dtype=tf.int32)
        self.dummy_users = tf.constant(dummy_users, dtype=tf.string)

        # Raw id -> row index tables; unknown ids map to -1.
        self.dummy_user_table = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(
                self.dummy_users, tf.range(num_users, dtype=tf.int32)
            ),
            -1,
        )
        self.product_table = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(
                self.products, tf.range(num_products, dtype=tf.int32)
            ),
            -1,
        )

        self.user_embedding = tf.keras.layers.Embedding(num_users, length_of_embedding)
        self.product_embedding = tf.keras.layers.Embedding(num_products, length_of_embedding)
        self.dot = tf.keras.layers.Dot(axes=-1)

        # A plain Python int so it is baked into the traced item-item graph.
        self.num_recommendations = min(int(num_recommendations), num_products)

    def call(self, inputs):
        """Score every candidate product for each user.

        Args:
            inputs: pair ``(user, products)`` of raw ids. With the datasets
                built in this notebook, ``user`` is ``(batch, 1)`` strings
                and ``products`` is ``(batch, n_candidates)`` int32.

        Returns:
            The user/candidate dot products with axis 1 squeezed out,
            i.e. one score per candidate.
        """
        user = inputs[0]
        products = inputs[1]

        user_embedding_index = self.dummy_user_table.lookup(user)
        product_embedding_index = self.product_table.lookup(products)

        user_embedding_values = self.user_embedding(user_embedding_index)
        product_embedding_value = self.product_embedding(product_embedding_index)

        scores = self.dot([user_embedding_values, product_embedding_value])
        return tf.squeeze(scores, 1)

    @tf.function
    def call_item_item(self, product):
        """Return the products whose embeddings score highest against ``product``.

        Args:
            product: raw int32 product id.

        Returns:
            ``(top_ids, top_scores)``: the ``num_recommendations`` raw product
            ids with the largest embedding dot product, and those scores,
            ordered from highest to lowest. The query product itself is not
            filtered out.
        """
        product_x = self.product_table.lookup(product)
        pe = tf.expand_dims(self.product_embedding(product_x), 0)

        # Note: this only works if the embedding layer has been built
        # (for example after training), since it reads the weight matrix.
        all_pe = tf.expand_dims(self.product_embedding.embeddings, 0)
        scores = tf.reshape(self.dot([pe, all_pe]), [-1])

        top_scores, top_indices = tf.math.top_k(scores, k=self.num_recommendations)
        top_ids = tf.gather(self.products, top_indices)
        return top_ids, top_scores

# Creating a dataset

In [40]:
# Double brackets keep a trailing length-1 axis on every row.
dummy_user_tensor = tf.constant(train[["dummyUserId"]].to_numpy(), dtype=tf.string)
product_tensor = tf.constant(train[["productId"]].to_numpy(), dtype=tf.int32)

dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))

# Show only the first (user, product) pair.
for x, y in dataset.take(1):
    print(x)
    print(y)

tf.Tensor([b'PIXcm7Ru5KmntCy0yA1K'], shape=(1,), dtype=string)
tf.Tensor([10524048], shape=(1,), dtype=int32)


In [41]:
class Mapper:
    """Callable for ``Dataset.map`` that adds random negative products.

    Each ``(user, product)`` pair becomes ``((user, candidates), label)``,
    where ``candidates`` is the observed product followed by
    ``num_negative_products`` randomly drawn products, and ``label`` is a
    one-hot vector marking slot 0 (the observed product).
    """

    def __init__(self, possible_products, num_negative_products):
        self.num_negative_products = num_negative_products
        self.num_possible_products = len(possible_products)
        self.possible_products_tensor = tf.constant(possible_products, dtype=tf.int32)

        # The positive always sits first, so the label is the same for every row.
        self.y = tf.one_hot(0, num_negative_products + 1)

    def __call__(self, user, product):
        # Uniform draws over catalogue positions; there is no check against
        # the positive product, so it can occasionally be sampled as a negative.
        sampled_positions = tf.random.uniform(
            (self.num_negative_products,),
            minval=0,
            maxval=self.num_possible_products,
            dtype=tf.int32,
        )
        sampled_products = tf.gather(self.possible_products_tensor, sampled_positions)
        candidates = tf.concat([product, sampled_products], axis=0)
        return (user, candidates), self.y

In [42]:
# Attach 10 random negatives to every (user, product) pair.
pairs = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))
dataset = pairs.map(Mapper(products, 10))

# Inspect the first mapped example: user, candidate ids, one-hot label.
for (u, c), y in dataset.take(1):
    print(u)
    print(c)
    print(y)

tf.Tensor([b'PIXcm7Ru5KmntCy0yA1K'], shape=(1,), dtype=string)
tf.Tensor(
[10524048 12831647 10414013 10398023 10198506 11474552 10216688 10161709
  8931451  8933761 12594971], shape=(11,), dtype=int32)
tf.Tensor([1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(11,), dtype=float32)


In [43]:
def get_dataset(df, products, num_negative_products, batch_size=1024):
    """Build a batched ranking dataset from an interaction DataFrame.

    Args:
        df: DataFrame with a ``dummyUserId`` (string) column and a
            ``productId`` (integer) column.
        products: catalogue of product ids that negatives are sampled from.
        num_negative_products: number of random negatives added per row.
        batch_size: number of examples per batch. Defaults to 1024, the
            value that used to be hard-coded.

    Returns:
        A ``tf.data.Dataset`` yielding ``((users, candidates), labels)``
        batches, as produced by ``Mapper`` and then batched.
    """
    # Double brackets keep a trailing length-1 axis, so each element is a
    # shape-(1,) tensor that Mapper can concatenate with the negatives.
    dummy_user_tensor = tf.constant(df[['dummyUserId']].values, dtype=tf.string)
    product_tensor = tf.constant(df[['productId']].values, dtype=tf.int32)

    dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))
    dataset = dataset.map(Mapper(products, num_negative_products))
    return dataset.batch(batch_size)

In [44]:
# Inspect the first batch built with 4 negatives per positive.
preview_batches = get_dataset(train, products, 4)
for (u, c), y in preview_batches.take(1):
    print(u)
    print(c)
    print(y)

tf.Tensor(
[[b'PIXcm7Ru5KmntCy0yA1K']
 [b'd0RILFB1hUzNSINMY4Ow']
 [b'Ebax7lyhnKRm4xeRlWW2']
 ...
 [b'xuX9n8PHfSR0AP3UZ8ar']
 [b'iNnxsPFfOa9884fMjVPJ']
 [b'aD8Mn12im8lFPzXAY41P']], shape=(1024, 1), dtype=string)
tf.Tensor(
[[10524048 10989931 10157675 10119346 12323856]
 [ 9137713 12312597 10853744 10548269 12724547]
 [ 5808602 12210258 12942332  9529369  9424846]
 ...
 [11541336 10338218 12313394 10438691 12432039]
 [ 7779232 12594729  9968967  8683774 11227938]
 [ 4941259 11470061  9179831  9322518  9025524]], shape=(1024, 5), dtype=int32)
tf.Tensor(
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]], shape=(1024, 5), dtype=float32)


# Train a model

In [45]:
# 15-dimensional embeddings for every user and product.
model = SimpleRecommender(dummy_users, products, 15)

# The model returns raw dot-product scores, hence from_logits=True.
ranking_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
sgd = tf.keras.optimizers.SGD(learning_rate=100.)
model.compile(
    loss=ranking_loss,
    optimizer=sgd,
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# Each row is ranked against 100 random negatives, for train and validation.
model.fit(
    get_dataset(train, products, 100),
    validation_data=get_dataset(valid, products, 100),
    epochs=5,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x29aed8c4220>

In [49]:
# Product id used to spot-check the item-to-item recommendations.
test_product = 11698965


In [50]:
# Query the in-memory model with a scalar int32 product id.
query_product = tf.constant(test_product, dtype=tf.int32)
recommendations = model.call_item_item(query_product)
print("Recs for item {}: {}".format(test_product, recommendations))


Recs for item 11698965: (<tf.Tensor: shape=(100,), dtype=int32, numpy=
array([10573792,  8752491,  9565144,  8941836,  9538680, 10104302,
       10370822, 10999904, 11738844, 10231893, 10644449,  9297196,
        9071435, 11177882,  8471139, 12361627, 11490790, 10234617,
        8603141,  9366861,  8670947,  8868663, 10076510, 12314033,
        9451127, 10214956,  9510585, 12154941, 11448828,  9664881,
       10842058,  8678087, 10076176, 11434181, 11109921, 10079109,
       11124830, 11124391,  9911275, 11433285, 12944924, 11522595,
       10166170, 12054695, 10104812, 10577467, 10386511, 11191932,
        9463705,  8290892, 11375321, 12278870, 12400123, 10835862,
       10930684,  9923444, 12776114, 11356046,  9603448, 10994983,
       11552263, 10758223, 11356367, 10578083, 11299363, 11160081,
       12119822,  9061709,  9414102, 10752331, 10204147, 11698965,
        8872825, 12503024,  9157402, 10526264, 11558150,  6775544,
        8839278, 10612463,  7329586, 11581941, 10571258, 1

# Save the model

In [68]:
model_path = "models/recommender/1"

# Export only the item-to-item entry point, traced for a scalar int32 product id.
inpute_signature = tf.TensorSpec(shape=(), dtype=tf.int32)
item_item_fn = model.call_item_item.get_concrete_function(inpute_signature)
signatures = {'call_item_item': item_item_fn}

tf.saved_model.save(model, model_path, signatures=signatures)



INFO:tensorflow:Assets written to: models/recommender/1\assets


INFO:tensorflow:Assets written to: models/recommender/1\assets


In [69]:
# Load from the same relative directory the save step writes to, instead of
# an absolute path that only exists on one machine.
PATH = "models/recommender/1"
imported_model = tf.saved_model.load(PATH)
list(imported_model.signatures.keys())


['call_item_item']

In [81]:
# The signature was exported with TensorSpec(shape=(), dtype=tf.int32), so pass
# a scalar int32 id; reusing test_product keeps this comparable with the
# in-memory recommendations printed earlier.
query_product = tf.constant(test_product, dtype=tf.int32)
result_tensor = imported_model.signatures['call_item_item'](query_product)

from IPython.core.display import HTML

def path_to_image_html(path):
    """Wrap an image path/URL string in a 60-pixel-wide HTML ``<img>`` tag."""
    tag_open = '<img src="'
    tag_close = '" width="60" >'
    return tag_open + path + tag_close

# 'output_0' is the first value returned by call_item_item (the product ids).
top_product_ids = result_tensor['output_0'].numpy()
result_df = pd.DataFrame(top_product_ids, columns=['ProductUrl']).head(10)

# NOTE(review): the formatter key below does not match any column of
# result_df, so path_to_image_html is never applied and the table shows the
# plain ids -- confirm which column was meant to hold image links.
column_formatters = {'column_name_with_image_links': path_to_image_html}
HTML(result_df.to_html(escape=False, formatters=column_formatters))


Unnamed: 0,ProductUrl
0,10573792
1,8752491
2,9565144
3,8941836
4,9538680
5,10104302
6,10370822
7,10999904
8,11738844
9,10231893


In [84]:
# Largest values first; the original row labels are kept.
result_df.sort_values('ProductUrl', ascending=False)

Unnamed: 0,ProductUrl
8,11738844
7,10999904
0,10573792
6,10370822
9,10231893
5,10104302
2,9565144
4,9538680
3,8941836
1,8752491
