# Imports

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

# Import training data

In [None]:
# Load the workshop data directly from GitHub (requires network access).
# train / valid are parquet tables; train.head() below shows the columns
# dummyUserId and productId.
train = pd.read_parquet("https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_train_with_alphanumeric_dummy_ids.parquet")
valid = pd.read_parquet("https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_valid_with_alphanumeric_dummy_ids.parquet")
# header=None makes pandas treat the first CSV row as data rather than column
# names; .values.flatten() then yields a 1-D array that is cast to str / int.
dummy_users = pd.read_csv("https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_dummy_users_with_alphanumeric_dummy_ids.csv", header=None).values.flatten().astype(str)
products = pd.read_csv("https://raw.githubusercontent.com/ASOS/dsf2020/main/dsf_asos_productIds.csv", header=None).values.flatten().astype(int)

In [None]:
train.head()

Unnamed: 0,dummyUserId,productId
0,b'PIXcm7Ru5KmntCy0yA1K',10524048
1,b'd0RILFB1hUzNSINMY4Ow',9137713
2,b'Ebax7lyhnKRm4xeRlWW2',5808602
3,b'vtigDw2h2vxKt0sJpEeU',10548272
4,b'r4GfiEaUGxziyjX0PyU6',10988173


In [None]:
dummy_users

array(['pmfkU4BNZhmtLgJQwJ7x', 'UDRRwOlzlWVbu7H8YCCi',
       'QHGAef0TI6dhn0wTogvW', ..., 'lcORJ5hemOZc1iGo9z7k',
       '5CqDquDAszqJp27P7AL8', 'SSPNYxJMfuKhoe1dg24m'], dtype='<U20')

In [None]:
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

# The briefest intro to tf

Tensors

In [None]:
# A constant (immutable) 1-D tensor; tf.math.square is applied element-wise.
x = tf.constant([1,2,3,4])
tf.math.square(x)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([ 1,  4,  9, 16], dtype=int32)>

In [None]:
tf.constant([[1,2,3], [4,5,6]], dtype=tf.float32)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [None]:
# Rebind x to a float32 tf.Variable; the gradient cells below differentiate with respect to it.
x = tf.Variable([1,2,3,4,5], dtype=tf.float32)
x

<tf.Variable 'Variable:0' shape=(5,) dtype=float32, numpy=array([1., 2., 3., 4., 5.], dtype=float32)>

Gradients

In [None]:
# Record the computation y = x**2 on the tape so it can be differentiated afterwards.
# x is the tf.Variable created in the previous cell.
with tf.GradientTape() as tape:
  y = tf.math.square(x)

In [None]:
y

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 1.,  4.,  9., 16., 25.], dtype=float32)>

In [None]:
# d(x**2)/dx = 2x, evaluated element-wise (see the output below).
# The tape was created without persistent=True, so gradient() can be called on it
# only once; re-run the GradientTape cell before re-running this one.
dy_dx = tape.gradient(y,x)
dy_dx

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 2.,  4.,  6.,  8., 10.], dtype=float32)>

Multiply and add tensors

In [None]:
# x is a 1x3 row vector and Y a 3x4 matrix, so tf.matmul(x, Y) is 1x4.
x = tf.constant([[1,2,3]], dtype=tf.float32)
Y = tf.constant([[1,2,3, 4], [1,2,3,4], [1,2,3,4]], dtype=tf.float32)

In [None]:
x

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[1., 2., 3.]], dtype=float32)>

In [None]:
Y

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[1., 2., 3., 4.],
       [1., 2., 3., 4.],
       [1., 2., 3., 4.]], dtype=float32)>

In [None]:
tf.matmul(x, Y)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[ 6., 12., 18., 24.]], dtype=float32)>

In [None]:
# Four-element vector that is added to the 1x4 matmul result in the next cell.
z = tf.constant([10, 11, 12, 13], dtype=tf.float32)

In [None]:
tf.matmul(x, Y) + z

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[16., 23., 30., 37.]], dtype=float32)>

This operation is very common in deep learning, so it has been abstracted:

In [None]:
# Dense layer with 4 outputs whose kernel is Y and whose bias is z, so
# dl1(x) reproduces tf.matmul(x, Y) + z from the previous cell.
# NOTE(review): `weights=` is passed as a constructor keyword here; confirm the
# installed Keras version still accepts it.
dl1 = tf.keras.layers.Dense(4, use_bias = True, weights = [Y, z])
dl1(x)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[16., 23., 30., 37.]], dtype=float32)>

You can choose to apply a function to each value in the output

In [None]:
# Same kernel and bias as dl1, but the activation adds 1 to every output value.
dl2 = tf.keras.layers.Dense(4, use_bias = True, weights = [Y, z], activation = lambda x: x+1)
dl2(x)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[17., 24., 31., 38.]], dtype=float32)>

We can put different layers together in a sequence:

In [None]:
# Single-output layer without bias; the 4x1 kernel [0, 1, 0, 1] adds together
# the 2nd and 4th input values.
dl3 = tf.keras.layers.Dense(1, use_bias=False, \
                             weights=[tf.constant([[0], [1], [0], [1]], \
                                                  dtype=tf.float32)])

In [None]:
x_b = dl2(x)
x_b

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[17., 24., 31., 38.]], dtype=float32)>

In [None]:
dl3(x_b)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[62.]], dtype=float32)>

We get more flexibility if we subclass `tf.keras.Model`:

In [None]:
class simple_model(tf.keras.Model):
  """Two chained Dense layers wrapped in a Keras model.

  The first layer uses the notebook-level tensors ``Y`` (kernel) and ``z``
  (bias) and adds 1 to every output; the second layer has a fixed 4x1 kernel
  and no bias.
  """

  def __init__(self):
    super().__init__()
    add_one = lambda x: x+1
    self.dl2 = tf.keras.layers.Dense(4, use_bias=True, weights=[Y, z],
                                     activation=add_one)
    selector_kernel = tf.constant([[0], [1], [0], [1]], dtype=tf.float32)
    self.dl3 = tf.keras.layers.Dense(1, use_bias=False,
                                     weights=[selector_kernel])

  def call(self, x):
    """Return (dl3 output, dl2 output, dl2 output + 243) for input ``x``."""
    hidden = self.dl2(x)
    combined = self.dl3(hidden)
    return combined, hidden, hidden+243

In [None]:
# Calling the model instance runs simple_model.call and returns its three-element tuple.
sm = simple_model()
sm(x)

(<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[62.]], dtype=float32)>,
 <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[17., 24., 31., 38.]], dtype=float32)>,
 <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[260., 267., 274., 281.]], dtype=float32)>)

So far we have been setting the weights of the dense layers ourselves. If we don't set them, the kernel weights are chosen randomly (and, as the output below shows, the bias starts at zero).

In [None]:
x

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[1., 2., 3.]], dtype=float32)>

In [None]:
# No weights are supplied here, so the layer creates its own on first call;
# dl6.get_weights() in the next cell shows what was created.
dl6 = tf.keras.layers.Dense(4, use_bias=True)
dl6(x)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[ 0.43433076, -1.6934373 , -3.6554015 ,  2.345599  ]],
      dtype=float32)>

In [None]:
dl6.get_weights()

[array([[-0.1492458 ,  0.6788677 , -0.34617007,  0.9130516 ],
        [-0.21619028, -0.8365337 , -0.7024578 ,  0.86763823],
        [ 0.33865237, -0.2330792 , -0.6347719 , -0.10090971]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32)]

# Define a Recommender Model

The embedding layer gives a list of random numbers for each user and each product.

In [None]:
# Embedding table with 5 rows (valid indices 0-4) of 8 values each.
embed1 = tf.keras.layers.Embedding(5, 8)

In [None]:
embed1 (2)

<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([-0.00257894, -0.03958514,  0.04025644,  0.00097768,  0.01427262,
       -0.00640005,  0.02612795,  0.01335725], dtype=float32)>

In [None]:
embed1.get_weights()

[array([[-0.01641094, -0.03399682,  0.04162959,  0.00455916,  0.00509644,
         -0.04871881, -0.01700253,  0.01818294],
        [-0.01457538, -0.04096078, -0.02669026,  0.02795622, -0.01197611,
         -0.0231735 ,  0.03887588,  0.0065553 ],
        [-0.00257894, -0.03958514,  0.04025644,  0.00097768,  0.01427262,
         -0.00640005,  0.02612795,  0.01335725],
        [-0.03630384, -0.04046137,  0.04533209, -0.01858472,  0.03376862,
         -0.01151227,  0.00717432,  0.02077557],
        [ 0.04177631, -0.04659698,  0.015605  , -0.01091342, -0.03442551,
          0.02305443,  0.01970362,  0.04549439]], dtype=float32)]

Scores can be found using the dot product.

In [None]:
dummy_users

array(['pmfkU4BNZhmtLgJQwJ7x', 'UDRRwOlzlWVbu7H8YCCi',
       'QHGAef0TI6dhn0wTogvW', ..., 'lcORJ5hemOZc1iGo9z7k',
       '5CqDquDAszqJp27P7AL8', 'SSPNYxJMfuKhoe1dg24m'], dtype='<U20')

In [None]:
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

In [None]:
# One 6-value embedding row per known user and per known product.
dummy_user_embedding = tf.keras.layers.Embedding(len(dummy_users), 6)
product_embedding = tf.keras.layers.Embedding(len(products), 6)

In [None]:
dummy_user_embedding(1)

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([ 0.0477454 ,  0.02881812, -0.01819872,  0.00404227, -0.00665909,
        0.03429418], dtype=float32)>

In [None]:
product_embedding(99)

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.02768735, -0.03918357, -0.00546094,  0.03041121,  0.02176415,
        0.0442209 ], dtype=float32)>

In [None]:
# Contract axis 0 of both 1-D embedding vectors: the dot product, i.e. one scalar score.
tf.tensordot(dummy_user_embedding(1), product_embedding(99), axes = [[0], [0]])

<tf.Tensor: shape=(), dtype=float32, numpy=-0.0008572381>

We can score multiple products at the same time, which is what we need to create a ranking.

In [None]:
# These are embedding row indices (not ASOS product ids); looking up four
# indices at once returns a 4x6 matrix of embeddings.
example_product = tf.constant([1, 66, 398, 8547])
product_embedding(example_product)

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[ 0.0063548 , -0.0145635 , -0.01531078,  0.02389188,  0.00751176,
         0.04862023],
       [ 0.02810464,  0.04816494, -0.04614706,  0.01200575,  0.00524074,
         0.04180673],
       [ 0.0305212 ,  0.02000762,  0.00081434, -0.00863719, -0.00357907,
         0.04246792],
       [ 0.04033254,  0.03011099,  0.01171038,  0.04334705,  0.03352748,
        -0.00126276]], dtype=float32)>

In [None]:
# Contract the user vector (axis 0) with the embedding axis (axis 1) of the
# 4x6 product matrix: one score per product.
tf.tensordot(dummy_user_embedding(1), product_embedding(example_product), axes=[[0], [1]])

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.0018763 , 0.00501707, 0.00346433, 0.00248897], dtype=float32)>

And we can score multiple users for multiple products which we will need to do if we are to train quickly.

But we need to map product ids to embedding ids.

In [None]:
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

In [None]:
# Immutable hash table: product id -> position of that id in `products`
# (which is used as the embedding row index). Ids not in the table map to -1.
product_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(tf.constant(products, dtype=tf.int32),
                                        range(len(products))), -1)

In [None]:
product_table.lookup(tf.constant([12058614]))

<tf.Tensor: shape=(1,), dtype=int32, numpy=array([29693], dtype=int32)>

Let's put those two things together

In [None]:
class SimpleRecommender(tf.keras.Model):
    """Embedding-based recommender that scores users against products with a dot product.

    Raw ids are translated into embedding-row indices by two static hash
    tables built in ``__init__``; an id that is not in a table is looked up
    as ``-1`` (the tables' default value).

    NOTE(review): a ``-1`` index is passed to the Embedding layers unchanged;
    confirm how unknown ids are supposed to be handled.
    """

    def __init__(self, dummy_users, products, length_of_embedding):
        """Build the id lookup tables and the two embedding layers.

        Args:
            dummy_users: sequence of user-id strings; the position of an id
                is used as its row in the user embedding.
            products: sequence of integer product ids (stored as int32); the
                position of an id is used as its row in the product embedding.
            length_of_embedding: number of values in each embedding vector.
        """
        super(SimpleRecommender, self).__init__()
        self.products = tf.constant(products, dtype=tf.int32)
        self.dummy_users = tf.constant(dummy_users, dtype=tf.string)
        # id -> row index tables; ids that are not present map to -1.
        self.dummy_user_table = tf.lookup.StaticHashTable(tf.lookup.KeyValueTensorInitializer(self.dummy_users, range(len(dummy_users))), -1)
        self.product_table = tf.lookup.StaticHashTable(tf.lookup.KeyValueTensorInitializer(self.products, range(len(products))), -1)

        self.user_embedding = tf.keras.layers.Embedding(len(dummy_users), length_of_embedding)
        self.product_embedding = tf.keras.layers.Embedding(len(products), length_of_embedding)

        # Dot product taken over the last (embedding) axis.
        self.dot = tf.keras.layers.Dot(axes=-1)

    def call(self, inputs):
        """Score products for users.

        Args:
            inputs: indexable pair; ``inputs[0]`` holds user-id strings and
                ``inputs[1]`` holds integer product ids.

        Returns:
            The result of the ``Dot(axes=-1)`` layer applied to the looked-up
            user embeddings and product embeddings.
        """
        # Fixed: the original read the builtin `input` instead of `inputs`.
        user = inputs[0]
        products = inputs[1]

        user_embedding_index = self.dummy_user_table.lookup(user)
        product_embedding_index = self.product_table.lookup(products)

        # Fixed: the original referenced the misspelt attribute `usrr_embedding`.
        user_embedding_values = self.user_embedding(user_embedding_index)
        product_embedding_values = self.product_embedding(product_embedding_index)

        return self.dot([user_embedding_values, product_embedding_values])

    @tf.function
    def call_item_item(self, product):
        """Return the 100 products whose embeddings score highest against ``product``.

        Args:
            product: integer product id (int32 tensor).

        Returns:
            ``(top_ids, top_scores)``: product ids gathered from
            ``self.products`` and their dot-product scores, best first.
        """
        product_x = self.product_table.lookup(product)
        pe = tf.expand_dims(self.product_embedding(product_x), 0)

        all_pe = tf.expand_dims(self.product_embedding.embeddings, 0)#note this only works if the layer has been built!
        # Flatten to one score per row of the product embedding table.
        scores = tf.reshape(self.dot([pe, all_pe]), [-1])

        top_scores, top_indices = tf.math.top_k(scores, k=100)
        # Map embedding row indices back to the original product ids.
        top_ids = tf.gather(self.products, top_indices)
        return top_ids, top_scores

In [None]:
dummy_users

array(['pmfkU4BNZhmtLgJQwJ7x', 'UDRRwOlzlWVbu7H8YCCi',
       'QHGAef0TI6dhn0wTogvW', ..., 'lcORJ5hemOZc1iGo9z7k',
       '5CqDquDAszqJp27P7AL8', 'SSPNYxJMfuKhoe1dg24m'], dtype='<U20')

In [None]:
products

array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

In [None]:
class SimpleRecommender(tf.keras.Model):
    """Placeholder recommender whose ``call`` does not compute anything yet.

    NOTE: this rebinds the name ``SimpleRecommender``; once this cell has run,
    the embedding-based class defined earlier in the notebook is no longer
    reachable under that name.
    """

    def __init__(self, users, products, num_recommendations):
        """Store the constructor arguments on the instance unchanged."""
        super().__init__()
        self.users = users
        self.products = products
        self.num_recommendations = num_recommendations

    def call(self, inputs):
        """Return the placeholder ``[...]`` (a list holding ``Ellipsis``)."""
        # Both elements are read from `inputs` but not used by the placeholder.
        user, products = inputs[0], inputs[1]

        # TODO: replace the placeholder below with real recommendation logic.
        recommendations = [...]
        return recommendations

# Create an instance of SimpleRecommender (the placeholder class defined just above)
sr1 = SimpleRecommender(dummy_users, products, 15)

# Call the instance with a [user ids, product ids] pair of tensors.
# The placeholder `call` returns [...], which is why the output below is [Ellipsis].
recommendations = sr1([tf.constant(['pmfkU4BNZhmtLgJQwJ7x']),
                      tf.constant([8650774, 9306139, 9961521])])
print(recommendations)


[Ellipsis]


# Creating a dataset

First create a tf.data.Dataset from the user purchase pairs.

In [None]:
# Double brackets keep each column two-dimensional, so every dataset element
# is a length-1 tensor (see the printed shapes below).
dummy_user_tensor = tf.constant(train[["dummyUserId"]].values, dtype=tf.string)
product_tensor = tf.constant(train[["productId"]].values, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))

# Show only the first (user, product) pair.
for x, y in dataset.take(1):
    print(x)
    print(y)

tf.Tensor([b'PIXcm7Ru5KmntCy0yA1K'], shape=(1,), dtype=string)
tf.Tensor([10524048], shape=(1,), dtype=int32)


For each purchase let's sample a number of products that the user did not purchase. Then the model can score each of the products and we will know we are doing a good job if the product with the highest score is the product that the user actually purchased.

We can do this using dataset.map

In [None]:
class Mapper():
    """Callable intended for ``dataset.map``; currently a pass-through.

    The constructor stores the candidate products and the requested number of
    negative samples, but ``__call__`` does not use them yet.
    """

    def __init__(self, possible_products, num_negative_products):
        """Keep the candidate products (as an int32 tensor) and the sample count."""
        self.num_possible_products = len(possible_products)
        self.possible_products_tensor = tf.constant(possible_products, dtype=tf.int32)
        self.num_negative_products = num_negative_products

    def __call__(self, user, product):
        """Return the ``(user, product)`` pair unchanged."""
        pair = (user, product)
        return pair

Let's bring the steps together to define a function which creates a dataset

In [None]:
def get_dataset():
    """Placeholder for the dataset-building steps; not implemented, returns None."""
    return None

# Train a model

We need to compile a model, set the loss and create an evaluation metric. Then we need to train the model.

Let's do a manual check on whether the model is any good.

In [None]:
test_product = 11698965

In [None]:
import tensorflow as tf

# Build a model instance from small hand-written example values.
# NOTE(review): `products` is rebound here, replacing the array of product ids
# loaded at the top of the notebook.
users = ['user1', 'user2', 'user3']
products = ['product1', 'product2', 'product3']
num_recommendations = 10

# Instantiate the SimpleRecommender class with the required arguments
model = SimpleRecommender(users, products, num_recommendations)


# Print the recommendations
# NOTE(review): `recommendations` is not assigned in this cell and `model` is
# never called; the value printed is whatever an earlier cell left behind, so
# it is not a result for `test_product`.
print("Recs for item {}: {}".format(test_product, recommendations))


Recs for item 11698965: [Ellipsis]


# Save the model

In [None]:
model_path = "models/recommender/1"

In [None]:
# Spec for a single scalar int32 product id.
# (Name corrected from the misspelt `inpute_signature`, which nothing else in the notebook reads.)
input_signature = tf.TensorSpec(shape=(), dtype=tf.int32)

In [None]:
class SimpleRecommender(tf.keras.Model):
    """Placeholder recommender exposing a traceable ``call_item_item``.

    NOTE: this rebinds the name ``SimpleRecommender`` again, replacing the
    definitions from earlier cells.
    """

    def __init__(self, users, products, num_recommendations):
        """Store the constructor arguments on the instance unchanged."""
        super().__init__()
        self.users = users
        self.products = products
        self.num_recommendations = num_recommendations

    @tf.function
    def call_item_item(self, product):
        """Return the fixed placeholder ``[10]``; ``product`` is not used yet."""
        # TODO: replace the constant below with real item-item recommendation logic.
        return [10]

# Constructor arguments; `[...]` and `...` are Ellipsis placeholders, not real data.
users = ['user1']  # List of users
products = [...]  # List of products
num_recommendations = ...  # Number of recommendations

# Instantiate the SimpleRecommender class
model = SimpleRecommender(users, products, num_recommendations)

# Create the input signature: an int32 tensor of unspecified shape
input_signature = tf.TensorSpec(shape=None, dtype=tf.int32)

# Create a dictionary of signatures: name -> concrete function traced for that input spec
signatures = {'call_item_item': model.call_item_item.get_concrete_function(input_signature)}

# Print the signatures
print(signatures)

{'call_item_item': <ConcreteFunction call_item_item(product) at 0x7F3A1B961630>}


In [None]:
# Write the model, together with its named signatures, to `model_path`, then
# load it back from the same location so that the round trip is self-contained.
tf.saved_model.save(model, model_path, signatures=signatures)
imported_model = tf.saved_model.load(model_path)


OSError: ignored

In [None]:
imported_model.signatures['call_item_item'](tf.constant([14844847]))

NameError: ignored

In [None]:
# Save without explicit signatures, then reload the SavedModel.
# exist_ok=True keeps the cell re-runnable once the directory already exists.
os.makedirs("dummy/0", exist_ok=True)
tf.saved_model.save(model, 'dummy/0')
imported = tf.saved_model.load("dummy/0")
# NOTE(review): the recorded output of this cell is a TypeError; confirm
# whether calling the reloaded object directly is expected to work here.
imported(tf.constant([14844847]))



TypeError: ignored

In [None]:
# exist_ok=True keeps the cell re-runnable once the directory already exists.
os.makedirs("dummy/1", exist_ok=True)
# A single function passed as `signatures` is exported under the default
# serving key ('serving_default').
tf.saved_model.save(model, 'dummy/1',
                    model.call_item_item.get_concrete_function(tf.TensorSpec(shape=(), dtype=tf.int32)))
# Load the model that was just written so that the signature listing (and the
# next cell) refer to it instead of an undefined or stale `imported_model`.
imported_model = tf.saved_model.load("dummy/1")
list(imported_model.signatures.keys())



NameError: ignored

In [None]:
imported_model.signatures['serving_default'](tf.constant([14844847]))

NameError: ignored

Zipping the saved model will make it easier to download.

In [None]:
from zipfile import ZipFile
import os

# Pack every file found under "models" into recommender.zip, keeping each
# file's relative path as its name inside the archive.
with ZipFile('recommender.zip', 'w') as zipObj:
    for folderName, subfolders, filenames in os.walk("models"):
        for filename in filenames:
            zipObj.write(os.path.join(folderName, filename))