In [1]:
import pandas as pd
import os

In [2]:
paths = {
    'Data' : os.path.join(os.getcwd(),'Data'),
    'Notebooks' : os.path.join(os.getcwd(),'notebooks'),
    'models' : os.path.join(os.getcwd(),'models')
}

In [3]:
df = pd.read_csv(os.path.join(paths['Data'],'amazon_co_ecommerce_sample.csv'))


users = pd.DataFrame({'user':df.manufacturer.unique()})
users['user_id'] = [i+1 for i in range(users.shape[0])]


pro = pd.DataFrame({'product':df.product_name.unique()})
pro['product_id'] = [i+1 for i in range(pro.shape[0])]

new_df = df.set_index('manufacturer').join(users.set_index('user')).set_index('product_name').join(pro.set_index('product'))

In [4]:
import tensorflow as tf
import numpy as np

In [5]:
from typing import Dict,Text

In [6]:
#!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs

In [7]:
products = list(new_df.index)

products = tf.convert_to_tensor(products)

products = tf.data.Dataset.from_tensor_slices({"product_title" : products})

In [8]:
user_id = list(new_df.user_id)
for x in range(len(user_id)):
    user_id[x] = str(user_id[x])
user_id = np.array(user_id)
sells = tf.data.Dataset.from_tensor_slices(
    ({
       "product_title" : tf.cast(new_df.index.values,tf.string),
       "user_id" : tf.cast(user_id,tf.string),
    })
)

In [9]:
sells = sells.map(lambda x: {
    "product_title": x["product_title"],
    "user_id": x["user_id"],
})
products = products.map(lambda x: x["product_title"])

In [10]:
tf.random.set_seed(42)
shuffled = sells.shuffle(100, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80)
test = shuffled.skip(80).take(20)

In [11]:
product_titles = products.batch(1_000)
user_ids = sells.batch(1_000_000).map(lambda x: x["user_id"])


unique_product_titles = np.unique(np.concatenate(list(product_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [12]:
embedding_dimension = 32

In [13]:
user_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_user_ids, mask_token=None),
  tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])

In [14]:
product_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_product_titles, mask_token=None),
  tf.keras.layers.Embedding(len(unique_product_titles) + 1, embedding_dimension)
])

In [15]:
metrics = tfrs.metrics.FactorizedTopK(
  candidates=products.batch(128).map(product_model)
)

In [16]:
task = tfrs.tasks.Retrieval(
  metrics=metrics
)

In [17]:
class RecommendationSystemModel(tfrs.Model):

  def __init__(self, user_model, product_model):
    super().__init__()
    self.product_model: tf.keras.Model = product_model
    self.user_model: tf.keras.Model = user_model
    self.task: tf.keras.layers.Layer = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings = self.user_model(features["user_id"])
    positive_product_embeddings = self.product_model(features["product_title"])
    return self.task(user_embeddings, positive_product_embeddings)

In [18]:
model = RecommendationSystemModel(user_model, product_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [19]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(1024).cache()

In [20]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2389a4670a0>

In [21]:
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.0,
 'factorized_top_k/top_10_categorical_accuracy': 0.0,
 'factorized_top_k/top_50_categorical_accuracy': 0.0,
 'factorized_top_k/top_100_categorical_accuracy': 0.0,
 'loss': 59.91405487060547,
 'regularization_loss': 0,
 'total_loss': 59.91405487060547}

In [26]:
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

index.index_from_dataset(
  tf.data.Dataset.zip((products.batch(100), products.batch(100).map(model.product_model)))
)


_, titles = index(tf.constant(["42"]))
print(f"Recommendations for user 42: {titles[0, :3]}")

Recommendations for user 42: [b'Smart Games Castle Logix Puzzle Game'
 b'10 pieces Tibetan Silver Sun Alloy Charm Pendants - A0037'
 b'Paul Lamond 4-in-1 Room on the Broom Puzzle']
