In [34]:
from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

from recommender import dataset
from recommender.model import ECommerceModel

tf.random.set_seed(42)

%load_ext autoreload
%autoreload 2 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Ask TensorFlow which GPUs it can see; an empty list means no GPU is visible.
devices = tf.config.list_physical_devices(device_type='GPU')
devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [46]:
# Read the raw transactions (ids are forced to `str` so they are not parsed as numbers)
# and pass the frame straight through the project's preprocessing step.
data = pd.read_csv(
    'data/Online-Retail.csv',
    parse_dates=['InvoiceDate'],
    dtype={'CustomerID': str, 'StockCode': str},
).pipe(dataset.preprocess_data)

# item_id -> description; when a StockCode appears on several rows the last row wins.
item_to_description = dict(zip(data['StockCode'], data['Description']))

print(f'Data shape {data.shape}')
data.head()

Data shape (18067, 9)


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,timestamp
438319,574311,23382,BOX OF 6 CHRISTMAS CAKE DECORATIONS,6,2011-11-03 16:56:00,3.75,15640,United Kingdom,1320339000.0
28901,538662,79321,CHILLI LIGHTS,24,2010-12-13 15:44:00,4.25,15159,United Kingdom,1292255000.0
466055,576301,22470,HEART OF WICKER LARGE,4,2011-11-14 14:40:00,2.95,14667,United Kingdom,1321282000.0
194702,553663,21080,SET/20 RED RETROSPOT PAPER NAPKINS,3,2011-05-18 12:13:00,0.85,14527,United Kingdom,1305721000.0
55819,540999,21633,SUNFLOWER DECORATIVE PARASOL,30,2011-01-13 10:08:00,3.95,13694,United Kingdom,1294913000.0


In [12]:
# Convert the preprocessed frame into the dataset consumed below: later cells call
# `.map(...)` on it and read the 'user_id' and 'item_id' keys of each element.
# NOTE(review): presumably a tf.data.Dataset with one dict per purchase — confirm
# against recommender.dataset.create_tf_dataset.
purchases = dataset.create_tf_dataset(data)

2022-06-06 00:47:59.689629: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-06 00:47:59.690207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-06 00:47:59.690452: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-06 00:47:59.690786: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [14]:
# Keep only the two id features that the retrieval model consumes.
interactions = purchases.map(
    lambda purchase: {key: purchase[key] for key in ('user_id', 'item_id')}
)

# Flat id streams, one element per purchase (so ids repeat); used to adapt the lookups.
users = purchases.map(lambda purchase: purchase['user_id'])
items = purchases.map(lambda purchase: purchase['item_id'])

In [15]:
# Peek at the first ten interactions, converted to plain Python/numpy values.
for interaction in interactions.take(10).as_numpy_iterator():
    print(interaction)

{'user_id': b'15640', 'item_id': b'23382'}
{'user_id': b'15159', 'item_id': b'79321'}
{'user_id': b'14667', 'item_id': b'22470'}
{'user_id': b'14527', 'item_id': b'21080'}
{'user_id': b'13694', 'item_id': b'21633'}
{'user_id': b'15194', 'item_id': b'22867'}
{'user_id': b'13408', 'item_id': b'23205'}
{'user_id': b'14715', 'item_id': b'82552'}
{'user_id': b'17523', 'item_id': b'22751'}
{'user_id': b'12748', 'item_id': b'22170'}


In [16]:
# String -> integer index lookups for both id spaces. mask_token=None means no index
# is set aside for a padding/mask token.
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
item_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)

# Learn each vocabulary from the ids observed in the purchase stream.
user_ids_vocabulary.adapt(users)
item_ids_vocabulary.adapt(items)

In [45]:
# Sanity-check the lookup with two customer ids and one made-up id ("Hola") that
# cannot be in the vocabulary.
indices = tf.constant([["12748", "Hola", "15640"]])
# Look up the sample ids above. The previous version passed the whole `data`
# DataFrame here, which is not what the recorded (1, 3) result corresponds to.
user_ids_vocabulary(indices)

<tf.Tensor: shape=(1, 3), dtype=int64, numpy=array([[ 4,  0, 85]])>

In [24]:
# Reverse check: 85 is the index the lookup returned for customer '15640' in the
# recorded output of the previous cell, so this should give that id back.
# The number depends on the adapted vocabulary and will change if the data changes.
user_ids_vocabulary.get_vocabulary()[85]

'15640'

In [30]:
# Positional 90/10 split: the first 90% of interactions train the model and the rest
# validate it. No shuffling happens in this cell.
dataset_size = len(interactions)

train_size = int(0.9 * dataset_size)
# Derive the validation size from the remainder so train_size + val_size always equals
# dataset_size. int(0.1 * dataset_size) rounds down independently and can be one short
# of the number of elements skip(train_size) actually yields.
val_size = dataset_size - train_size

train_dataset = interactions.take(train_size)
val_dataset = interactions.skip(train_size)

In [32]:
# Define user and item models: each tower maps a raw string id to its vocabulary
# index and then to a learned embedding of the same width.
embedding_dimension = 64

user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), embedding_dimension)
])

item_model = tf.keras.Sequential([
    item_ids_vocabulary,
    tf.keras.layers.Embedding(item_ids_vocabulary.vocabulary_size(), embedding_dimension)
])

# Define your objectives.
# `items` has one element per purchase, so frequently bought items repeat many times.
# Deduplicate before building the top-K candidate set: repeated candidates can occupy
# several top-K slots and make every metric evaluation scan far more rows than needed.
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    items.unique().batch(128).map(item_model)
  )
)

In [33]:
# Create a retrieval model.
# The project's ECommerceModel is constructed from the two towers and the task above.
model = ECommerceModel(user_model, item_model, task)

# Adagrad with a learning rate of 0.5.
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))

In [38]:
# Write TensorBoard summaries under ./logs once per epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs", update_freq='epoch')

# Keep the History object that fit() returns, so the per-epoch metrics stay available
# via `history.history` instead of being discarded and shown only as an opaque repr.
history = model.fit(
    x=train_dataset.batch(4096),
    validation_data=val_dataset.batch(4096),
    epochs=5,
    callbacks=[tensorboard_callback]
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fee8c61e350>

In [50]:
# One element per distinct StockCode: the candidate set that gets indexed for retrieval.
unique_items = tf.data.Dataset.from_tensor_slices(
    {'item_id': data['StockCode'].unique()}
).map(lambda row: row['item_id'])

In [59]:
# First ten entries of the user vocabulary, used to pick a known customer id below.
user_ids_vocabulary.get_vocabulary()[:10]

['[UNK]',
 '17841',
 '14911',
 '14096',
 '12748',
 '14606',
 '15311',
 '14646',
 '13089',
 '13263']

In [61]:
user_id = '13089'

# Show the customer's ten most recent purchases as a qualitative reference.
past_purchases = data[data['CustomerID'] == user_id].sort_values(by='InvoiceDate')
print(f"History :\n {past_purchases[['StockCode', 'Description']].tail(10)}\n")

# Use brute-force search to set up retrieval using the trained representations.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    unique_items.batch(128).map(lambda item_id: (item_id, model.item_model(item_id)))
)

# Get some recommendations.
# The result is bound to `recommended_items`, not `items`: `items` is the tf.data
# dataset of item ids that the cell defining `task` reads, and overwriting it with a
# tensor here would break that cell whenever it is re-run after this one.
scores, recommended_items = index(np.array([user_id]))

# Ids come back as bytes; decode them only for the description lookup so the
# 'Item_id' column keeps the raw values.
recommended_ids = pd.Series(recommended_items.numpy().flatten())
recommendations = pd.DataFrame({
    'Item_id': recommended_ids,
    'Description': recommended_ids.str.decode('utf-8').map(item_to_description),
    'Scores': scores.numpy().flatten(),
})

print(f'Recommendations\n {recommendations}')

History :
        StockCode                        Description
407540     23556      LANDMARK FRAME COVENT GARDEN 
448490     22712                   CARD DOLLY GIRL 
467556     82484  WOOD BLACK BOARD ANT WHITE FINISH
478575     23389             SPACEBOY MINI BACKPACK
518061     22294         HEART FILIGREE DOVE  SMALL
518861     23084                 RABBIT NIGHT LIGHT
526603     23371    SET 36 COLOUR PENCILS SPACEBOY 
526610     23209           LUNCH BAG VINTAGE DOILY 
526619     22835    HOT WATER BOTTLE I AM SO POORLY
526658     22457    NATURAL SLATE HEART CHALKBOARD 

Recommendations
     Item_id                         Description    Scores
0  b'21422'                PORCELAIN ROSE SMALL  8.755688
1  b'21191'  LARGE WHITE HONEYCOMB PAPER BELL    7.877553
2  b'23347'                I LOVE LONDON BEAKER  6.701897
3  b'22478'            BIRDHOUSE GARDEN MARKER   6.589825
4  b'23150'          IVORY SWEETHEART SOAP DISH  5.549895
5  b'22359'              GLASS JAR KINGS CHOICE  5.