## Step 0 - import model libraries

The example is based on the Sli_Rec model from the [recommenders](https://github.com/recommenders-team/recommenders) repository.

In [2]:
import os
import sys
import tensorflow.compat.v1 as tf
tf.get_logger().setLevel('ERROR')

from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing
from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel
from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator

# Seed reused by the model constructors in this notebook; taken from recommenders' constants.
RANDOM_SEED = SEED

# Print the interpreter and TensorFlow versions used for this run.
print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.9.20 (main, Oct  3 2024, 07:27:41) 
[GCC 11.2.0]
Tensorflow version: 2.12.0


## Step 1 - model training

The model is trained on the Amazon dataset. The general flow of the training process is based on the [Sequential Recommender Quick Start](https://github.com/recommenders-team/recommenders/blob/main/examples/00_quick_start/sequential_recsys_amazondataset.ipynb).

In [3]:
# Every file used by the training flow is kept under training/resources.
data_path = os.path.join("training", "resources")

# Pre-processed splits.
train_file = os.path.join(data_path, "train_data")
valid_file = os.path.join(data_path, "valid_data")
test_file = os.path.join(data_path, "test_data")

# Pickled vocabularies (raw id -> integer index).
user_vocab = os.path.join(data_path, "user_vocab.pkl")
item_vocab = os.path.join(data_path, "item_vocab.pkl")
cate_vocab = os.path.join(data_path, "category_vocab.pkl")

output_file = os.path.join(data_path, "output.txt")

# Raw Amazon files: the name to download and the local destination of each.
reviews_name = "reviews_Movies_and_TV_5.json"
meta_name = "meta_Movies_and_TV.json"
reviews_file = os.path.join(data_path, reviews_name)
meta_file = os.path.join(data_path, meta_name)

# Number of negative instances paired with each positive instance, per split.
train_num_ngs = 4
valid_num_ngs = 4
test_num_ngs = 9

# Sample a small item set for training and testing to keep the example fast.
sample_rate = 0.01

# Positional arguments of data_preprocessing, in the order it expects them.
input_files = [
    reviews_file,
    meta_file,
    train_file,
    valid_file,
    test_file,
    user_vocab,
    item_vocab,
    cate_vocab,
]

# Download and pre-process only once: an existing train file means the work is done.
if not os.path.exists(train_file):
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs,
    )

100%|███████████████████████████████████████| 692k/692k [09:45<00:00, 1.18kKB/s]
100%|███████████████████████████████████████| 97.5k/97.5k [01:46<00:00, 914KB/s]


Prepare model hyperparameters.

Note: remember to use `_create_vocab(train_file, user_vocab, item_vocab, cate_vocab)` to generate the user_vocab, item_vocab and cate_vocab files, if you are using your own dataset rather than using our demo Amazon dataset.

In [4]:
EPOCHS = 10
BATCH_SIZE = 400
yaml_train_config_file = "training/model_train_config.yaml"

# Values passed here are supplied to prepare_hparams alongside the YAML training config.
hparams = prepare_hparams(
    yaml_train_config_file,
    embed_l2=0.0,
    layer_l2=0.0,
    # set to 0.01 if batch normalization is disabled
    learning_rate=0.001,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    show_step=20,
    MODEL_DIR=os.path.join(data_path, "training/model"),
    SUMMARIES_DIR=os.path.join(data_path, "training/summary/"),
    user_vocab=user_vocab,
    item_vocab=item_vocab,
    cate_vocab=cate_vocab,
    need_sample=True,
    # number of negative instances for each positive instance, used for loss computation
    train_num_ngs=train_num_ngs,
)

In [None]:
# The iterator class itself (not an instance) is handed to the model together with
# the hyperparameters and the seed.
input_creator = SequentialIterator
model = SeqModel(hparams, input_creator, seed=RANDOM_SEED)

In [6]:
# Train inside a Timer so the elapsed time can be reported afterwards;
# the object returned by fit() is rebound to `model`.
with Timer() as train_time:
    model = model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs) 

# valid_num_ngs is the number of negative lines that follow each positive line in valid_file.
# The model is evaluated on valid_file after every epoch (see the "eval valid at epoch" lines).
print('Time cost for training is {0:.2f} mins'.format(train_time.interval/60.0))

step 20 , total_loss: 1.6104, data_loss: 1.6104
step 40 , total_loss: 1.6080, data_loss: 1.6080
eval valid at epoch 1: auc:0.5029,logloss:0.693,mean_mrr:0.4611,ndcg@2:0.3336,ndcg@4:0.5193,ndcg@6:0.5932,group_auc:0.5089
step 20 , total_loss: 1.5448, data_loss: 1.5448
step 40 , total_loss: 1.4160, data_loss: 1.4160
eval valid at epoch 2: auc:0.6295,logloss:0.7074,mean_mrr:0.5561,ndcg@2:0.4683,ndcg@4:0.6286,ndcg@6:0.6663,group_auc:0.6332
step 20 , total_loss: 1.3607, data_loss: 1.3607
step 40 , total_loss: 1.3485, data_loss: 1.3485
eval valid at epoch 3: auc:0.6723,logloss:0.805,mean_mrr:0.5867,ndcg@2:0.5105,ndcg@4:0.6574,ndcg@6:0.6896,group_auc:0.6656
step 20 , total_loss: 1.3178, data_loss: 1.3178
step 40 , total_loss: 1.2463, data_loss: 1.2463
eval valid at epoch 4: auc:0.6976,logloss:0.6739,mean_mrr:0.6195,ndcg@2:0.5549,ndcg@4:0.6848,ndcg@6:0.7143,group_auc:0.6934
step 20 , total_loss: 1.2371, data_loss: 1.2371
step 40 , total_loss: 1.2622, data_loss: 1.2622
eval valid at epoch 5: auc

In [7]:
# Evaluate the trained model on the held-out test file; num_ngs is the number of
# negative instances that accompany each positive instance in that file.
res_syn = model.run_eval(test_file, num_ngs=test_num_ngs)
print(res_syn)

{'auc': 0.7166, 'logloss': 0.6136, 'mean_mrr': 0.489, 'ndcg@2': 0.4021, 'ndcg@4': 0.5018, 'ndcg@6': 0.5541, 'group_auc': 0.7065}


## Step 2 - log model to Databricks

In [8]:
import sys
import mlflow
import os
from mlflow.models import infer_signature
from mlflow.pyfunc import PythonModelContext
from typing import Optional, Dict, Any
from mlflow.types import Schema, ColSpec, TensorSpec
from mlflow.models import ModelSignature

print(mlflow.__version__)

2.16.2


In [None]:
# this is the experiment name in the Databricks specific format
EXPERIMENT_NAME = "/Users/<insert_your_username>/simple_rec_experiment"

# Either fill these values in, or leave the placeholders untouched and export the
# variables of the same name before starting the kernel, so that no real credential
# has to be saved together with the notebook.
DATABRICKS_CLIENT_ID = "<insert_your_client_id>"
DATABRICKS_CLIENT_SECRET = "<insert_your_client_secret>"
DATABRICKS_HOST = "https://<insert_the_host_name>.azuredatabricks.net"


def _resolve_databricks_setting(env_name, notebook_value):
    """Return the value to use for one Databricks connection setting.

    Args:
        env_name: Name of the environment variable that carries the setting.
        notebook_value: Value written in this cell (possibly still a placeholder).

    Returns:
        ``notebook_value`` when it has been filled in; otherwise the value already
        exported under ``env_name``.

    Raises:
        ValueError: If neither source provides a filled-in value.
    """
    # A filled-in notebook value always wins, so editing and re-running the cell works.
    if "<insert_" not in notebook_value:
        return notebook_value
    exported = os.environ.get(env_name, "")
    if exported and "<insert_" not in exported:
        return exported
    raise ValueError(
        f"{env_name} is not configured: replace the placeholder in this cell "
        f"or export {env_name} before starting the kernel."
    )


# Fail early with a clear message instead of a confusing connection error later on.
if "<insert_" in EXPERIMENT_NAME:
    raise ValueError("EXPERIMENT_NAME still contains a placeholder; insert your Databricks username.")

DATABRICKS_HOST = _resolve_databricks_setting("DATABRICKS_HOST", DATABRICKS_HOST)
DATABRICKS_CLIENT_ID = _resolve_databricks_setting("DATABRICKS_CLIENT_ID", DATABRICKS_CLIENT_ID)
DATABRICKS_CLIENT_SECRET = _resolve_databricks_setting("DATABRICKS_CLIENT_SECRET", DATABRICKS_CLIENT_SECRET)

# Publish the resolved settings through the environment for the MLflow calls below.
os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST
os.environ["DATABRICKS_CLIENT_ID"] = DATABRICKS_CLIENT_ID
os.environ["DATABRICKS_CLIENT_SECRET"] = DATABRICKS_CLIENT_SECRET

# Use the Databricks workspace for both the model registry and experiment tracking.
mlflow.set_registry_uri("databricks")
mlflow.set_tracking_uri("databricks")
mlflow.set_experiment(EXPERIMENT_NAME)

In [10]:
# Local files and directories logged with the model; the wrapper looks them up
# by these keys through context.artifacts.
artifacts = dict(
    model_data="training/resources/training/model/",
    model_config="serving/model_serve_config.yaml",
    user_vocab="training/resources/user_vocab.pkl",
    item_vocab="training/resources/item_vocab.pkl",
    category_vocab="training/resources/category_vocab.pkl",
)

class SliRecModelWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around the trained SLi-Rec model.

    ``load_context`` rebuilds the model from the logged artifacts and restores the
    ``best_model`` checkpoint; ``predict`` scores a single example line built from
    the ``userId`` of the request.
    """

    def load_context(self, context):
        """Rebuild the model and restore its trained weights.

        Args:
            context: MLflow model context; ``context.artifacts`` maps the keys of the
                logged ``artifacts`` dict (``model_config``, ``user_vocab``,
                ``item_vocab``, ``category_vocab``, ``model_data``) to local paths.
        """
        # Imports are kept local to the method
        # (presumably so the wrapper carries its own dependencies when loaded for serving — TODO confirm).
        import os

        from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel
        from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator
        from recommenders.models.deeprec.deeprec_utils import prepare_hparams

        hparams = prepare_hparams(
            context.artifacts["model_config"],
            user_vocab=context.artifacts["user_vocab"],
            item_vocab=context.artifacts["item_vocab"],
            cate_vocab=context.artifacts["category_vocab"],
        )
        self.model = SeqModel(hparams, SequentialIterator)

        # The checkpoint prefix "best_model" sits directly inside the model directory
        # (the same layout used when restoring from training/resources/training/model/best_model).
        # Joining path components resolves correctly whether or not the artifact path
        # ends with a separator, unlike plain string concatenation.
        checkpoint_prefix = os.path.join(context.artifacts["model_data"], "best_model")
        self.model.load_model(checkpoint_prefix)

    def predict(self, context, model_input):
        """Score one example for the user given in ``model_input``.

        Args:
            context: MLflow model context (not used here).
            model_input: Request payload; only ``model_input["userId"]`` is read.

        Returns:
            The raw result of ``self.model.infer`` for the generated example.
        """
        it = self.model.iterator
        userId = model_input["userId"]
        # NOTE(review): apart from the user id the example line is hard-coded
        # (presumably label, user, candidate item, category, timestamp, then the item /
        # category / timestamp history — confirm against the iterator). The "itemIds" and
        # "timestamps" fields declared in the logged signature are not used.
        example_line = it.parser_one_line(f"0\t{userId}\tB0000AZT3R\tMovies\t1389657600\tB000J10EQU\tMovies\t1387756800")

        (elabel, euser_id, eitem_id, eitem_cate, eitem_history_sequence, eitem_cate_history_sequence, ecurrent_time,
         etime_diff, etime_from_first_action, etime_to_now) = example_line

        # Wrap every parsed field in a one-element list: the example is converted as a batch of one.
        res = it._convert_data(
            [elabel],
            [euser_id],
            [eitem_id],
            [eitem_cate],
            [eitem_history_sequence],
            [eitem_cate_history_sequence],
            [ecurrent_time],
            [etime_diff],
            [etime_from_first_action],
            [etime_to_now],
            0
        )
        feed_dict = it.gen_feed_dict(res)
        return self.model.infer(self.model.sess, feed_dict)

# Example request and response used to infer the signature that is logged with the model.
# NOTE(review): SliRecModelWrapper.predict reads only "userId" and returns the raw result of
# model.infer — confirm that the declared "items"/"preds" output matches what is served.
signature = infer_signature(model_input = {
    "userId": "A3R27T4HADWFFJ",
    "itemIds": ["A", "B", "C"],
    "timestamps": ["A", "B", "C"]
}, model_output = {
    "items": ["A", "B"],
    "preds": [3.14, 5.43]
})

# Start from MLflow's default conda environment and pin the libraries the wrapper imports.
# NOTE(review): these entries are appended to the conda dependency list, so both packages
# must be resolvable by conda at these versions — confirm, or move them to the pip section.
default_conda_env = mlflow.pyfunc.get_default_conda_env()
default_conda_env['dependencies'].append('tensorflow=2.12.0')
default_conda_env['dependencies'].append('recommenders=1.2.0')

# Log the wrapper, its artifacts and environment in a new run, and register it as "slirec_model".
with mlflow.start_run():
    mlflow.pyfunc.log_model(
        artifact_path="slirec_model",
        python_model=SliRecModelWrapper(),
        conda_env=default_conda_env,
        artifacts=artifacts,
        registered_model_name="slirec_model",
        signature=signature
    )

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 3955.02it/s]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2187.95it/s]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 4245.25it/s]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2468.69it/s]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2686.93i

## Additional steps

### Restore trained model

In [None]:
# Rebuild the model from the serving configuration and the vocabularies produced during
# pre-processing, then load the weights saved under the "best_model" checkpoint prefix.
input_creator = SequentialIterator
yaml_serve_config_file = 'serving/model_serve_config.yaml'

# Note: this rebinds `hparams`, which the later inspection cells read.
hparams = prepare_hparams(yaml_serve_config_file, 
                          user_vocab=user_vocab,
                          item_vocab=item_vocab,
                          cate_vocab=cate_vocab)

trained_model = SeqModel(hparams, input_creator, seed=RANDOM_SEED)
trained_model.load_model("training/resources/training/model/best_model")

In [None]:
# Evaluate the restored model on the test file (test_num_ngs negatives per positive instance).
trained_model.run_eval(test_file, num_ngs=test_num_ngs)

### Explore model details

#### Inspect model input embeddings

In [11]:
import pickle as pkl

def load_dict(filename):
    """Read and return the object pickled in ``filename``.

    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(filename, "rb") as handle:
        return pkl.load(handle)

def get_n(d, n):
    """Return the first ``n`` ``(key, value)`` pairs of ``d`` as a list."""
    pairs = list(d.items())
    return pairs[:n]

In [12]:
# Vocabulary size and configured embedding width give the user embedding table shape.
user_dict = load_dict(user_vocab)
print(
    f"User dict len: {len(user_dict)}",
    f"User embedding dimension: {hparams.user_embedding_dim}",
    f"User embedding shape: [{len(user_dict)}, {hparams.user_embedding_dim}]",
    sep="\n",
)
get_n(user_dict, 10)

User dict len: 3487
User embedding dimension: 16
User embedding shape: [3487, 16]


[('ANCOMAI0I7LVG', 0),
 ('ABO2ZI2Y5DQ9T', 1),
 ('AQP1VPK16SVWM', 2),
 ('A19ZXK9HHVRV1X', 3),
 ('A2NJO6YE954DBH', 4),
 ('A16CZRQL23NOIW', 5),
 ('AWG2O9C42XW5G', 6),
 ('A3LZGLA88K0LA0', 7),
 ('A328S9RN3U5M68', 8),
 ('A1ER6IYOMM8VCT', 9)]

In [13]:
# Vocabulary size and configured embedding width give the item embedding table shape.
item_dict = load_dict(item_vocab)
print(
    f"Item dict len: {len(item_dict)}",
    f"Item embedding dimension: {hparams.item_embedding_dim}",
    f"Item embedding shape: [{len(item_dict)}, {hparams.item_embedding_dim}]",
    sep="\n",
)
get_n(item_dict, 10)

Item dict len: 475
Item embedding dimension: 32
Item embedding shape: [475, 32]


[('default_mid', 0),
 ('B0002KVUKM', 1),
 ('078886047X', 2),
 ('B00005JM0B', 3),
 ('B00005JLXH', 4),
 ('B00005JPY0', 5),
 ('B00003CXXO', 6),
 ('B0002Z0EXQ', 7),
 ('B0006SSOHC', 8),
 ('B00005JPS8', 9)]

In [14]:
# Vocabulary size and configured embedding width give the category embedding table shape.
cate_dict = load_dict(cate_vocab)
print(
    f"Cate dict len: {len(cate_dict)}",
    f"Cate embedding dimension: {hparams.cate_embedding_dim}",
    f"Cate embedding shape: [{len(cate_dict)}, {hparams.cate_embedding_dim}]",
    sep="\n",
)
get_n(cate_dict, 10)

Cate dict len: 14
Cate embedding dimension: 8
Cate embedding shape: [14, 8]


[('default_cat', 0),
 ('Movies', 1),
 ('TV', 2),
 ('Psychedelic Rock', 3),
 ('Album-Oriented Rock (AOR)', 4),
 ('Progressive Rock', 5),
 ('Alternative Rock', 6),
 ('Movies & TV', 7),
 ('Swing Jazz', 8),
 ('British Invasion', 9)]

#### Inspect model placeholders

In [15]:
# Keep only the graph operations whose type is 'Placeholder'.
model_placeholders = [
    graph_op
    for graph_op in model.graph.get_operations()
    if graph_op.type == 'Placeholder'
]

# Show the name and output tensors (shape and dtype) of each placeholder.
for p in model_placeholders:
    print(f"Name: {p.name}, Outputs: {p.outputs}")

Name: sequence_length, Outputs: [<tf.Tensor 'sequence_length:0' shape=(None,) dtype=int32>]
Name: label, Outputs: [<tf.Tensor 'label:0' shape=(None, 1) dtype=float32>]
Name: users, Outputs: [<tf.Tensor 'users:0' shape=(None,) dtype=int32>]
Name: items, Outputs: [<tf.Tensor 'items:0' shape=(None,) dtype=int32>]
Name: cates, Outputs: [<tf.Tensor 'cates:0' shape=(None,) dtype=int32>]
Name: item_history, Outputs: [<tf.Tensor 'item_history:0' shape=(None, 50) dtype=int32>]
Name: item_cate_history, Outputs: [<tf.Tensor 'item_cate_history:0' shape=(None, 50) dtype=int32>]
Name: mask, Outputs: [<tf.Tensor 'mask:0' shape=(None, 50) dtype=int32>]
Name: time, Outputs: [<tf.Tensor 'time:0' shape=(None,) dtype=float32>]
Name: time_diff, Outputs: [<tf.Tensor 'time_diff:0' shape=(None, 50) dtype=float32>]
Name: time_from_first_action, Outputs: [<tf.Tensor 'time_from_first_action:0' shape=(None, 50) dtype=float32>]
Name: time_to_now, Outputs: [<tf.Tensor 'time_to_now:0' shape=(None, 50) dtype=float32>

#### Inspect model via Tensorboard

To inspect the model and its training process, make sure `info.write_tfevents` is set to `True` in the `model_train_config.yaml` config file. The summaries are saved during model training inside the `hparams.SUMMARIES_DIR` directory.

Run the TensorBoard via: $ tensorboard --logdir=<hparams.SUMMARIES_DIR>

#### Infer a single example

In [16]:
# Parse one tab-separated example line with the model's own iterator; the result is a
# 10-element tuple (printed below) that is unpacked into named fields.
it = model.iterator
example_line1 = it.parser_one_line("0\tA1U360OMVQRPUB\tB0000AZT3R\tMovies\t1389657600\tB000J10EQU\tMovies\t1387756800")
print(example_line1)

(elabel1, euser_id1, eitem_id1, eitem_cate1, eitem_history_sequence1, eitem_cate_history_sequence1, ecurrent_time1,
    etime_diff1, etime_from_first_action1, etime_to_now1) = example_line1

# Each field is wrapped in a one-element list, i.e. the example is converted as a batch of one.
data_dict = it._convert_data(
    [elabel1],
    [euser_id1],
    [eitem_id1],
    [eitem_cate1],
    [eitem_history_sequence1],
    [eitem_cate_history_sequence1],
    [ecurrent_time1],
    [etime_diff1],
    [etime_from_first_action1],
    [etime_to_now1],
    0
)

# Map the iterator's placeholders to the converted arrays. Only a subset of the placeholders
# is fed here (e.g. users, label and time are left out) — presumably the inference op does not
# depend on the others; TODO confirm.
feed_dict = {
    it.items: data_dict["items"],
    it.cates: data_dict["cates"],
    it.item_history: data_dict["item_history"],
    it.item_cate_history: data_dict["item_cate_history"],
    it.mask: data_dict["mask"],
    it.time_from_first_action: data_dict["time_from_first_action"],
    it.time_to_now: data_dict["time_to_now"],
}
print(feed_dict)

# Run inference in the model's own session.
pred = model.infer(model.sess, feed_dict)
print(pred)

(0, 883, 0, 1, [10], [1], 1389657600.0, array([3.09104245]), array([3.09104245]), array([3.09104245]))
{<tf.Tensor 'items:0' shape=(None,) dtype=int32>: array([0], dtype=int32), <tf.Tensor 'cates:0' shape=(None,) dtype=int32>: array([1], dtype=int32), <tf.Tensor 'item_history:0' shape=(None, 50) dtype=int32>: array([[10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0]], dtype=int32), <tf.Tensor 'item_cate_history:0' shape=(None, 50) dtype=int32>: array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0]], dtype=int32), <tf.Tensor 'mask:0' shape=(None, 50) dtype=int32>: array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 