In [1]:
import sys
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

from tqdm import tqdm
from matplotlib import pyplot as plt

from src.visualization import set_themes
from src.preprocessing import construct_features_meta
from src.preprocessing.data_loader import load_data
from src.sampler import BayesianSampler
from src.losses.bayesian_personalized_ranking import BayesianPersonalizedRankingLoss
from src.models.matrix_factorization import MatrixFactorization
from src.baremetal import gather_dense

from collections import defaultdict

set_themes()
tf.config.list_logical_devices()

2026-01-19 12:07:19.968030: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
failed to send, dropping 1 traces to intake at http://localhost:8126/v0.5/traces after 3 retries
  if not hasattr(np, "object"):


2026-01-19 12:07:23.255845: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
I0000 00:00:1768824443.257510  736476 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20218 MB memory:  -> device: 0, name: NVIDIA A10G, pci bus id: 0000:00:1e.0, compute capability: 8.6


[LogicalDevice(name='/device:CPU:0', device_type='CPU'),
 LogicalDevice(name='/device:GPU:0', device_type='GPU')]

In [2]:
# set random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)

# Load Datasets

In [3]:
train_user_interaction = load_data("dataset/yelp2018/train.txt")
train_features_meta = construct_features_meta(train_user_interaction)
train_user_interaction

Unnamed: 0,user_id,item_id
0,0,0
1,0,1
2,0,2
3,0,3
4,0,4
...,...,...
1237254,31667,22984
1237255,31667,12943
1237256,31667,29149
1237257,31667,4927


In [4]:
test_user_interaction = load_data("dataset/yelp2018/test.txt")
test_features_meta = construct_features_meta(test_user_interaction)
test_user_interaction

Unnamed: 0,user_id,item_id
0,0,795
1,0,694
2,0,1531
3,0,14517
4,0,8784
...,...,...
324142,31667,33811
324143,31667,10482
324144,31667,9585
324145,31667,36460


In [5]:
user_items = train_user_interaction.groupby("user_id")["item_id"].apply(set).to_dict()
item_users = train_user_interaction.groupby("item_id")["user_id"].apply(set).to_dict()

In [6]:
# train dataset features meta without vocabulary
{feature_name: {k: v for k, v in meta.items() if k not in ["vocabulary"]} for feature_name, meta in train_features_meta.items()}

{'user_id': {'dtype': 'int64', 'unique_count': 31668},
 'item_id': {'dtype': 'int64', 'unique_count': 38048}}

In [7]:
# test dataset features meta without vocabulary
{feature_name: {k: v for k, v in meta.items() if k not in ["vocabulary"]} for feature_name, meta in test_features_meta.items()}

{'user_id': {'dtype': 'int64', 'unique_count': 31668},
 'item_id': {'dtype': 'int64', 'unique_count': 36073}}

# Model Loading

In [8]:
sampler = BayesianSampler(item_set=train_features_meta["item_id"]["vocabulary"], user_items=user_items)

model:MatrixFactorization = keras.saving.load_model("models/matrix_factorization/single_runs/06qefx71/model.keras", custom_objects={"MatrixFactorization": MatrixFactorization})
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    loss_functions=[
        BayesianPersonalizedRankingLoss()
    ],
    sampler=sampler,
)
model

<MatrixFactorization name=matrix_factorization, built=True>

In [10]:
train_dataset = tf.data.Dataset.from_tensor_slices(
{
        "user_id": train_user_interaction["user_id"].values,
        "item_id": train_user_interaction["item_id"].values
    }
)
test_dataset = tf.data.Dataset.from_tensor_slices(
    {
        "user_id": test_user_interaction["user_id"].values,
        "item_id": test_user_interaction["item_id"].values
    }
)

model.evaluate(
    train_dataset=train_dataset.batch(16384),
    test_dataset=test_dataset.batch(16384),
)

failed to send, dropping 1 traces to intake at http://localhost:8126/v0.5/traces after 3 retries, 7 additional messages skipped
2026-01-19 12:08:07.545880: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2026-01-19 12:08:07.965133: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
100%|██████████████████████████████████████████████████████████████████████████████████████████| 31668/31668 [00:21<00:00, 1481.68it/s, recall@10=0.0359, test_recall@10=0.0215]


{'hitrate@50': 0.924024224281311,
 'recall@50': 0.1216147318482399,
 'precision@50': 0.08462545275688171,
 'hitrate@10': 0.6235948204994202,
 'recall@10': 0.03585422411561012,
 'precision@10': 0.12076228857040405,
 'hitrate@2': 0.27216747403144836,
 'recall@2': 0.009469965472817421,
 'precision@2': 0.15589870512485504,
 'test_hitrate@50': 0.4276556670665741,
 'test_recall@50': 0.07382760941982269,
 'test_precision@50': 0.013898574747145176,
 'test_hitrate@10': 0.16739295423030853,
 'test_recall@10': 0.02146848477423191,
 'test_precision@10': 0.019856005907058716,
 'test_hitrate@2': 0.04951370507478714,
 'test_recall@2': 0.00567255774512887,
 'test_precision@2': 0.025546293705701828}

In [12]:
train_dataset = tf.data.Dataset.from_tensor_slices(
{
        "user_id": train_user_interaction["user_id"].values,
        "item_id": train_user_interaction["item_id"].values
    }
)
test_dataset = tf.data.Dataset.from_tensor_slices(
    {
        "user_id": test_user_interaction["user_id"].values,
        "item_id": test_user_interaction["item_id"].values
    }
)

model.evaluate(
    train_dataset=train_dataset.batch(16384),
    test_dataset=test_dataset.batch(16384),
)

2026-01-19 11:56:43.942384: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
100%|██████████████████████████████████████████████████████████████████████████████████████████| 31668/31668 [00:20<00:00, 1566.15it/s, recall@10=0.0443, test_recall@10=0.0213]


{'hitrate@50': 0.9631488919258118,
 'recall@50': 0.15070214867591858,
 'precision@50': 0.10357262194156647,
 'hitrate@10': 0.702601969242096,
 'recall@10': 0.04428718611598015,
 'precision@10': 0.14661484956741333,
 'hitrate@2': 0.31542882323265076,
 'recall@2': 0.011230986565351486,
 'precision@2': 0.18128710985183716,
 'test_hitrate@50': 0.44312870502471924,
 'test_recall@50': 0.07644753903150558,
 'test_precision@50': 0.014259821735322475,
 'test_hitrate@10': 0.16679297387599945,
 'test_recall@10': 0.021319584921002388,
 'test_precision@10': 0.019559180364012718,
 'test_hitrate@2': 0.04951370507478714,
 'test_recall@2': 0.005596063565462828,
 'test_precision@2': 0.025214727967977524}

In [9]:
model.weights

[<Variable path=matrix_factorization/user_embedding_layer/embeddings, shape=(31669, 64), dtype=float32, value=[[ 0.04122284 -0.03375464 -0.03135395 ... -0.03889887  0.04046639
    0.04902792]
  [-0.25080466 -0.6610167   0.20508511 ...  0.5337309  -0.18886742
   -0.31242213]
  [ 0.499371    0.19324313 -0.15702222 ...  1.0893471   0.28963262
   -0.27855596]
  ...
  [-0.38125238 -0.36327666  0.1959628  ... -0.3414127   0.49053484
   -0.39224768]
  [-0.6808722  -0.2929375   0.40708423 ... -0.45136628  0.06698619
   -0.21197753]
  [-0.24011305 -0.3198388   0.07456979 ...  0.3476728  -0.3444567
    0.0891145 ]]>,
 <Variable path=matrix_factorization/item_embedding_layer/embeddings, shape=(38049, 64), dtype=float32, value=[[ 0.01339686 -0.01153576  0.03498508 ...  0.0098354   0.01840781
    0.00948099]
  [ 0.0586672  -1.0905507  -0.2887555  ...  0.3662554  -0.43358648
   -0.40028185]
  [-0.08648828 -0.3478665   0.4105714  ...  0.36650762 -0.2647711
   -0.02244535]
  ...
  [-0.09434218  0.3964

In [11]:
model.weights

[<Variable path=matrix_factorization/user_embedding_layer/embeddings, shape=(31669, 64), dtype=float32, value=[[ 0.04122284 -0.03375464 -0.03135395 ... -0.03889887  0.04046639
    0.04902792]
  [-0.22741075 -0.8344948   0.27532512 ...  0.73438865 -0.29810548
   -0.38317063]
  [ 0.6278178   0.10082845 -0.1843385  ...  0.99268556  0.25305575
   -0.9229381 ]
  ...
  [-0.48953998 -0.36756593  0.26473925 ... -0.2818513   0.7957474
   -0.5039359 ]
  [-1.0472108  -0.40492952  0.42542624 ... -0.7222515   0.13809149
   -0.27220458]
  [-0.03354614 -0.09760135  0.362066   ...  0.22128254 -0.2910582
    0.01732336]]>,
 <Variable path=matrix_factorization/item_embedding_layer/embeddings, shape=(38049, 64), dtype=float32, value=[[ 0.01339686 -0.01153576  0.03498508 ...  0.0098354   0.01840781
    0.00948099]
  [-0.09854103 -1.1817861  -0.19855662 ...  0.49198398 -0.64139414
   -0.44696164]
  [-0.12113726 -0.41584376  0.39741957 ...  0.47198364 -0.3530947
   -0.06976525]
  ...
  [-0.11300423  0.54542

failed to send, dropping 1 traces to intake at http://localhost:8126/v0.5/traces after 3 retries, 5 additional messages skipped
