In [1]:
from rectools import Columns

# Load and split dataset
Here we use the rating data together with the users' and movies' features.

In [2]:
import utils

# Load the interim rating, user and movie CSV files through the project helper.
# Its four return values are unpacked as train interactions, train user features,
# train item features and test interactions.
df_train, user_features_train, item_features_train, df_test = (
    utils.read_split_rating_dataset_with_features(
        '../data/interim/rating.csv',
        '../data/interim/user.csv',
        '../data/interim/movie.csv',
    )
)

# Fit model and generate recommendations

In [5]:
from rectools.dataset import Dataset
from rectools.models import DSSMModel
from rectools.dataset.torch_datasets import DSSMDataset

# Build the RecTools dataset from the train interactions and both feature tables
dataset = Dataset.construct(
    df_train,
    # --- user side ---
    user_features_df=user_features_train,  # our flattened dataframe
    cat_user_features=["gender", "occupation"],  # one-hot encoded; all other features must already be numerical
    make_dense_user_features=False,  # keep the `sparse` format
    # --- item side ---
    item_features_df=item_features_train,  # our flattened dataframe
    make_dense_item_features=False,  # keep the `sparse` format
)



In [6]:
# Train DSSM on the prepared dataset
model = DSSMModel(DSSMDataset, max_epochs=10, batch_size=16)
model.fit(dataset)

# Top-10 recommendations for every user present in the train interactions,
# with already-viewed items filtered out
train_user_ids = df_train[Columns.User].unique()
recos = model.recommend(
    dataset=dataset,
    users=train_user_ids,
    filter_viewed=True,
    k=10,
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: /Users/artembatalov/Projects/movie-recsys/notebooks/lightning_logs

  | Name     | Type    | Params
-------------------------------------
0 | user_net | UserNet | 258 K 
1 | item_net | ItemNet | 35.2 K
-------------------------------------
293 K     Trainable params
0         Non-trainable params
293 K     Total params
1.173     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  rank_zero_warn(
`Trainer.fit` stopped: `max_epochs=10` reached.


As we can see, the loss is not decreasing, i.e. the model is not training.

In [10]:
# Save artifact: pickle the fitted model so it can be reloaded later.
import pickle
from pathlib import Path

model_path = Path('../models/dssm_model.pkl')
# The output folder may not exist yet (e.g. on a fresh checkout); create it
# first so that opening the file does not fail with FileNotFoundError.
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open('wb') as f:
    pickle.dump(model, f)

# Evaluate
We use the same metrics as in the previous notebook.

In [8]:
from rectools.metrics import MAP, calc_metrics, MeanInvUserFreq, Serendipity

# Metric classes to evaluate, keyed by the prefix used in the result names
metrics_name = {'MAP': MAP, 'MIUF': MeanInvUserFreq, 'Serendipity': Serendipity}

# One metric instance per (metric, cutoff) pair, named like 'MAP@5'
metrics = {
    f'{label}@{cutoff}': metric_cls(k=cutoff)
    for label, metric_cls in metrics_name.items()
    for cutoff in (1, 5, 10)
}

# Source: https://github.com/MobileTeleSystems/RecTools/blob/main/examples/5_benchmark_iALS_with_features.ipynb

In [9]:
# All distinct items seen in the train interactions form the catalog
catalog = df_train[Columns.Item].unique()

# Score the recommendations against the test interactions; the train
# interactions are passed as the users' previous history.
calc_metrics(
    metrics,
    catalog=catalog,
    prev_interactions=df_train,
    interactions=df_test,
    reco=recos,
)

{'MAP@1': 0.0015801737297596746,
 'MAP@5': 0.005944469882293707,
 'MAP@10': 0.007530860963287377,
 'MIUF@1': 3.9721646727797153,
 'MIUF@5': 3.0952953892088346,
 'MIUF@10': 3.177958730690907,
 'Serendipity@1': 0.0003278444732857451,
 'Serendipity@5': 0.0004054117996971661,
 'Serendipity@10': 0.0004518845987454034}

The results are very poor. I tried training the model with different parameters, but it didn't help. DSSM seems unsuitable for our task.