In [1]:
import sys
sys.path.append('..')

import torch
import numpy as np
import pandas as pd
from copy import deepcopy
from matplotlib import pyplot as plt
from matplotlib.colors import BASE_COLORS
from sklearn.model_selection import KFold

from utils.data import MovieLens
from utils.training import train
from uncertain.explicit import MF, UserHeuristic, ItemHeuristic, CPMF, OrdRec
from utils.evaluation import test_recommendations, uncertainty_distributions
from uncertain.extras import Ensemble, Resample, UncertainWrapper

# Seed the global RNGs before anything stochastic runs so that
# Restart Kernel -> Run All reproduces the reported numbers.
# NumPy's global RNG drives np.random.choice (Resample section) and
# KFold(shuffle=True) when no random_state is given (Zhu et al. section).
# torch's RNG presumably drives embedding initialisation and batch shuffling
# inside `train` -- NOTE(review): confirm `train` does not reseed on its own.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# MovieLens data object shared by every section below (train / val / test splits).
ML = MovieLens(batch_size=512)

# Metric dictionaries returned by test_recommendations, keyed by model name.
results = {}

MovieLens data prepared: 6040 users, 3706 items.
951889 Train interactions, 24160 validation and test interactions.


# ExplicitMF (FunkSVD)

In [2]:
# Baseline matrix-factorisation model; later sections reuse `model` as the
# point-estimate recommender that the uncertainty estimators are attached to.
mf_hparams = dict(embedding_dim=10, lr=1e-3, weight_decay=0.00001)
model = MF(ML.n_user, ML.n_item, **mf_hparams)
train(model, ML)

# Store the evaluation metrics under the 'MF' key and display them.
mf_metrics = test_recommendations(model, ML, max_k=10)
results['MF'] = mf_metrics
results['MF']

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9322907828436512,
 'Precision': array([0.01970199, 0.01804636, 0.01683223, 0.01568709, 0.01476821,
        0.0138521 , 0.01374172, 0.0133899 , 0.0129507 , 0.01273179]),
 'Recall': array([0.0049255 , 0.00902318, 0.01262417, 0.01568709, 0.01846026,
        0.02077815, 0.02404801, 0.0267798 , 0.02913907, 0.03182947]),
 'NDCG': array([0.01970199, 0.04525594, 0.07053008, 0.09142283, 0.11169154,
        0.12799105, 0.14613537, 0.16176275, 0.17543641, 0.19119861]),
 'Diversity': array([0.26933695, 0.2779155 , 0.28491636, 0.29019008, 0.29437361,
        0.29819573, 0.3017053 , 0.30438158, 0.30712889]),
 'Novelty': array([0.20803171, 0.20858448, 0.20987173, 0.21214816, 0.21345284,
        0.21489873, 0.2155827 , 0.21499078, 0.21591682, 0.2157049 ])}

# Heuristics

In [3]:
# Support = number of training interactions per user / item.
# minlength keeps both arrays aligned with the id range [0, n_user) / [0, n_item)
# even when the highest ids never occur in the training split.
user_support = np.bincount(ML.train[:, 0].astype('int'), minlength=ML.n_user)
item_support = np.bincount(ML.train[:, 1].astype('int'), minlength=ML.n_item)

# Per-item variance of the observed ratings. Column 2 of ML.train holds the
# rating (it is the prediction target elsewhere in this notebook); the previous
# version took the variance of column 0, i.e. of the user ids.
# Grouping by an integer key gives an integer index, so reindex() can line the
# result up with every item id; items with no variance (unseen, or rated only
# once -> NaN) get an uncertainty of 0.
item_ids = ML.train[:, 1].astype('int')
item_variance = (
    pd.Series(ML.train[:, 2])
    .groupby(item_ids)
    .var()
    .reindex(np.arange(ML.n_item))
    .fillna(0)
    .to_numpy()
)

# Wrap the trained MF with each heuristic. Support is negated so that more
# observations translate into lower uncertainty.
results['User support'] = test_recommendations(UserHeuristic(base_MF=model, uncertainty=-user_support), ML, max_k=10)
results['Item support'] = test_recommendations(ItemHeuristic(base_MF=model, uncertainty=-item_support), ML, max_k=10)
results['Item variance'] = test_recommendations(ItemHeuristic(base_MF=model, uncertainty=item_variance), ML, max_k=10)

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)
  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)
  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


In [13]:
# Display the metric dictionary stored for the user-support heuristic.
results['User support']

{'RMSE': 0.9325468825887001,
 'RPI': -0.18667921809633434,
 'Classification': 0.5507755441029125,
 'Quantile RMSE': array([1.11876079, 1.03612322, 1.02604573, 0.98056568, 0.97677417,
        0.94358086, 0.95954037, 0.93291548, 0.90958588, 0.91606445,
        0.85840435, 0.89529776, 0.95638289, 0.90523391, 0.89792744,
        0.86964187, 0.87439317, 0.85275025, 0.86713736, 0.84851716]),
 'Precision': array([0.01986755, 0.0182947 , 0.01721854, 0.01680464, 0.01572848,
        0.01498344, 0.0141438 , 0.01374172, 0.01361295, 0.01317881]),
 'Recall': array([0.00496689, 0.00914735, 0.01291391, 0.01680464, 0.0196606 ,
        0.02247517, 0.02475166, 0.02748344, 0.03062914, 0.03294702]),
 'NDCG': array([0.01986755, 0.04558707, 0.07133307, 0.09594926, 0.11491835,
        0.13329934, 0.148668  , 0.16324488, 0.18017337, 0.19343327]),
 'Diversity': array([0.26965707, 0.27836269, 0.28523415, 0.29045594, 0.29488823,
        0.29859178, 0.30176038, 0.30464216, 0.30727037]),
 'Novelty': array([0.204917

# Ensemble

In [4]:
# Ensemble: fit two MF models with identical hyper-parameters on the same data
# and let the Ensemble wrapper combine them.
models = []
for _ in range(2):
    member = MF(ML.n_user, ML.n_item, embedding_dim=10, lr=1e-3, weight_decay=0.00001)
    train(member, ML)
    models.append(member)

ensemble_metrics = test_recommendations(Ensemble(models), ML, max_k=10)
results['Ensemble'] = ensemble_metrics
results['Ensemble']

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9283959560378288,
 'RPI': 0.0701070635180886,
 'Classification': 0.5099441892912133,
 'Quantile RMSE': array([0.93096886, 0.9268415 , 0.906231  , 0.93451499, 0.94659978,
        0.89854347, 0.93014881, 0.90396077, 0.91257482, 0.89888292,
        0.90852646, 0.92613059, 0.94443322, 0.94579312, 0.90889399,
        0.94202278, 0.95338513, 0.91808624, 0.92824049, 0.99769083]),
 'Precision': array([0.01837748, 0.01895695, 0.01677704, 0.0151904 , 0.01480132,
        0.01437638, 0.01393094, 0.01372103, 0.01342899, 0.01306291]),
 'Recall': array([0.00459437, 0.00947848, 0.01258278, 0.0151904 , 0.01850166,
        0.02156457, 0.02437914, 0.02744205, 0.03021523, 0.03265728]),
 'NDCG': array([0.01837748, 0.04630235, 0.06913897, 0.08919177, 0.10996081,
        0.12886165, 0.14511551, 0.16269263, 0.17785676, 0.19070152]),
 'Diversity': array([0.27654081, 0.2839938 , 0.29025062, 0.29495742, 0.29907164,
        0.30266385, 0.30571644, 0.30857803, 0.3110773 ]),
 'Novelty': array([0.2030599 

# Resample

In [5]:
# Resample: fit each MF on a random 80% subsample (drawn without replacement)
# of the training interactions, then pair them with the base `model`.
models = []
og = deepcopy(ML.train)  # untouched copy of the full training interactions
try:
    for _ in range(2):
        ML.train = og[np.random.choice(len(og), int(0.8*len(og)), replace=False), :]
        models.append(MF(ML.n_user, ML.n_item, embedding_dim=10, lr=1e-3, weight_decay=0.00001))
        train(models[-1], ML)
finally:
    # Always put the full training set back, even if training is interrupted,
    # so later cells never run on a leftover subsample.
    ML.train = og

# Stored under its own key: this cell used to write to results['Ensemble'],
# silently replacing the Ensemble section's metrics.
results['Resample'] = test_recommendations(Resample(model, models), ML, max_k=10)
results['Resample']

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9325468825887001,
 'RPI': 0.18342812939682365,
 'Classification': 0.5228558998373205,
 'Quantile RMSE': array([0.9400808 , 0.91468336, 0.92969651, 0.88446109, 0.88076593,
        0.88189616, 0.93175105, 0.88955872, 0.90855257, 0.92355181,
        0.93613516, 0.90378616, 0.92709112, 0.92355315, 0.96033828,
        0.94073624, 0.93550675, 0.97969976, 0.99414173, 1.04840438]),
 'Precision': array([0.01986755, 0.0182947 , 0.01721854, 0.01680464, 0.01572848,
        0.01498344, 0.0141438 , 0.01374172, 0.01361295, 0.01317881]),
 'Recall': array([0.00496689, 0.00914735, 0.01291391, 0.01680464, 0.0196606 ,
        0.02247517, 0.02475166, 0.02748344, 0.03062914, 0.03294702]),
 'NDCG': array([0.01986755, 0.04558707, 0.07133307, 0.09594926, 0.11491835,
        0.13329934, 0.148668  , 0.16324488, 0.18017337, 0.19343327]),
 'Diversity': array([0.26965707, 0.27836269, 0.28523415, 0.29045594, 0.29488823,
        0.29859178, 0.30176038, 0.30464216, 0.30727037]),
 'Novelty': array([0.2049176

# Zhu et al.

In [3]:
# Cross-validated error model: a second MF is trained to predict the absolute
# error of an MF, and is then used as the uncertainty estimator for `model`.

# One slot per training interaction; filled fold by fold with out-of-fold errors.
errors = np.empty(len(ML.train))
# Deep copy of the whole data object: ML is mutated below and restored from this copy.
og = deepcopy(ML)
for train_idx, test_idx in KFold(n_splits=2, shuffle=True).split(ML.train):
    # Held-out rows of this fold; the remaining rows become the training set.
    test = og.train[test_idx]
    ML.train = og.train[train_idx]
    model_ = MF(ML.n_user, ML.n_item, embedding_dim=10, lr=1e-3, weight_decay=0.00001)
    train(model_, ML)
    # |prediction - target| on the held-out rows (columns 0, 1, 2 are used as user, item, target).
    errors[test_idx] = np.abs(model_.predict(torch.tensor(test[:, 0]).long(), torch.tensor(test[:, 1]).long()) - test[:, 2])
# Rebuild the full training set and replace its targets with the out-of-fold errors.
ML.train = deepcopy(og.train)
ML.train[:, 2] = errors
# Validation targets become the absolute errors of `model` on the validation rows.
# NOTE(review): `model` comes from an earlier cell -- presumably the baseline MF; confirm on a fresh run.
ML.val[:, 2] = np.abs(model.predict(torch.tensor(ML.val[:, 0]).long(), torch.tensor(ML.val[:, 1]).long()) - ML.val[:, 2])
# This MF is fitted on the error targets set above.
model_ = MF(ML.n_user, ML.n_item, embedding_dim=10, lr=1e-3, weight_decay=0.00001)
train(model_, ML)
# Rebind ML to the untouched copy before evaluating.
ML = og
results['MF-CV'] = test_recommendations(UncertainWrapper(model, model_), ML, max_k=10)
results['MF-CV']    

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
----------------------------------------------
97.5 K    Trainable params
0         Non-trainable params
97.5 K    Total params
0.390     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9322907828436512,
 'RPI': 0.5502200815202386,
 'Classification': 0.6063097629408591,
 'Quantile RMSE': array([0.87743661, 0.76259279, 0.83195957, 0.80980164, 0.84037808,
        0.83762343, 0.82149199, 0.85493808, 0.8728114 , 0.88467782,
        0.86911182, 0.89957139, 0.91262255, 0.97288379, 0.95187649,
        1.02013891, 0.99551495, 1.07995232, 1.13175618, 1.2608667 ]),
 'Precision': array([0.01970199, 0.01804636, 0.01683223, 0.01568709, 0.01476821,
        0.0138521 , 0.01374172, 0.0133899 , 0.0129507 , 0.01273179]),
 'Recall': array([0.0049255 , 0.00902318, 0.01262417, 0.01568709, 0.01846026,
        0.02077815, 0.02404801, 0.0267798 , 0.02913907, 0.03182947]),
 'NDCG': array([0.01970199, 0.04525594, 0.07053008, 0.09142283, 0.11169154,
        0.12799105, 0.14613537, 0.16176275, 0.17543641, 0.19119861]),
 'Diversity': array([0.26933695, 0.2779155 , 0.28491636, 0.29019008, 0.29437361,
        0.29819573, 0.3017053 , 0.30438158, 0.30712889]),
 'Novelty': array([0.20803171

# CPMF

In [2]:
# CPMF: note that `model` is rebound here, so cells below this one see the
# CPMF instance rather than the baseline MF.
cpmf_hparams = dict(embedding_dim=10, lr=1e-3, weight_decay=0.000001)
model = CPMF(ML.n_user, ML.n_item, **cpmf_hparams)
train(model, ML)

# Store the evaluation metrics under the 'CPMF' key and display them.
cpmf_metrics = test_recommendations(model, ML, max_k=10)
results['CPMF'] = cpmf_metrics
results['CPMF']

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
2 | user_gammas     | Embedding | 6.0 K 
3 | item_gammas     | Embedding | 3.7 K 
4 | var_activation  | Softplus  | 0     
----------------------------------------------
107 K     Trainable params
0         Non-trainable params
107 K     Total params
0.429     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9209326651362798,
 'RPI': 0.8300785921892574,
 'Classification': 0.6371003782995758,
 'Quantile RMSE': array([0.6750079 , 0.69373544, 0.74030248, 0.78416804, 0.78140947,
        0.85615991, 0.81639412, 0.84774651, 0.83642002, 0.8931536 ,
        0.92745084, 0.90992724, 0.89029515, 0.94680109, 0.96298591,
        1.00214993, 1.05032073, 1.09629012, 1.14534631, 1.305232  ]),
 'Precision': array([0.0147351 , 0.01216887, 0.0115894 , 0.01080298, 0.0102649 ,
        0.00998896, 0.00969726, 0.00923013, 0.00901398, 0.0088245 ]),
 'Recall': array([0.00368377, 0.00608444, 0.00869205, 0.01080298, 0.01283113,
        0.01498344, 0.0169702 , 0.01846026, 0.02028146, 0.02206126]),
 'NDCG': array([0.0147351 , 0.03112035, 0.04879612, 0.06355032, 0.07750783,
        0.09159535, 0.10385743, 0.11495863, 0.12535027, 0.13633635]),
 'Diversity': array([0.33959708, 0.34855437, 0.35515157, 0.3601955 , 0.3640646 ,
        0.3671534 , 0.36956869, 0.37206372, 0.37414668]),
 'Novelty': array([0.18845599

In [3]:
# Inspect the CPMF recommendations (scores and uncertainties) for one user.
# Presumably 3 is the user index and n the list length -- confirm against recommend's signature.
model.recommend(3, n=100)

Unnamed: 0,scores,uncertainties
23,5.598755,0.768105
420,5.549239,0.803180
47,5.478809,0.702145
0,5.456058,0.782872
2617,5.410639,0.489534
...,...,...
2926,4.971545,0.825357
1449,4.969021,0.786707
617,4.963731,1.040814
2046,4.962124,0.919945


# OrdRec

In [4]:
# Switch the data object to ordinal targets before fitting OrdRec.
# NOTE(review): the return value is not used, so this appears to modify ML in
# place; cells run after this one then see the ordinal version -- confirm.
ML.to_ordinal()

# `model` is rebound to the OrdRec instance.
# torch.autograd.set_detect_anomaly(True) used to be enabled here; it is a
# process-wide debugging switch that slows down every backward pass and stays
# on for the rest of the kernel session, so it is no longer set. Re-enable it
# temporarily only when hunting NaNs in the backward pass.
model = OrdRec(ML.n_user, ML.n_item, ML.score_labels, embedding_dim=10, lr=1e-3, weight_decay=0.000001)
train(model, ML)

# Store the evaluation metrics under the 'OrdRec' key and display them.
results['OrdRec'] = test_recommendations(model, ML, max_k=10)
results['OrdRec']

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type      | Params
----------------------------------------------
0 | user_embeddings | Embedding | 60.4 K
1 | item_embeddings | Embedding | 37.1 K
2 | user_betas      | Embedding | 24.2 K
3 | item_betas      | Embedding | 14.8 K
----------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.546     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  metrics['Novelty'][user] = (rec_distance[:, rated].min(1) * hits).cumsum(0) / hits.cumsum(0)


{'RMSE': 0.9192647839720627,
 'RPI': 0.9460883310448958,
 'Classification': 0.6831820961244284,
 'Quantile RMSE': array([0.60612808, 0.66677656, 0.68969374, 0.73544738, 0.75148521,
        0.77856315, 0.80440326, 0.81234659, 0.83689123, 0.86748933,
        0.89780878, 0.91466982, 0.93587136, 0.9572532 , 0.97856125,
        1.02567117, 1.06210624, 1.12219092, 1.2349641 , 1.33636791]),
 'Precision': array([0.01771523, 0.01589404, 0.01462472, 0.01432119, 0.01327815,
        0.01291391, 0.01272469, 0.01241722, 0.01214128, 0.0119702 ]),
 'Recall': array([0.00442881, 0.00794702, 0.01096854, 0.01432119, 0.01659768,
        0.01937086, 0.02226821, 0.02483444, 0.02731788, 0.0299255 ]),
 'NDCG': array([0.01771523, 0.04005154, 0.06095854, 0.08220227, 0.09838002,
        0.11560098, 0.13151087, 0.14711937, 0.15942248, 0.1735632 ]),
 'Diversity': array([0.28890842, 0.2966302 , 0.30172342, 0.30588968, 0.30927679,
        0.31181079, 0.31424075, 0.3162197 , 0.31823332]),
 'Novelty': array([0.19215511

In [5]:
# Inspect the OrdRec recommendations (scores and uncertainties) for one user.
# Presumably 6000 is the user index -- confirm against recommend's signature.
model.recommend(6000)

Unnamed: 0,scores,uncertainties
104,4.829735,0.161096
1092,4.745718,0.229131
23,4.720235,0.250613
1494,4.719439,0.261928
2031,4.712744,0.290259
47,4.705644,0.239229
2231,4.69686,0.274573
420,4.666902,0.276607
1283,4.666565,0.265476
718,4.662746,0.277982


# Results

In [None]:
# One row per model, one column per metric (array-valued metrics stay as objects).
results_df = pd.DataFrame.from_dict(results, orient='index')

# Scalar rating-prediction metrics. reindex() leaves NaN where a metric was not
# reported for a model instead of failing on a missing column.
ratings = results_df.reindex(columns=['RMSE', 'RPI', 'Classification'])
print(ratings)

# One distinct, visible colour per model. matplotlib's BASE_COLORS has only
# eight entries and the last one is white, so it hid the eighth model and
# raised IndexError for a ninth; the tab10 palette is cycled instead.
keys = results_df.index.to_list()
palette = plt.get_cmap('tab10').colors
colors = {key: palette[i % len(palette)] for i, key in enumerate(keys)}


def _plot_metric(axis, metric, model_keys, x, ylabel, xlabel='K', legend_size=15):
    """Draw one line per model for `metric` on `axis`, then label the panel.

    axis        -- matplotlib Axes to draw on
    metric      -- column of results_df holding one array per model
    model_keys  -- row labels of results_df to include
    x           -- x positions, same length as each metric array
    ylabel      -- y-axis label
    xlabel      -- x-axis label
    legend_size -- legend font size
    """
    for key in model_keys:
        axis.plot(x, results_df.loc[key, metric],
                  '-', color=colors[key], label=key, linewidth=3, alpha=0.6)
    axis.set_xlabel(xlabel, fontsize=20)
    axis.set_ylabel(ylabel, fontsize=20)
    axis.legend(ncol=2, fontsize=legend_size)


top_k = np.arange(1, 11)        # K = 1..10, matching max_k=10 used in every section
pairwise_k = np.arange(2, 11)   # the Diversity arrays have 9 entries, plotted at K = 2..10

# Figure 1: beyond-accuracy metrics.
f, ax = plt.subplots(ncols=2, figsize=(18, 5))
_plot_metric(ax[0], 'Novelty', keys, top_k, 'Expected surprise@K')
ax[0].set_xticks(top_k)
_plot_metric(ax[1], 'Diversity', keys, pairwise_k, 'Diversity@K')
ax[1].set_xticks(pairwise_k)
f.tight_layout()

# Figure 2: ranking accuracy. The x axis is shared, so ticks are set once.
f, ax = plt.subplots(ncols=3, figsize=(18, 5), sharex=True)
_plot_metric(ax[0], 'Precision', keys, top_k, 'Precision@K')
_plot_metric(ax[1], 'Recall', keys, top_k, 'Recall@K')
_plot_metric(ax[2], 'NDCG', keys, top_k, 'NDCG@K')
ax[0].set_xticks(top_k)
f.tight_layout()

# Figure 3: uncertainty quality, for CPMF and OrdRec only. Models missing from
# `results` (e.g. their section was not run) are skipped instead of raising KeyError.
keys = [key for key in ['CPMF', 'OrdRec'] if key in results_df.index]
quantiles = np.arange(1, 21)    # the Quantile RMSE arrays have 20 entries
f, ax = plt.subplots(ncols=2, figsize=(18, 5))
_plot_metric(ax[0], 'Quantile RMSE', keys, quantiles, 'RMSE',
             xlabel='Uncertainty quantile', legend_size=20)
ax[0].set_xticks(quantiles)
ax[0].set_xticklabels([round(elem, 2) for elem in np.linspace(start=0.05, stop=1, num=20).tolist()])
# NOTE(review): assumes test_recommendations reports an 'RRI' array of length 10
# for these models; the captured outputs are truncated, so this could not be
# confirmed here.
_plot_metric(ax[1], 'RRI', keys, top_k, 'RRI@K', legend_size=20)
f.tight_layout()