In [1]:
import surprise
from surprise import Dataset
from surprise import Reader
import pandas as pd
import numpy as np

from functools import partial

In [2]:
# Load the scaled measurements; later cells feed this frame to surprise's Dataset.load_from_df.
min_max_log_df = pd.read_csv(filepath_or_buffer="min_max_log_scaled.csv")

In [3]:
# Preview only the first rows instead of rendering the whole frame.
min_max_log_df.head(10)

Unnamed: 0,program_name,config,cycles
0,raytrace,"4,64k,1m,16m",0.6417
1,raytrace,"4,32k,1m,16m",0.838413
2,raytrace,"4,32k,2m,32m",1.0
3,raytrace,"4,64k,2m,32m",0.949523
4,swaptions,"4,64k,1m,32m",0.398326
5,swaptions,"4,32k,2m,32m",0.420648
6,swaptions,"4,64k,2m,32m",0.397875
7,swaptions,"8,32k,2m,32m",0.301434
8,swaptions,"4,32k,1m,32m",0.420648
9,swaptions,"4,64k,1m,16m",0.398326


In [4]:
%load_ext cython

In [5]:
%%cython

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

cimport numpy as np  # noqa
import numpy as np
from six.moves import range
from surprise.utils import get_rng
from surprise.prediction_algorithms.algo_base import AlgoBase
from surprise.prediction_algorithms.predictions import PredictionImpossible

class NMF(AlgoBase):
    r"""A collaborative filtering algorithm based on Non-negative Matrix
    Factorization.

    This algorithm is very similar to :class:`SVD`. The prediction
    :math:`\hat{r}_{ui}` is set as:

    .. math::
        \hat{r}_{ui} = q_i^Tp_u,

    where user and item factors are kept **positive**. Our implementation
    follows that suggested in :cite:`NMF:2014`, which is equivalent to
    :cite:`Zhang96` in its non-regularized form. Both are direct applications
    of NMF for dense matrices :cite:`NMF_algo`.

    The optimization procedure is a (regularized) stochastic gradient descent
    with a specific choice of step size that ensures non-negativity of factors,
    provided that their initial values are also positive.

    At each step of the SGD procedure, the factors :math:`f` of user :math:`u`
    and item :math:`i` are updated as follows:

    .. math::
        p_{uf} &\leftarrow p_{uf} &\cdot \frac{\sum_{i \in I_u} q_{if}
        \cdot r_{ui}}{\sum_{i \in I_u} q_{if} \cdot \hat{r_{ui}} +
        \lambda_u |I_u| p_{uf}}\\
        q_{if} &\leftarrow q_{if} &\cdot \frac{\sum_{u \in U_i} p_{uf}
        \cdot r_{ui}}{\sum_{u \in U_i} p_{uf} \cdot \hat{r_{ui}} +
        \lambda_i |U_i| q_{if}}\\

    where :math:`\lambda_u` and :math:`\lambda_i` are regularization
    parameters. A factor is left unchanged for the epoch when its denominator
    is exactly zero, so that the update never divides by zero.

    This algorithm is highly dependent on initial values. User and item factors
    are uniformly initialized between ``init_low`` and ``init_high``. Change
    them at your own risks!

    A biased version is available by setting the ``biased`` parameter to
    ``True``. In this case, the prediction is set as

    .. math::
        \hat{r}_{ui} = \mu + b_u + b_i + q_i^Tp_u,

    still ensuring positive factors. Baselines are optimized in the same way as
    in the :class:`SVD` algorithm. While yielding better accuracy, the biased
    version seems highly prone to overfitting so you may want to reduce the
    number of factors (or increase regularization).

    Args:
        n_factors: The number of factors. Default is ``15``.
        n_epochs: The number of iteration of the SGD procedure. Default is
            ``50``.
        biased(bool): Whether to use baselines (or biases). Default is
            ``False``.
        reg_pu: The regularization term for users :math:`\lambda_u`. Default is
            ``0.06``.
        reg_qi: The regularization term for items :math:`\lambda_i`. Default is
            ``0.06``.
        reg_bu: The regularization term for :math:`b_u`. Only relevant for
            biased version. Default is ``0.02``.
        reg_bi: The regularization term for :math:`b_i`. Only relevant for
            biased version. Default is ``0.02``.
        lr_bu: The learning rate for :math:`b_u`. Only relevant for biased
            version. Default is ``0.005``.
        lr_bi: The learning rate for :math:`b_i`. Only relevant for biased
            version. Default is ``0.005``.
        init_low: Lower bound for random initialization of factors. Must not
            be negative (a ``ValueError`` is raised otherwise) to ensure
            non-negative factors. Default is ``0``.
        init_high: Higher bound for random initialization of factors. Default
            is ``1``.
        random_state(int, RandomState instance from numpy, or ``None``):
            Determines the RNG that will be used for initialization. If
            int, ``random_state`` will be used as a seed for a new RNG. This is
            useful to get the same initialization over multiple calls to
            ``fit()``.  If RandomState instance, this same instance is used as
            RNG. If ``None``, the current RNG from numpy is used.  Default is
            ``None``.
        verbose: If ``True``, prints the current epoch. Default is ``False``.

    Attributes:
        pu(numpy array of size (n_users, n_factors)): The user factors (only
            exists if ``fit()`` has been called)
        qi(numpy array of size (n_items, n_factors)): The item factors (only
            exists if ``fit()`` has been called)
        bu(numpy array of size (n_users)): The user biases (only
            exists if ``fit()`` has been called)
        bi(numpy array of size (n_items)): The item biases (only
            exists if ``fit()`` has been called)
    """

    def __init__(self, n_factors=15, n_epochs=50, biased=False, reg_pu=.06,
                 reg_qi=.06, reg_bu=.02, reg_bi=.02, lr_bu=.005, lr_bi=.005,
                 init_low=0, init_high=1, random_state=None, verbose=False):
        """Store the hyper-parameters and reject a negative ``init_low``."""

        self.n_factors = n_factors
        self.n_epochs = n_epochs
        self.biased = biased
        self.reg_pu = reg_pu
        self.reg_qi = reg_qi
        self.lr_bu = lr_bu
        self.lr_bi = lr_bi
        self.reg_bu = reg_bu
        self.reg_bi = reg_bi
        self.init_low = init_low
        self.init_high = init_high
        self.random_state = random_state
        self.verbose = verbose

        # Only negative lower bounds are rejected; 0 itself is accepted.
        if self.init_low < 0:
            raise ValueError('init_low should be greater than zero')

        AlgoBase.__init__(self)

    def fit(self, trainset):
        """Train on ``trainset`` by running :meth:`sgd` and return ``self``."""

        AlgoBase.fit(self, trainset)
        self.sgd(trainset)

        return self

    def sgd(self, trainset):
        """Run ``n_epochs`` passes of the multiplicative NMF update.

        Each epoch accumulates the numerators and denominators of the update
        rule over all ratings (updating the biases on the fly when ``biased``
        is set), then rescales every user and item factor. The learned arrays
        are stored on ``self`` as ``bu``, ``bi``, ``pu`` and ``qi``.
        """

        # user and item factors
        cdef np.ndarray[np.double_t, ndim=2] pu
        cdef np.ndarray[np.double_t, ndim=2] qi

        # user and item biases
        cdef np.ndarray[np.double_t] bu
        cdef np.ndarray[np.double_t] bi

        # auxiliary matrices used in optimization process
        cdef np.ndarray[np.double_t, ndim=2] user_num
        cdef np.ndarray[np.double_t, ndim=2] user_denom
        cdef np.ndarray[np.double_t, ndim=2] item_num
        cdef np.ndarray[np.double_t, ndim=2] item_denom

        cdef int u, i, f
        # Read once into C locals so the inner loops do not repeat Python
        # attribute lookups on every iteration.
        cdef int n_factors = self.n_factors
        cdef int n_users = trainset.n_users
        cdef int n_items = trainset.n_items
        cdef bint biased = self.biased
        cdef double r, est, dot, err
        cdef double reg_pu = self.reg_pu
        cdef double reg_qi = self.reg_qi
        cdef double reg_bu = self.reg_bu
        cdef double reg_bi = self.reg_bi
        cdef double lr_bu = self.lr_bu
        cdef double lr_bi = self.lr_bi
        cdef double global_mean = self.trainset.global_mean

        # Randomly initialize user and item factors
        rng = get_rng(self.random_state)
        pu = rng.uniform(self.init_low, self.init_high,
                         size=(n_users, n_factors))
        qi = rng.uniform(self.init_low, self.init_high,
                         size=(n_items, n_factors))

        # Biases start at zero and stay there in the unbiased model.
        bu = np.zeros(n_users, np.double)
        bi = np.zeros(n_items, np.double)

        # The unbiased model predicts with the dot product only.
        if not biased:
            global_mean = 0

        for current_epoch in range(self.n_epochs):

            if self.verbose:
                print("Processing epoch {}".format(current_epoch))

            # (re)initialize nums and denoms to zero
            user_num = np.zeros((n_users, n_factors))
            user_denom = np.zeros((n_users, n_factors))
            item_num = np.zeros((n_items, n_factors))
            item_denom = np.zeros((n_items, n_factors))

            # Compute numerators and denominators for users and items factors
            for u, i, r in trainset.all_ratings():

                # compute current estimation and error
                dot = 0  # <q_i, p_u>
                for f in range(n_factors):
                    dot += qi[i, f] * pu[u, f]
                est = global_mean + bu[u] + bi[i] + dot
                err = r - est

                # update biases
                if biased:
                    bu[u] += lr_bu * (err - reg_bu * bu[u])
                    bi[i] += lr_bi * (err - reg_bi * bi[i])

                # compute numerators and denominators
                for f in range(n_factors):
                    user_num[u, f] += qi[i, f] * r
                    user_denom[u, f] += qi[i, f] * est
                    item_num[i, f] += pu[u, f] * r
                    item_denom[i, f] += pu[u, f] * est

            # Update user factors
            for u in trainset.all_users():
                n_ratings = len(trainset.ur[u])
                for f in range(n_factors):
                    user_denom[u, f] += n_ratings * reg_pu * pu[u, f]
                    # Skip the update when the denominator is zero to avoid
                    # dividing by zero; the factor keeps its current value.
                    if user_denom[u, f] != 0:
                        pu[u, f] *= user_num[u, f] / user_denom[u, f]

            # Update item factors
            for i in trainset.all_items():
                n_ratings = len(trainset.ir[i])
                for f in range(n_factors):
                    item_denom[i, f] += n_ratings * reg_qi * qi[i, f]
                    # Same zero-denominator guard as for the user factors.
                    if item_denom[i, f] != 0:
                        qi[i, f] *= item_num[i, f] / item_denom[i, f]

        self.bu = bu
        self.bi = bi
        self.pu = pu
        self.qi = qi

    def estimate(self, u, i):
        """Predict the rating of inner user id ``u`` for inner item id ``i``.

        In the biased model the estimate starts from the global mean and adds
        whichever of the user bias, item bias and factor dot product are
        available. In the unbiased model the estimate is the dot product, and
        ``PredictionImpossible`` is raised unless both the user and the item
        are known to the trainset.
        """
        # Should we cythonize this as well?

        known_user = self.trainset.knows_user(u)
        known_item = self.trainset.knows_item(i)

        if self.biased:
            est = self.trainset.global_mean

            if known_user:
                est += self.bu[u]

            if known_item:
                est += self.bi[i]

            if known_user and known_item:
                est += np.dot(self.qi[i], self.pu[u])

        else:
            if known_user and known_item:
                est = np.dot(self.qi[i], self.pu[u])
            else:
                raise PredictionImpossible('User and item are unknown.')

        return est

In [6]:
# Coarse search space for the biased NMF.
# Trailing comments give the defaults of the NMF constructor defined above.
param_grid = dict(
    n_epochs=[10, 20, 50],            # NMF default: 50
    n_factors=[5, 10, 15, 20, 50],    # NMF default: 15
    lr_bu=[0.005, 0.01, 0.05, 0.1],   # NMF default: 0.005
    lr_bi=[0.005, 0.01, 0.05, 0.1],   # NMF default: 0.005
    reg_pu=[0.001, 0.01, 0.06, 0.1],  # NMF default: 0.06
    reg_qi=[0.001, 0.01, 0.06, 0.1],  # NMF default: 0.06
    reg_bu=[0.001, 0.01, 0.02, 0.1],  # NMF default: 0.02
    reg_bi=[0.001, 0.01, 0.02, 0.1],  # NMF default: 0.02
)

In [7]:
# NMF factory with the bias terms switched on; every other constructor argument is
# left open so that GridSearchCV can fill it in from the parameter grid.
biased_NMF = partial(NMF, biased=True)

In [16]:
# Ratings live on a 0-1 scale; the 'fluidanimate' program is left out of the search data.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(
    min_max_log_df.loc[min_max_log_df["program_name"] != "fluidanimate"],
    reader,
)

# 5-fold grid search over the biased NMF, scored by RMSE (train scores kept as well).
gs = surprise.model_selection.GridSearchCV(
    biased_NMF,
    param_grid,
    measures=["rmse"],
    cv=5,
    return_train_measures=True,
)
gs.fit(data)

In [17]:
# Best mean test RMSE, then the parameter combination that produced it.
print(gs.best_score["rmse"], gs.best_params["rmse"], sep="\n")

0.06202944876539507
{'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.01, 'reg_pu': 0.06, 'reg_qi': 0.01, 'reg_bu': 0.001, 'reg_bi': 0.01}


In [18]:
#pd.set_option('display.max_colwidth', None)

In [19]:
# Ten best configurations by mean test RMSE. The summary columns are picked by name:
# their position in cv_results depends on the number of CV folds, so a positional
# slice such as 10:14 silently selects other columns if `cv` changes.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")
    .loc[:, ["mean_test_rmse", "std_test_rmse", "mean_train_rmse", "std_train_rmse"]]
    .head(10)
)

Unnamed: 0,mean_test_rmse,std_test_rmse,mean_train_rmse,std_train_rmse
43409,0.062029,0.024659,0.080805,0.100696
43209,0.067193,0.032398,0.041817,0.032164
22655,0.068064,0.026184,0.042055,0.013312
43369,0.069918,0.037658,0.024559,0.001767
44467,0.070606,0.007286,0.097113,0.070105
22985,0.071322,0.041437,0.048729,0.047413
43127,0.071929,0.036581,0.055599,0.034165
43226,0.07217,0.03779,0.033809,0.004641
3492,0.072367,0.027211,0.100568,0.054403
22924,0.07253,0.028052,0.052303,0.049545


In [20]:
# Parameter sets of the ten configurations with the lowest mean test RMSE.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")["params"]
    .head(10)
)

43409    {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.01, 'reg_pu': 0.06, 'reg_qi': 0.01, 'reg_bu': 0.001, 'reg_bi': 0.01}
43209    {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.005, 'reg_pu': 0.1, 'reg_qi': 0.001, 'reg_bu': 0.02, 'reg_bi': 0.01}
22655       {'n_epochs': 20, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.005, 'reg_pu': 0.01, 'reg_qi': 0.1, 'reg_bu': 0.1, 'reg_bi': 0.1}
43369     {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.01, 'reg_pu': 0.01, 'reg_qi': 0.06, 'reg_bu': 0.02, 'reg_bi': 0.01}
44467       {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.1, 'lr_bi': 0.01, 'reg_pu': 0.06, 'reg_qi': 0.1, 'reg_bu': 0.001, 'reg_bi': 0.1}
22985     {'n_epochs': 20, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.01, 'reg_pu': 0.1, 'reg_qi': 0.001, 'reg_bu': 0.02, 'reg_bi': 0.01}
43127      {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.05, 'lr_bi': 0.005, 'reg_pu': 0.01, 'reg_qi': 0.1, 'reg_bu': 0.01, 'reg_bi': 0.1}
43226     {'n_epochs': 50, 'n_factors': 5

We can see that 20 and 50 epochs give comparable performance at 5 factors, suggesting that we should explore epoch counts in this range. 10 epochs seems to underfit and depends on more factors for good performance. The gap between mean test and train RMSE is about 0.025 for the second-best configuration (0.067 vs. 0.042). Interestingly, the mean train RMSE is greater than the mean test RMSE for the best configuration, but that configuration also has a large standard deviation of the train RMSE. The best learning rate seems to lie between 0.05 and 0.1 (row 44467) for the user biases and between 0.005 and 0.01 for the item biases. We can also consider reg_pu values between 0.01 and 0.1, reg_qi values around 0.001 and 0.06, reg_bu values around 0.001, 0.02 and 0.1, and reg_bi values between 0.01 and 0.1.

In [21]:
# Refined search space for the biased NMF, centred on the best coarse-search results.
# Trailing comments give the defaults of the NMF constructor defined above.
param_grid = dict(
    n_epochs=[20, 30, 40, 50, 60],                   # NMF default: 50
    n_factors=[3, 5, 7],                             # NMF default: 15
    lr_bu=[0.03, 0.05, 0.07, 0.1],                   # NMF default: 0.005
    lr_bi=[0.003, 0.005, 0.007, 0.01],               # NMF default: 0.005
    reg_pu=[0.01, 0.05, 0.08, 0.1],                  # NMF default: 0.06
    reg_qi=[0.001, 0.005, 0.01, 0.03, 0.06, 0.08],   # NMF default: 0.06
    reg_bu=[0.001, 0.005, 0.01, 0.05, 0.1],          # NMF default: 0.02
    reg_bi=[0.01, 0.03, 0.05, 0.07, 0.1],            # NMF default: 0.02
)

In [22]:
# Ratings live on a 0-1 scale; the 'fluidanimate' program is left out of the search data.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(
    min_max_log_df.loc[min_max_log_df["program_name"] != "fluidanimate"],
    reader,
)

# 5-fold grid search over the refined biased-NMF grid, scored by RMSE.
gs = surprise.model_selection.GridSearchCV(
    biased_NMF,
    param_grid,
    measures=["rmse"],
    cv=5,
    return_train_measures=True,
)
gs.fit(data)

In [23]:
# Best mean test RMSE, then the parameter combination that produced it.
print(gs.best_score["rmse"], gs.best_params["rmse"], sep="\n")

0.04530279422343848
{'n_epochs': 60, 'n_factors': 3, 'lr_bu': 0.07, 'lr_bi': 0.005, 'reg_pu': 0.05, 'reg_qi': 0.001, 'reg_bu': 0.001, 'reg_bi': 0.05}


In [24]:
# Ten best configurations by mean test RMSE. The summary columns are picked by name:
# their position in cv_results depends on the number of CV folds, so a positional
# slice such as 10:14 silently selects other columns if `cv` changes.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")
    .loc[:, ["mean_test_rmse", "std_test_rmse", "mean_train_rmse", "std_train_rmse"]]
    .head(10)
)

Unnamed: 0,mean_test_rmse,std_test_rmse,mean_train_rmse,std_train_rmse
120752,0.045303,0.022758,0.028017,0.029391
61237,0.045813,0.018138,0.012238,0.002438
87604,0.046312,0.017352,0.008903,0.002785
1807,0.046738,0.019381,0.01611,0.006364
37362,0.047152,0.023478,0.017943,0.005681
29405,0.048494,0.022929,0.011408,0.003592
97993,0.049001,0.019591,0.038899,0.033857
123155,0.049148,0.0145,0.046227,0.043296
5101,0.049755,0.020646,0.019143,0.003235
62259,0.050802,0.017235,0.017071,0.004345


In [25]:
# Parameter sets of the ten configurations with the lowest mean test RMSE.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")["params"]
    .head(10)
)

120752    {'n_epochs': 60, 'n_factors': 3, 'lr_bu': 0.07, 'lr_bi': 0.005, 'reg_pu': 0.05, 'reg_qi': 0.001, 'reg_bu': 0.001, 'reg_bi': 0.05}
61237      {'n_epochs': 40, 'n_factors': 3, 'lr_bu': 0.05, 'lr_bi': 0.007, 'reg_pu': 0.01, 'reg_qi': 0.005, 'reg_bu': 0.01, 'reg_bi': 0.05}
87604      {'n_epochs': 50, 'n_factors': 3, 'lr_bu': 0.03, 'lr_bi': 0.007, 'reg_pu': 0.01, 'reg_qi': 0.001, 'reg_bu': 0.001, 'reg_bi': 0.1}
1807       {'n_epochs': 20, 'n_factors': 3, 'lr_bu': 0.03, 'lr_bi': 0.01, 'reg_pu': 0.01, 'reg_qi': 0.001, 'reg_bu': 0.005, 'reg_bi': 0.05}
37362       {'n_epochs': 30, 'n_factors': 3, 'lr_bu': 0.1, 'lr_bi': 0.007, 'reg_pu': 0.05, 'reg_qi': 0.001, 'reg_bu': 0.01, 'reg_bi': 0.05}
29405     {'n_epochs': 30, 'n_factors': 3, 'lr_bu': 0.03, 'lr_bi': 0.005, 'reg_pu': 0.01, 'reg_qi': 0.001, 'reg_bu': 0.005, 'reg_bi': 0.01}
97993       {'n_epochs': 50, 'n_factors': 5, 'lr_bu': 0.03, 'lr_bi': 0.01, 'reg_pu': 0.05, 'reg_qi': 0.005, 'reg_bu': 0.05, 'reg_bi': 0.07}
123155     {'n_epoch

20 epochs seems ideal because it achieves performance comparable to the best configuration (a difference of about 0.001 in mean test RMSE) with a closer gap between mean test and train RMSE, which helps avoid overfitting. 3 factors seem sufficient to achieve the best performance.

The best configuration seems to be 20 epochs with 3 factors, learning rates of 0.03 and 0.01 for the user and item biases, and regularization factors of 0.01, 0.001, 0.005 and 0.05 for pu, qi, bu and bi, respectively.

### NMF without Bias

In [27]:
# Coarse search space for the unbiased NMF (the bias-only parameters are not searched).
# Trailing comments give the defaults of the NMF constructor defined above.
param_grid = dict(
    n_epochs=[10, 20, 50],            # NMF default: 50
    n_factors=[5, 10, 15, 20, 50],    # NMF default: 15
    reg_pu=[0.001, 0.01, 0.06, 0.1],  # NMF default: 0.06
    reg_qi=[0.001, 0.01, 0.06, 0.1],  # NMF default: 0.06
)

In [28]:
# Ratings live on a 0-1 scale; the 'fluidanimate' program is left out of the search data.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(
    min_max_log_df.loc[min_max_log_df["program_name"] != "fluidanimate"],
    reader,
)

# 5-fold grid search over the plain NMF class (biased defaults to False), scored by RMSE.
gs = surprise.model_selection.GridSearchCV(
    NMF,
    param_grid,
    measures=["rmse"],
    cv=5,
    return_train_measures=True,
)
gs.fit(data)

In [29]:
# Ten best configurations by mean test RMSE. The summary columns are picked by name:
# their position in cv_results depends on the number of CV folds, so a positional
# slice such as 10:14 silently selects other columns if `cv` changes.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")
    .loc[:, ["mean_test_rmse", "std_test_rmse", "mean_train_rmse", "std_train_rmse"]]
    .head(10)
)

Unnamed: 0,mean_test_rmse,std_test_rmse,mean_train_rmse,std_train_rmse
188,0.181977,0.039122,0.025102,0.00259
213,0.18254,0.039124,0.084578,0.00664
179,0.183961,0.043918,0.032206,0.003256
178,0.18489,0.041107,0.071562,0.004462
204,0.184893,0.033717,0.036551,0.004686
87,0.185165,0.044711,0.024444,0.003312
227,0.185806,0.038575,0.074811,0.007193
184,0.185966,0.030562,0.056986,0.00599
157,0.186281,0.041225,0.031327,0.005376
236,0.186289,0.033522,0.062156,0.00683


In [30]:
# Parameter sets of the ten configurations with the lowest mean test RMSE.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")["params"]
    .head(10)
)

188     {'n_epochs': 50, 'n_factors': 10, 'reg_pu': 0.1, 'reg_qi': 0.001}
213     {'n_epochs': 50, 'n_factors': 20, 'reg_pu': 0.01, 'reg_qi': 0.01}
179     {'n_epochs': 50, 'n_factors': 10, 'reg_pu': 0.001, 'reg_qi': 0.1}
178    {'n_epochs': 50, 'n_factors': 10, 'reg_pu': 0.001, 'reg_qi': 0.06}
204     {'n_epochs': 50, 'n_factors': 15, 'reg_pu': 0.1, 'reg_qi': 0.001}
87        {'n_epochs': 20, 'n_factors': 5, 'reg_pu': 0.01, 'reg_qi': 0.1}
227     {'n_epochs': 50, 'n_factors': 50, 'reg_pu': 0.001, 'reg_qi': 0.1}
184    {'n_epochs': 50, 'n_factors': 10, 'reg_pu': 0.06, 'reg_qi': 0.001}
157      {'n_epochs': 20, 'n_factors': 50, 'reg_pu': 0.1, 'reg_qi': 0.01}
236     {'n_epochs': 50, 'n_factors': 50, 'reg_pu': 0.1, 'reg_qi': 0.001}
Name: params, dtype: object

In [31]:
# Second search space for the unbiased NMF: finer epoch steps and larger factor counts.
# Trailing comments give the defaults of the NMF constructor defined above.
param_grid = dict(
    n_epochs=[20, 25, 30, 35, 40, 45, 50],  # NMF default: 50
    n_factors=[25, 30, 35, 40, 45, 50],     # NMF default: 15
    reg_pu=[0.001, 0.01, 0.06, 0.1],        # NMF default: 0.06
    reg_qi=[0.001, 0.01, 0.06, 0.1],        # NMF default: 0.06
)

In [32]:
# Ratings live on a 0-1 scale; the 'fluidanimate' program is left out of the search data.
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(
    min_max_log_df.loc[min_max_log_df["program_name"] != "fluidanimate"],
    reader,
)

# 5-fold grid search over the plain NMF class (biased defaults to False), scored by RMSE.
gs = surprise.model_selection.GridSearchCV(
    NMF,
    param_grid,
    measures=["rmse"],
    cv=5,
    return_train_measures=True,
)
gs.fit(data)

In [33]:
# Ten best configurations by mean test RMSE. The summary columns are picked by name:
# their position in cv_results depends on the number of CV folds, so a positional
# slice such as 10:14 silently selects other columns if `cv` changes.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")
    .loc[:, ["mean_test_rmse", "std_test_rmse", "mean_train_rmse", "std_train_rmse"]]
    .head(10)
)

Unnamed: 0,mean_test_rmse,std_test_rmse,mean_train_rmse,std_train_rmse
93,0.220899,0.051941,0.028712,0.003962
668,0.220952,0.048529,0.061274,0.005936
588,0.221016,0.050537,0.048376,0.002808
652,0.221217,0.047381,0.061333,0.009412
201,0.221589,0.051372,0.023203,0.003436
57,0.22162,0.04622,0.072143,0.006784
396,0.221759,0.04801,0.070335,0.00531
25,0.221807,0.048787,0.067718,0.007999
9,0.222064,0.051187,0.061993,0.011601
214,0.222769,0.055704,0.024667,0.00321


In [34]:
# Parameter sets of the ten configurations with the lowest mean test RMSE.
(
    pd.DataFrame.from_dict(gs.cv_results)
    .sort_values("mean_test_rmse")["params"]
    .head(10)
)

93      {'n_epochs': 20, 'n_factors': 50, 'reg_pu': 0.1, 'reg_qi': 0.01}
668    {'n_epochs': 50, 'n_factors': 50, 'reg_pu': 0.1, 'reg_qi': 0.001}
588    {'n_epochs': 50, 'n_factors': 25, 'reg_pu': 0.1, 'reg_qi': 0.001}
652    {'n_epochs': 50, 'n_factors': 45, 'reg_pu': 0.1, 'reg_qi': 0.001}
201    {'n_epochs': 30, 'n_factors': 25, 'reg_pu': 0.06, 'reg_qi': 0.01}
57     {'n_epochs': 20, 'n_factors': 40, 'reg_pu': 0.06, 'reg_qi': 0.01}
396    {'n_epochs': 40, 'n_factors': 25, 'reg_pu': 0.1, 'reg_qi': 0.001}
25     {'n_epochs': 20, 'n_factors': 30, 'reg_pu': 0.06, 'reg_qi': 0.01}
9      {'n_epochs': 20, 'n_factors': 25, 'reg_pu': 0.06, 'reg_qi': 0.01}
214    {'n_epochs': 30, 'n_factors': 30, 'reg_pu': 0.01, 'reg_qi': 0.06}
Name: params, dtype: object

We can see that NMF without bias cannot outperform NMF with bias, despite hyperparameter tuning.