In [1]:
from lenskit import batch, topn, util, crossfold as xf
from lenskit.algorithms import Recommender, als, item_knn as knn
from lenskit.algorithms.item_knn import ItemItem
from lenskit.algorithms.user_knn import UserUser
from lenskit.algorithms.als import BiasedMF, ImplicitMF
from lenskit.algorithms.funksvd import FunkSVD
from lenskit.algorithms.bias import Bias

import pandas as pd
import numpy as np

In [2]:
dfRating = pd.read_csv('data/movielens/rating_base.csv')
dfRating = dfRating.rename(columns={'user_id': 'user', 'movie_id': 'item'})
dfRating.head()

Unnamed: 0,user,item,rating
0,1,1,5
1,1,2,3
2,1,3,4
3,1,4,3
4,1,5,3


### Matrix Factorization

In [3]:
mf = BiasedMF(features=30, iterations=5)
mf = Recommender.adapt(mf) # ensure the object to be a Recommender
mf = mf.fit(dfRating) # input columns specified by name: "user", "item" and "rating"

BLAS using multiple threads - can cause oversubscription
found 1 potential runtime problems - see https://boi.st/lkpy-perf


In [4]:
batch.predict(mf, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.893536
1,1,2,3,3.192901
2,1,3,4,3.362189
3,1,4,3,3.878183
4,1,5,3,3.054348


In [10]:
mf.predictor.user_features_.shape

(943, 30)

In [11]:
mf.predictor.item_features_.shape

(1680, 30)

### Collaborative Filtering

In [12]:
kni = knn.ItemItem(20)
kni = Recommender.adapt(kni)
kni = kni.fit(dfRating)

In [13]:
users = dfRating.user.unique()
batch.recommend(kni, users, 3).head(6)

Unnamed: 0,item,score,user,rank
0,1449,5.142813,1,1
1,408,5.053245,1,2
2,1642,4.877075,1,3
3,1449,5.017128,2,1
4,169,4.836834,2,2
5,1594,4.830486,2,3


In [14]:
batch.predict(kni, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,4.117345
1,1,2,3,3.155572
2,1,3,4,3.055646
3,1,4,3,3.727712
4,1,5,3,3.137253


In [15]:
kni.predictor.item_means_.shape

(1680,)

### Baseline

In [16]:
bsl = Bias()
bsl = Recommender.adapt(bsl)
bsl = bsl.fit(dfRating)

In [17]:
batch.predict(bsl, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.947421
1,1,2,3,3.286075
2,1,3,4,3.146551
3,1,4,3,3.633182
4,1,5,3,3.378867


In [18]:
bsl.predictor.item_offsets_.shape

(1680,)

In [19]:
bsl.predictor.user_offsets_.shape

(943,)

### Evaluation

In [5]:
dfPredict = batch.predict(mf, dfRating)
dfPredict.head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.893536
1,1,2,3,3.192901
2,1,3,4,3.362189
3,1,4,3,3.878183
4,1,5,3,3.054348


In [6]:
users = dfRating.user.unique()
dfRecommend = batch.recommend(mf, users, n=10)
dfRecommend.head()

Unnamed: 0,item,score,user,rank
0,408,5.020111,1,1
1,285,4.801491,1,2
2,511,4.771285,1,3
3,171,4.739761,1,4
4,512,4.712392,1,5


In [7]:
from ml_metrics import apk, mapk
from recmetrics import mark, mapk_plot, intra_list_similarity, catalog_coverage, novelty, personalization

In [10]:
mapk?

[0;31mSignature:[0m [0mmapk[0m[0;34m([0m[0mactual[0m[0;34m,[0m [0mpredicted[0m[0;34m,[0m [0mk[0m[0;34m=[0m[0;36m10[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Computes the mean average precision at k.

This function computes the mean average prescision at k between two lists
of lists of items.

Parameters
----------
actual : list
         A list of lists of elements that are to be predicted 
         (order doesn't matter in the lists)
predicted : list
            A list of lists of predicted elements
            (order matters in the lists)
k : int, optional
    The maximum number of predicted elements

Returns
-------
score : double
        The mean average precision at k over the input lists
[0;31mFile:[0m      /opt/anaconda3/lib/python3.8/site-packages/ml_metrics/average_precision.py
[0;31mType:[0m      function


In [14]:
mark?

[0;31mSignature:[0m [0mmark[0m[0;34m([0m[0mactual[0m[0;34m:[0m [0mList[0m[0;34m[[0m[0mlist[0m[0;34m][0m[0;34m,[0m [0mpredicted[0m[0;34m:[0m [0mList[0m[0;34m[[0m[0mlist[0m[0;34m][0m[0;34m,[0m [0mk[0m[0;34m=[0m[0;36m10[0m[0;34m)[0m [0;34m->[0m [0mint[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Computes the mean average recall at k.
Parameters
----------
actual : a list of lists
    Actual items to be predicted
    example: [['A', 'B', 'X'], ['A', 'B', 'Y']]
predicted : a list of lists
    Ordered predictions
    example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
Returns:
-------
    mark: int
        The mean average recall at k (mar@k)
[0;31mFile:[0m      /opt/anaconda3/lib/python3.8/site-packages/recmetrics/metrics.py
[0;31mType:[0m      function


In [18]:
apk(['a', 'b', 'x'], ['x', 'y', 'z'])

0.3333333333333333

In [19]:
apk(['a', 'b', 'y'], ['x', 'y', 'z'])

0.16666666666666666

In [17]:
x = [['A', 'B', 'X'], ['A', 'B', 'Y']]
y = [['X', 'Y', 'Z'], ['X', 'Y', 'Z', 'T']]
mapk(x,y)

0.25

In [10]:
y_true = [1,2,3]
y_pred = [3,2,4]
mark(y_true, y_pred, k=3)

TypeError: object of type 'int' has no len()

In [8]:
y_true = [1,2,3]
y_pred = [3,2,4]
mapk(y_true, y_pred, k=3)

TypeError: object of type 'int' has no len()

In [3]:
import recmetrics
dir(recmetrics)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'catalog_coverage',
 'class_separation_plot',
 'coverage_plot',
 'intra_list_similarity',
 'long_tail_plot',
 'make_confusion_matrix',
 'mapk_plot',
 'mark',
 'mark_plot',
 'metrics',
 'metrics_plot',
 'mse',
 'novelty',
 'personalization',
 'plots',
 'precision_recall_plot',
 'prediction_coverage',
 'recommender_precision',
 'recommender_recall',
 'rmse',
 'roc_plot']