In [81]:
from lenskit import batch, topn, util, crossfold as xf
from lenskit.algorithms import Recommender, als, item_knn as knn
from lenskit.algorithms.item_knn import ItemItem
from lenskit.algorithms.user_knn import UserUser
from lenskit.algorithms.als import BiasedMF, ImplicitMF
from lenskit.algorithms.funksvd import FunkSVD
from lenskit.algorithms.bias import Bias

import pandas as pd
import numpy as np

In [73]:
# Load the MovieLens ratings and rename the id columns to "user" / "item",
# the column names the model-fitting cells below refer to.
dfRating = (
    pd.read_csv('data/movielens/rating_base.csv')
      .rename(columns={'user_id': 'user', 'movie_id': 'item'})
)
dfRating.head()

Unnamed: 0,user,item,rating
0,1,1,5
1,1,2,3
2,1,3,4
3,1,4,3
4,1,5,3


### Matrix Factorization

In [74]:
# Fit a biased matrix-factorization model with 30 latent features.
# A fixed rng_spec seeds the model's random initialisation so that re-running
# this cell yields the same model; without it the predictions shown further
# down change from one fit to the next.
mf = BiasedMF(features=30, rng_spec=42)
mf = Recommender.adapt(mf)  # ensure the object is a Recommender (the model stays reachable as mf.predictor)
mf = mf.fit(dfRating)  # input columns are specified by name: "user", "item" and "rating"

In [52]:
# Preview the MF model's predictions next to the observed ratings.
# The pairs scored here come from dfRating, the same frame the model was
# fitted on, so this is an in-sample sanity check rather than an evaluation.
batch.predict(mf, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.859648
1,1,2,3,3.204404
2,1,3,4,3.377372
3,1,4,3,3.783276
4,1,5,3,2.926787


In [57]:
# Shape of the user-feature matrix held by the wrapped MF model; the second
# dimension matches the features=30 passed to BiasedMF.
mf.predictor.user_features_.shape

(943, 30)

In [61]:
# Shape of the item-feature matrix held by the wrapped MF model; the second
# dimension matches the features=30 passed to BiasedMF.
mf.predictor.item_features_.shape

(1680, 30)

### Collaborative Filtering (Item-Item k-NN)

In [32]:
# Build the item-item k-NN model (20 is its first constructor argument,
# presumably the neighbourhood size), adapt it into a Recommender and fit it
# on the full rating frame in one chained expression.
kni = Recommender.adapt(ItemItem(20)).fit(dfRating)

In [30]:
# Ask the k-NN recommender for 3 items per distinct user in dfRating and
# show the first six rows (two users' worth in the recorded output).
users = dfRating['user'].unique()
batch.recommend(kni, users, n=3).head(6)

Unnamed: 0,item,score,user,rank
0,1449,5.142813,1,1
1,408,5.053245,1,2
2,1642,4.877075,1,3
3,1449,5.017128,2,1
4,169,4.836834,2,2
5,1594,4.830486,2,3


In [31]:
# Preview the k-NN model's predictions next to the observed ratings.
# As with the MF preview, the pairs come from the frame the model was fitted on.
batch.predict(kni, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,4.117345
1,1,2,3,3.155572
2,1,3,4,3.055646
3,1,4,3,3.727712
4,1,5,3,3.137253


In [78]:
# Shape of the item_means_ vector stored on the fitted k-NN model
# (one-dimensional in the recorded output).
kni.predictor.item_means_.shape

(1680,)

### Baseline

In [82]:
# Baseline: a Bias model with default settings, adapted into a Recommender
# and fitted on the full rating frame in one chained expression.
bsl = Recommender.adapt(Bias()).fit(dfRating)

In [83]:
# Preview the baseline model's predictions next to the observed ratings
# (again scored on the same frame the model was fitted on).
batch.predict(bsl, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.947421
1,1,2,3,3.286075
2,1,3,4,3.146551
3,1,4,3,3.633182
4,1,5,3,3.378867


In [85]:
# Shape of the item_offsets_ vector stored on the fitted baseline model.
bsl.predictor.item_offsets_.shape

(1680,)

In [86]:
# Shape of the user_offsets_ vector stored on the fitted baseline model.
bsl.predictor.user_offsets_.shape

(943,)

### Evaluation

In [92]:
# Keep the MF predictions for every (user, item) pair in dfRating.
# NOTE(review): dfRating is also the frame mf was fitted on, so any error
# computed from dfPredict is an in-sample figure; a held-out split would be
# needed for a real evaluation.
dfPredict = batch.predict(mf, dfRating)
dfPredict.head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.903727
1,1,2,3,3.117148
2,1,3,4,3.389806
3,1,4,3,3.766424
4,1,5,3,2.902128


In [101]:
# Top-10 recommendation lists from the MF model, one list per distinct user
# found in dfRating.
users = dfRating['user'].unique()
dfRecommend = batch.recommend(mf, users, n=10)
dfRecommend.head()

Unnamed: 0,item,score,user,rank
0,408,5.08011,1,1
1,647,4.942757,1,2
2,285,4.914985,1,3
3,171,4.887552,1,4
4,511,4.815067,1,5


In [128]:
# nDCG of the recommendation frame against the rating frame.
# NOTE(review): the recorded result is 0.0. The execution counters show the
# toy-frame cells (In [126] / In [127]) ran before this one, so dfRecommend was
# presumably a toy frame rather than the MF recommendations at that point.
# Also confirm that topn.ndcg accepts whole multi-user frames in this shape.
topn.ndcg(dfRecommend, dfRating)

0.0

In [129]:
# Reciprocal rank of the recommendation frame against the rating frame.
# NOTE(review): recorded result is 0.0; this cell ran after the toy-frame
# cells (In [126] / In [127]), so verify which dfRecommend was being scored.
topn.recip_rank(dfRecommend, dfRating)

0.0

In [131]:
# Precision of the recommendation frame against the rating frame with k=2
# (presumably a cutoff on list length; confirm against the topn docs).
# NOTE(review): recorded result is 0.0; this cell ran after the toy-frame
# cells (In [126] / In [127]), so verify which dfRecommend was being scored.
topn.precision(dfRecommend, dfRating, k=2)

0.0

In [127]:
# Hand-written recommendation list for a single user (user 1): five items
# 'a'..'e' with descending scores and ranks 1..5, given one row per tuple.
dfRecommend = pd.DataFrame(
    [
        (1, 'a', 4.742, 1),
        (1, 'b', 4.532, 2),
        (1, 'c', 4.051, 3),
        (1, 'd', 3.814, 4),
        (1, 'e', 3.581, 5),
    ],
    columns=['user', 'item', 'score', 'rank'],
)
dfRecommend

Unnamed: 0,user,item,score,rank
0,1,a,4.742,1
1,1,b,4.532,2
2,1,c,4.051,3
3,1,d,3.814,4
4,1,e,3.581,5


In [126]:
# Hand-written ratings for a single user (user 1) over items 'a'..'g'.
# This frame holds ratings, not recommendations, so it gets its own name
# instead of being assigned to `dfRecommend`: reusing that name replaced the
# recommendation list with rating data on a top-to-bottom run.
dfTruth = pd.DataFrame({
    'user': [1]*7,
    'item': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
    'rating': [5, 4, 4, 3, 2, 5, 1]
})
dfTruth

Unnamed: 0,user,item,rating
0,1,a,5
1,1,b,4
2,1,c,4
3,1,d,3
4,1,e,2
5,1,f,5
6,1,g,1
