In [2]:
from lenskit import batch, topn, util, crossfold as xf
from lenskit.algorithms import Recommender, als, item_knn as knn
from lenskit.algorithms.item_knn import ItemItem
from lenskit.algorithms.user_knn import UserUser
from lenskit.algorithms.als import BiasedMF, ImplicitMF
from lenskit.algorithms.funksvd import FunkSVD
from lenskit.algorithms.bias import Bias

import pandas as pd
import numpy as np

In [3]:
dfRating = pd.read_csv('data/movielens/rating_base.csv')
dfRating = dfRating.rename(columns={'user_id': 'user', 'movie_id': 'item'})
dfRating.head()

Unnamed: 0,user,item,rating
0,1,1,5
1,1,2,3
2,1,3,4
3,1,4,3
4,1,5,3


### Matrix Factorization

In [8]:
mf = BiasedMF(features=30, iterations=5)
mf = Recommender.adapt(mf) # ensure the object to be a Recommender
mf = mf.fit(dfRating) # input columns specified by name: "user", "item" and "rating"

In [9]:
batch.predict(mf, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.698917
1,1,2,3,3.216474
2,1,3,4,3.505554
3,1,4,3,3.632015
4,1,5,3,2.91523


In [10]:
mf.predictor.user_features_.shape

(943, 30)

In [11]:
mf.predictor.item_features_.shape

(1680, 30)

### Collaborative Filtering

In [12]:
kni = knn.ItemItem(20)
kni = Recommender.adapt(kni)
kni = kni.fit(dfRating)

In [13]:
users = dfRating.user.unique()
batch.recommend(kni, users, 3).head(6)

Unnamed: 0,item,score,user,rank
0,1449,5.142813,1,1
1,408,5.053245,1,2
2,1642,4.877075,1,3
3,1449,5.017128,2,1
4,169,4.836834,2,2
5,1594,4.830486,2,3


In [14]:
batch.predict(kni, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,4.117345
1,1,2,3,3.155572
2,1,3,4,3.055646
3,1,4,3,3.727712
4,1,5,3,3.137253


In [15]:
kni.predictor.item_means_.shape

(1680,)

### Baseline

In [16]:
bsl = Bias()
bsl = Recommender.adapt(bsl)
bsl = bsl.fit(dfRating)

In [17]:
batch.predict(bsl, dfRating).head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.947421
1,1,2,3,3.286075
2,1,3,4,3.146551
3,1,4,3,3.633182
4,1,5,3,3.378867


In [18]:
bsl.predictor.item_offsets_.shape

(1680,)

In [19]:
bsl.predictor.user_offsets_.shape

(943,)

### Evaluation

In [20]:
dfPredict = batch.predict(mf, dfRating)
dfPredict.head()

Unnamed: 0,user,item,rating,prediction
0,1,1,5,3.698917
1,1,2,3,3.216474
2,1,3,4,3.505554
3,1,4,3,3.632015
4,1,5,3,2.91523


In [21]:
users = dfRating.user.unique()
dfRecommend = batch.recommend(mf, users, n=10)
dfRecommend.head()

Unnamed: 0,item,score,user,rank
0,647,5.042677,1,1
1,408,4.956935,1,2
2,171,4.905043,1,3
3,1449,4.843693,1,4
4,511,4.815628,1,5


In [22]:
topn.ndcg(dfRecommend, dfRating)

0.1357063470926102