In [None]:
# !pip install lenskit

Collecting lenskit
  Downloading lenskit-2025.2.0-py3-none-any.whl.metadata (7.8 kB)
Collecting structlog>=23.2 (from lenskit)
  Downloading structlog-25.3.0-py3-none-any.whl.metadata (8.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch~=2.4->lenskit)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch~=2.4->lenskit)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch~=2.4->lenskit)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch~=2.4->lenskit)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch~=2.4->lenskit)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [25]:
import pandas as pd

from lenskit.als import BiasedMFScorer
from lenskit.knn import ItemKNNScorer
from lenskit.batch import recommend
from lenskit.data import from_interactions_df, ItemListCollection, UserIDKey
from lenskit.metrics import NDCG, RBP, RecipRank, RunAnalysis, Recall, Precision
from lenskit.pipeline import topn_pipeline
from lenskit.splitting import SampleFrac, crossfold_users

In [3]:
df = pd.read_csv('../ALS/data/ratings.csv')
dataset = from_interactions_df(df, user_col='user_id', item_col='book_id', rating_col='rating')

In [7]:
model_als = BiasedMFScorer(features=50)

In [8]:
pipe = topn_pipeline(model_als)

In [9]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

  rmat = rmat.to_sparse_csr()


In [11]:
ran = RunAnalysis()
ran.add_metric(NDCG())
ran.add_metric(RBP())
ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

In [13]:
results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,RBP,RecipRank,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALS,0.009026,0.002913,0.013173,0.003571,0.016426


# Test different Hyperparameter

## 1st Run

In [3]:
model_als2 = BiasedMFScorer(features=100, iterations=30, reg=0.01)

In [4]:
pipe = topn_pipeline(model_als2)

In [None]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

In [9]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

In [10]:
results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALS,0.002041,0.00319,0.001511


## 2nd run

In [9]:
model_als3 = BiasedMFScorer(embedding_size=10, iterations=25, reg=0.1, damping=10)

In [10]:
pipe = topn_pipeline(model_als3)

In [11]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

In [12]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

In [13]:
results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALS,0.007946,0.003183,0.014585


## 3rd run

In [14]:
model_als4 = BiasedMFScorer(embedding_size=10, iterations=25, reg=1, damping=10)
pipe = topn_pipeline(model_als4)

In [15]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

In [16]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALS,0.007851,0.00314,0.014396


## 4th run

In [None]:
model_als5 = BiasedMFScorer(embedding_size=50, iterations=20, reg=5, damping=5)
pipe = topn_pipeline(model_als5)

In [18]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

In [19]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALS,0.009021,0.003575,0.016412


## 5th run

In [20]:
model_als5 = BiasedMFScorer(embedding_size=50, iterations=20, reg=5, damping=5)
pipe = topn_pipeline(model_als5)

In [21]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # train the pipeline, cloning first so a fresh pipeline for each split
    fit_als = pipe.clone()
    fit_als.train(split.train)
    # generate recs
    als_recs = recommend(fit_als, split.test.keys(), 100)
    all_recs.add_from(als_recs, model="ALS")

In [22]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALS,0.008971,0.003585,0.016434


# Test KNN

In [27]:
model_ii = ItemKNNScorer(k=20)
pipe_ii = topn_pipeline(model_ii)

In [28]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # do the same for item-item
    fit_ii = pipe_ii.clone()
    fit_ii.train(split.train)
    ii_recs = recommend(fit_ii, split.test.keys(), 100)
    all_recs.add_from(ii_recs, model="II")

In [29]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
II,0.015597,0.005744,0.025355


## 2nd Run

In [30]:
model_ii = ItemKNNScorer(k=30)
pipe_ii = topn_pipeline(model_ii)

In [31]:
# test data is organized by user
all_test = ItemListCollection(UserIDKey)
# recommendations will be organized by model and user ID
all_recs = ItemListCollection(["model", "user_id"])

for split in crossfold_users(dataset, 5, SampleFrac(0.2)):
    # collect the test data
    all_test.add_from(split.test)

    # do the same for item-item
    fit_ii = pipe_ii.clone()
    fit_ii.train(split.train)
    ii_recs = recommend(fit_ii, split.test.keys(), 100)
    all_recs.add_from(ii_recs, model="II")

In [32]:
ran = RunAnalysis()
ran.add_metric(NDCG())
# ran.add_metric(RBP())
# ran.add_metric(RecipRank())
ran.add_metric(Precision())
ran.add_metric(Recall())
results = ran.measure(all_recs, all_test)

results.list_metrics().groupby("model").mean()

Unnamed: 0_level_0,NDCG,Precision,Recall
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
II,0.014825,0.005445,0.023868
