# A tutorial for Scikit-Recommender

## Download and extract dataset

In [1]:
from skrec.io import MovieLens100k

# Download the MovieLens-100k archive into ./dataset and extract it. The
# returned path is what Preprocessor.load_data() reads in the next section.
# On a re-run, the recorded output reports the archive and the rating file as
# already present.
data_path = MovieLens100k.download_and_extract("./dataset")

File './dataset/ml-100k.zip' already downloaded.
File './dataset/ml-100k.rating' already existed.


## Pre-process and save dataset

In [2]:
from skrec.io import Preprocessor

# Pre-processing pipeline: the calls below run in order on the same
# Preprocessor instance, and each one logs a progress line (see the output).
processor = Preprocessor()
# Tab-separated input; columns="UIRT" presumably means the column order
# User, Item, Rating, Timestamp -- TODO confirm against the skrec docs.
processor.load_data(data_path, sep="\t", columns="UIRT")
processor.drop_duplicates(keep="last")  # remove duplicate interactions, keeping the last one
processor.filter_data(user_min=5, item_min=5)  # drop users and items with fewer than 5 interactions
processor.remap_data_id()  # remap IDs of users and items
# Alternative split strategy (leave-out instead of ratio); uncomment to use it
# in place of the ratio split below:
# processor.split_data_by_leave_out(valid=1, test=1)
processor.split_data_by_ratio(train=0.7, valid=0.0, test=0.3, by_time=True)  # 70/30 train/test split, no validation set
# Save to disk; the recorded output reports the directory the data was saved in.
# NOTE(review): data_dir is not used by the later cells shown here; they take
# the dataset directory from run_config.data_dir (read from skrec.ini) instead.
data_dir = processor.save_data()

loading data...
dropping duplicate interactions...
1 filtering items...
1 filtering users...
2 filtering items...
2 filtering users...
remapping user IDs...
remapping item IDs...
splitting data by ratio...
saving data to disk...
2022-01-04 22:11:19.896: 
columns = UIRT
filename = ./dataset/ml-100k.rating
sep = 	
item_min = 5
user_min = 5
remap_user_id = True
remap_item_id = True
split_by = ratio
train = 0.7
valid = 0.0
test = 0.3
by_time = True
2022-01-04 22:11:19.897: Dataset statistic information:
2022-01-04 22:11:19.897: The number of users: 943
2022-01-04 22:11:19.898: The number of items: 1349
2022-01-04 22:11:19.898: The number of ratings: 99287
2022-01-04 22:11:19.899: Average actions of users: 105.29
2022-01-04 22:11:19.899: Average actions of items: 73.60
2022-01-04 22:11:19.900: The sparsity of the dataset: 92.19507478537577%%

The processed data has been saved in './dataset/ml-100k_ratio_by_time_u5_i5'


## Read config

In [3]:
from typing import Union, Tuple, List
from skrec import Config

class RunConfig(Config):
    """Run-level settings for the tutorial: model choice, dataset location and evaluation options.

    Each constructor argument is stored on the instance under an attribute of
    the same name, in the order the arguments are declared. Extra keyword
    arguments are forwarded to ``Config.__init__``.

    Args:
        recommender: Name of the recommender to run.
        data_dir: Directory of the processed dataset.
        file_column: Column layout of the data files (default ``"UIRT"``).
        sep: Field separator of the data files (default: tab).
        gpu_id: GPU identifier (not validated here).
        gpu_mem: GPU memory setting (not validated here).
        metric: Metric name(s) for evaluation: ``None``, a single name, or a
            tuple/list of names.
        top_k: Cut-off(s) for evaluation: a single int, or a tuple/list of ints.
        test_batch_size: Batch size used at test time; must be a positive int.
        test_thread: Number of threads used at test time; must be a positive int.
        seed: Random seed; must be a non-negative int.
        **kwargs: Passed through unchanged to the ``Config`` base class.
    """

    def __init__(self,
                 recommender="BPRMF",
                 data_dir="",
                 file_column="UIRT", sep='\t',
                 gpu_id=0, gpu_mem=0.99,
                 metric=("Precision", "Recall", "MAP", "NDCG", "MRR"),
                 top_k=(10, 20, 30, 40, 50, 100),
                 test_batch_size=64,
                 test_thread=4,
                 seed=2021,
                 **kwargs):
        """Store the given settings as instance attributes (see the class docstring)."""
        # Zero-argument super() keeps working when this notebook cell is
        # re-executed and the class object is redefined.
        super().__init__(**kwargs)
        self.recommender: str = recommender
        self.data_dir: str = data_dir
        self.file_column: str = file_column
        self.sep: str = sep
        # NOTE(review): gpu_id and gpu_mem are neither annotated nor validated;
        # their accepted types are not visible from this cell.
        self.gpu_id = gpu_id
        self.gpu_mem = gpu_mem
        # Variadic tuple annotations: the defaults hold several elements, so
        # Tuple[str] / Tuple[int] (exactly one element) would be wrong.
        self.metric: Union[None, str, Tuple[str, ...], List[str]] = metric
        self.top_k: Union[int, List[int], Tuple[int, ...]] = top_k
        self.test_batch_size: int = test_batch_size
        self.test_thread: int = test_thread
        self.seed: int = seed

    def _validate(self):
        """Check the types and ranges of the core settings.

        Presumably invoked by the ``Config`` base class once arguments have
        been parsed -- TODO confirm. ``gpu_id``, ``gpu_mem``, ``metric`` and
        ``top_k`` are not checked here.

        Raises:
            AssertionError: If a checked attribute has the wrong type or is
                out of range. These are ``assert`` statements, so they are
                skipped when Python runs with ``-O``.
        """
        for name in ("recommender", "data_dir", "file_column", "sep"):
            value = getattr(self, name)
            assert isinstance(value, str), f"{name} must be a str, got {value!r}"

        for name in ("test_batch_size", "test_thread"):
            value = getattr(self, name)
            assert isinstance(value, int) and value > 0, \
                f"{name} must be a positive int, got {value!r}"

        assert isinstance(self.seed, int) and self.seed >= 0, \
            f"seed must be a non-negative int, got {self.seed!r}"

In [4]:
# Start from the defaults declared in RunConfig.__init__ ...
run_config = RunConfig()
# ... then let the ini file override them. No section name is passed, so (per
# the logged message) the first section of the file is used.
run_config.parse_args_from_ini("./skrec.ini")  # parse args from file and overwrite the default values
# Optional: uncomment so that command-line arguments override the values above.
# run_config.parse_args_from_cmd()  # parse args from cmd and overwrite the previous values

section is not given, and parse arguments from the first ('skrec') section.


RunConfig(recommender='BPRMF', data_dir='dataset/ml-100k_ratio_by_time_u5_i5', file_column='UIRT', sep='\t', gpu_id=0, gpu_mem=0.99, metric=('Precision', 'Recall', 'MAP', 'NDCG', 'MRR'), top_k=(10, 20, 30, 40, 50, 100), test_batch_size=64, test_thread=4, seed=2021)

## Load dataset

In [5]:
from skrec import Dataset

# Build the Dataset from the directory, field separator and column layout held
# in the run config (these come from skrec.ini / the RunConfig defaults).
dataset = Dataset(run_config.data_dir, run_config.sep, run_config.file_column)

## Create evaluator

In [6]:
from skrec import RankingEvaluator

# Convert the train and test splits with to_user_dict() (presumably a
# user -> items mapping -- confirm against skrec), train first, then test.
train_user_dict = dataset.train_data.to_user_dict()
test_user_dict = dataset.test_data.to_user_dict()

# Ranking evaluator; metrics, cut-offs, batch size and thread count all come
# from the run config.
evaluator = RankingEvaluator(
    train_user_dict,
    test_user_dict,
    metric=run_config.metric,
    top_k=run_config.top_k,
    batch_size=run_config.test_batch_size,
    num_thread=run_config.test_thread,
)

## Create model

In [7]:
from skrec.recommender.BPRMF import BPRMF, BPRMFConfig

# Model hyper-parameters: BPRMFConfig defaults with epochs set to 50.
bpr_config = BPRMFConfig(epochs=50)
# Values in the ini file override the ones above. No section name is passed,
# so (per the logged message) the first section of the file is used.
bpr_config.parse_args_from_ini("./skrec.ini")  # a section name such as "bpr" can presumably be passed as a second argument -- TODO confirm

# Wire the dataset, model config and evaluator into the BPRMF recommender.
bpr_model = BPRMF(dataset, bpr_config, evaluator)

section is not given, and parse arguments from the first ('skrec') section.
2022-01-04 22:11:20.104: PID: 15983
2022-01-04 22:11:20.105: Model: skrec.recommender.BPRMF
2022-01-04 22:11:20.105: 
Dataset statistic information:
Name: ml-100k_ratio_by_time_u5_i5
The number of users: 943
The number of items: 1349
The number of ratings: 99287
Average actions of users: 105.29
Average actions of items: 73.60
The sparsity of the dataset: 92.195075%%

The number of training: 69918
The number of validation: 0
The number of testing: 29369
2022-01-04 22:11:20.106: 
Hyper-parameters:
lr=0.001
reg=0.001
n_dim=64
batch_size=1024
epochs=50
early_stop=200


## Train model

In [8]:
# Train the model. The recorded output shows a header row of metric names
# followed by one row of ranking metrics per epoch.
bpr_model.fit()

2022-01-04 22:11:21.235: metrics:    	Precision@10	Precision@20	Precision@30	Precision@40	Precision@50	Precision@100	Recall@10   	Recall@20   	Recall@30   	Recall@40   	Recall@50   	Recall@100  	MAP@10      	MAP@20      	MAP@30      	MAP@40      	MAP@50      	MAP@100     	NDCG@10     	NDCG@20     	NDCG@30     	NDCG@40     	NDCG@50     	NDCG@100    	MRR@10      	MRR@20      	MRR@30      	MRR@40      	MRR@50      	MRR@100     
2022-01-04 22:11:21.903: epoch 0:    	0.16235416  	0.13913056  	0.12880826  	0.11990999  	0.11234365  	0.08920470  	0.07156026  	0.11857965  	0.16132733  	0.19467223  	0.22199291  	0.33781242  	0.09545684  	0.07646892  	0.07182065  	0.07068257  	0.07097166  	0.07847453  	0.18309271  	0.17876013  	0.18636276  	0.19392221  	0.20189892  	0.24468793  	0.36766708  	0.37487185  	0.37792140  	0.37859076  	0.37913197  	0.37979621  
2022-01-04 22:11:22.295: epoch 1:    	0.18218455  	0.15068933  	0.13457006  	0.12576889  	0.11777305  	0.09299039  	0.08369396  	0.13091321  	0

2022-01-04 22:11:28.400: epoch 19:   	0.18144239  	0.16336174  	0.15111317  	0.14178160  	0.13389193  	0.11027572  	0.08826967  	0.15623732  	0.21343891  	0.26395881  	0.30641651  	0.46713105  	0.10716085  	0.09317821  	0.09098914  	0.09199858  	0.09435402  	0.10785635  	0.19849683  	0.20684935  	0.22066642  	0.23509124  	0.24888434  	0.31002295  	0.36519870  	0.37499705  	0.37738159  	0.37833387  	0.37877899  	0.37933609  
2022-01-04 22:11:28.706: epoch 20:   	0.18239675  	0.16320263  	0.15086569  	0.14167549  	0.13446450  	0.11045602  	0.08886164  	0.15480803  	0.21280390  	0.26256350  	0.30686149  	0.46833435  	0.10748348  	0.09332693  	0.09102822  	0.09209980  	0.09447810  	0.10805241  	0.19953316  	0.20664230  	0.22049898  	0.23474735  	0.24935097  	0.31056529  	0.36691728  	0.37642980  	0.37897375  	0.37994543  	0.38035804  	0.38094148  
2022-01-04 22:11:29.000: epoch 21:   	0.18345718  	0.16362683  	0.15072426  	0.14233828  	0.13488872  	0.11100746  	0.08977736  	0.15657146  	0.

2022-01-04 22:11:35.069: epoch 39:   	0.20074242  	0.17730658  	0.16309634  	0.15249203  	0.14523867  	0.11685041  	0.09802870  	0.17119496  	0.23129575  	0.28239977  	0.33284199  	0.49708146  	0.11897733  	0.10321102  	0.10157654  	0.10278606  	0.10608216  	0.12034467  	0.21740800  	0.22446765  	0.23877096  	0.25323495  	0.27014935  	0.33203715  	0.38431749  	0.39376643  	0.39529857  	0.39602795  	0.39658108  	0.39700499  
2022-01-04 22:11:35.387: epoch 40:   	0.20010614  	0.17746569  	0.16267221  	0.15222694  	0.14519627  	0.11699893  	0.09788286  	0.17174491  	0.23230618  	0.28395075  	0.33202863  	0.49832520  	0.11904699  	0.10361911  	0.10173901  	0.10302819  	0.10623246  	0.12074414  	0.21740121  	0.22500417  	0.23911877  	0.25382960  	0.27017698  	0.33278844  	0.38573250  	0.39510030  	0.39663464  	0.39771372  	0.39804834  	0.39853537  
2022-01-04 22:11:35.722: epoch 41:   	0.20116654  	0.17730661  	0.16334376  	0.15312833  	0.14513259  	0.11717914  	0.09916317  	0.17268059  	0.