# Imports

In [1]:
import sys
sys.path.append("..")

from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender

from Base.Evaluation.Evaluator import EvaluatorHoldout

from Data_manager.Movielens_20m.Movielens20MReader import Movielens20MReader
from Data_manager.DataSplitter_k_fold import DataSplitter_Warm_k_fold

from Data_manager.MMTF14K.URM5Fold_WarmCold_Reader import URM5Fold_WarmCold_Reader
from Data_manager.MMTF14K.URM5Fold_WarmCold_Splitter import URM5Fold_WarmCold_Splitter

# Dataset selection and splitting

### Dataset is downloaded and stored locally if absent

In [2]:
# Selecting a dataset
dataReaderICM = Movielens20MReader()
dataReaderURM = URM5Fold_WarmCold_Reader()

# Splitting the dataset. This split will produce a warm item split
# To replicate the original experimens use the dataset accessible here with a cold item split:
# https://mmprj.github.io/mtrm_dataset/index
dataSplitter_ICM = DataSplitter_Warm_k_fold(dataReaderICM)
dataSplitter_ICM.load_data()
dataSplitter_URM = URM5Fold_WarmCold_Splitter(dataReaderURM)
dataSplitter_URM.load_data()

DataSplitter_k_fold for DataReader: Movielens_20m
	 Num items: 27278
	 Num users: 138493

	 Statistics for fold 0: n_interactions 3947336 ( 19.74%), n_items 27278 ( 100.00%), density: 1.04E-03
	 Statistics for fold 1: n_interactions 3999236 ( 20.00%), n_items 27278 ( 100.00%), density: 1.06E-03
	 Statistics for fold 2: n_interactions 4000004 ( 20.00%), n_items 27278 ( 100.00%), density: 1.06E-03
	 Statistics for fold 3: n_interactions 3999236 ( 20.00%), n_items 27278 ( 100.00%), density: 1.06E-03
	 Statistics for fold 4: n_interactions 4054451 ( 20.27%), n_items 27278 ( 100.00%), density: 1.07E-03


	 Statistics for ICM_genre: n_features 20, feature occurrences 54406, density: 9.97E-02


DataSplitter_k_fold: Done.
DataSplitter_k_fold for DataReader: Movielens_20m
	 Num items: 27278
	 Num users: 138493

	 Statistics for fold 0: n_interactions 3947336 ( 19.74%), n_items 27278 ( 100.00%), density: 1.04E-03
	 Statistics for fold 1: n_interactions 3999236 ( 20.00%), n_items 27278 ( 100.00%)

# Setup URM and ICM (TODO: Ignore warm items in cold items setting)

In [3]:
# Each URM is a scipy.sparse matrix of shape |users|x|items|
# TODO iterate over test_fold to perform 5-fold cross validation
URM_train, URM_test = dataSplitter_URM.get_URM_train_for_test_fold(n_test_fold=1)

# The ICM is a scipy.sparse matrix of shape |items|x|features|
ICM = dataSplitter_ICM.get_ICM_from_name("ICM_genre")

# This contains the items to be ignored during the evaluation step
# In a cold items setting this should contain the indices of the warm items
ignore_items = []

# Setup evaluators

In [4]:
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5], ignore_items=ignore_items)

Ignoring 0 Items
Ignoring 0 Items


# CBF Setup parameters (TODO: Parameter tuning)

In [5]:
# We compute the similarity matrix resulting from an Item KNN CBF Recommender
# Note that we have not included the code for parameter tuning, which should be done

cbf_parameters = {
                    'topK': 500,
                    'shrink': 100,
                    'similarity': 'cosine',
                    'normalize': True,
                    'feature_weighting': 'none' # Other options are BM25 and TF-IDF
                 }

# CBF Recommender training and evaluation

In [6]:
recommender_content_based = ItemKNNCBFRecommender(ICM,URM_train)
recommender_content_based.fit(**cbf_parameters)

result_dict, result_string = evaluator_test.evaluateRecommender(recommender_content_based)

Unable to load Cython Compute_Similarity, reverting to Python
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 27278 ( 100 % ), 2015.51 column/sec, elapsed time 0.23 min
Similarity column 27278 ( 100 % ), 2015.51 column/sec, elapsed time 0.23 min


ValueError: dimension mismatch

ValueError: dimension mismatch

In [None]:
print("CBF recommendation quality is: {}".format(result_string))

import json
json = json.dumps(list(result_dict.values())[0], indent=4)
f = open("Results/CBF - Movielens20M - results.json","w")
f.write(json)
f.close()