# Imports

In [None]:
import sys
sys.path.append("..")

from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender

from Base.Evaluation.Evaluator import EvaluatorHoldout

from Data_manager.MMTF14K.URM5Fold_WarmCold_Reader import URM5Fold_WarmCold_Reader
from Data_manager.MMTF14K.URM5Fold_WarmCold_Splitter import URM5Fold_WarmCold_Splitter

# Dataset selection and splitting

### Dataset is downloaded and stored locally if absent

In [None]:
# Selecting a dataset
# The reader is not used directly: it is handed to the splitter created below.
dataReader = URM5Fold_WarmCold_Reader()

# Splitting the dataset. This split will produce a warm item split
# To replicate the original experiments use the dataset accessible here with a cold item split:
# https://mmprj.github.io/mtrm_dataset/index
dataSplitter = URM5Fold_WarmCold_Splitter(dataReader)
# NOTE(review): presumably load_data() has to run before the splitter can serve
# the URM/ICM requests made in the following cells -- confirm against the splitter.
dataSplitter.load_data()

# Setup URM and ICM

In [None]:
# Test folds are numbered 1..5, matching the "ICM_1".."ICM_5" names used below.
fold_ids = range(1, 6)

# Query the splitter once per fold and keep the whole result: element [0] is
# used as the training URM and element [1] as the test URM. The original cell
# issued the same call twice per fold, once for each half.
# NOTE(review): assumes repeated calls for the same fold return the same pair.
fold_splits = [
    dataSplitter.get_URM_train_for_test_fold(n_test_fold=fold_id)
    for fold_id in fold_ids
]

# Each URM is a scipy.sparse matrix of shape |users|x|items|
URM_train = [split[0] for split in fold_splits]
URM_test = [split[1] for split in fold_splits]

# The ICM is a scipy.sparse matrix of shape |items|x|features|
# One ICM per fold, looked up by name.
ICM = [
    dataSplitter.get_ICM_from_name("ICM_{}".format(fold_id))
    for fold_id in fold_ids
]

# This contains the items to be ignored during the evaluation step
# In a cold items setting this should contain the indices of the warm items
ignore_items = []

# CBF Setup parameters (TODO: Parameter tuning)

In [None]:
# Hyperparameters for the Item KNN CBF Recommender, which builds an item-item
# similarity matrix; they are expanded as keyword arguments of its fit() call.
# The values are fixed here: parameter tuning is not part of this notebook
# and should still be done.

cbf_parameters = dict(
    topK=500,
    shrink=100,
    similarity='cosine',
    normalize=True,
    feature_weighting='none',  # Other options are BM25 and TF-IDF
)

# CBF Recommender training and evaluation

In [None]:
# Per-metric running sums over the folds; turned into averages at the end.
overall_result_dict = {}
n_folds = len(ICM)

for fold in range(n_folds):

    # Evaluator built on this fold's test interactions, with a single cutoff of 5
    evaluator_test = EvaluatorHoldout(
        URM_test[fold],
        cutoff_list=[5],
        ignore_items=ignore_items,
    )

    # Content-based recommender trained on this fold's ICM and training URM
    recommender_content_based = ItemKNNCBFRecommender(ICM[fold], URM_train[fold])
    recommender_content_based.fit(**cbf_parameters)

    result_dict, result_string = evaluator_test.evaluateRecommender(recommender_content_based)

    # Only the first entry of result_dict is accumulated
    # (presumably the metrics for the single requested cutoff).
    first_entry_metrics = list(result_dict.values())[0]
    for metric, value in first_entry_metrics.items():
        overall_result_dict[metric] = overall_result_dict.get(metric, 0) + value

# Replace every sum with its mean over the folds
for metric in overall_result_dict:
    overall_result_dict[metric] /= n_folds

In [None]:
# NOTE: result_string is the textual report of the last evaluated fold only;
# the values averaged over all folds are in overall_result_dict.
print("CBF recommendation quality is: {}".format(result_string))

import json
import os

results_path = "Results/CBF - ColdItemSplit - results.json"

# overall_result_dict already maps each metric name to its average over the
# folds, so the whole dictionary is serialized. Taking
# list(overall_result_dict.values())[0] would store a single metric value.
# The text is kept under its own name: rebinding `json` would shadow the
# module and make a second run of this cell fail.
results_json = json.dumps(overall_result_dict, indent=4)

# Create the output folder if it is missing, so that open() cannot fail on it.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# The context manager closes the file even if the write raises.
with open(results_path, "w") as results_file:
    results_file.write(results_json)