# Imports

In [4]:
import json
import os
import sys
sys.path.append("..")

from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender

from Base.Evaluation.Evaluator import EvaluatorHoldout

from Data_manager.MMTF14K.URM5Fold_WarmCold_Reader import URM5Fold_WarmCold_Reader
from Data_manager.MMTF14K.URM5Fold_WarmCold_Splitter import URM5Fold_WarmCold_Splitter

# Dataset selection and splitting

### Dataset is downloaded and stored locally if absent

In [5]:
# Selecting a dataset: instantiate the MMTF14K 5-fold warm/cold reader
dataReader = URM5Fold_WarmCold_Reader()

# Splitting the dataset. This split will produce a warm item split.
# To replicate the original experiments, use the dataset accessible here with a cold item split:
# https://mmprj.github.io/mtrm_dataset/index
# Running this cell prints the per-fold and per-ICM statistics shown in its output.
dataSplitter = URM5Fold_WarmCold_Splitter(dataReader)
dataSplitter.load_data()

URM5Fold_WarmCold_Splitter for DataReader: MMTF14K/Final_MMTF14K_Web/Data
	 Num items: 13623
	 Num users: 138492

	 Statistics for fold 0: n_interactions 12471739 ( 20.00%), n_items 13623 ( 100.00%), density: 6.61E-03
	 Statistics for fold 1: n_interactions 12471739 ( 20.00%), n_items 13623 ( 100.00%), density: 6.61E-03
	 Statistics for fold 2: n_interactions 12471739 ( 20.00%), n_items 13623 ( 100.00%), density: 6.61E-03
	 Statistics for fold 3: n_interactions 12471739 ( 20.00%), n_items 13623 ( 100.00%), density: 6.61E-03
	 Statistics for fold 4: n_interactions 12471739 ( 20.00%), n_items 13623 ( 100.00%), density: 6.61E-03


	 Statistics for ICM_genre: n_features 18, feature occurrences 27099, density: 1.11E-01
	 Statistics for ICM_year: n_features 1, feature occurrences 13623, density: 1.00E+00
	 Statistics for ICM_AVF: n_features 107, feature occurrences 1401318, density: 9.61E-01
	 Statistics for ICM_deep: n_features 4096, feature occurrences 55799808, density: 1.00E+00
	 Statist

# Setup URM and ICM

In [9]:
# Test-fold identifiers handed to the splitter, one per cross-validation run.
# NOTE(review): the statistics printed when loading the data label the folds 0-4,
# while the ids used here are 1-5 -- confirm that get_URM_train_for_test_fold is 1-indexed.
test_fold_ids = [1, 2, 3, 4, 5]

# Ask the splitter for each fold exactly once, so that the train and test matrices
# of a fold always come from the same call (and the split is not computed twice).
fold_splits = [
    dataSplitter.get_URM_train_for_test_fold(n_test_fold=fold_id)
    for fold_id in test_fold_ids
]

# Each URM is a scipy.sparse matrix of shape |users|x|items|
# Element 0 of a split is used as the training URM, element 1 as the test URM.
URM_train = [split[0] for split in fold_splits]
URM_test = [split[1] for split in fold_splits]

# Names of the unimodal ICMs to evaluate, in evaluation order.
# "ICM_BLF" is intentionally left out (it was disabled in the original list).
ICM_names = [
    "ICM_genre",
    "ICM_year",
    # "ICM_BLF",
    "ICM_ivector_1",
    "ICM_AVF",
    "ICM_deep",
]

# The ICM is a scipy.sparse matrix of shape |items|x|features|
# Stored as (name, matrix) pairs so the name can be used to label the results.
ICM = [(ICM_name, dataSplitter.get_ICM_from_name(ICM_name)) for ICM_name in ICM_names]

# This contains the items to be ignored during the evaluation step
# In a cold items setting this should contain the indices of the warm items
ignore_items = []

# CBF Recommender training and evaluation

In [13]:
# Grid search of the content-based KNN recommender.
# For every unimodal ICM and every (topK, shrink) combination the recommender is
# trained and evaluated on each fold; the metrics are averaged over the folds,
# printed, and written to one JSON file per combination.

# HYPER-PARAMS values (loop-invariant, so they are defined once up front)
topKValues = [100, 500, 1000]
shrinkValues = [10, 100, 200]

n_folds = len(URM_train)

# Iterate over the different unimodal ICMs, stored as (name, matrix) pairs
for ICM_name, ICM_object in ICM:

    for topKValue in topKValues:
        for shrinkValue in shrinkValues:
            cbf_parameters = {
                'topK': topKValue,
                'shrink': shrinkValue,
                'similarity': 'cosine',
                'normalize': True,
                'feature_weighting': 'none'  # Other options are BM25 and TF-IDF
            }

            # Running sum of every metric across the folds
            metric_totals = {}
            for fold in range(n_folds):

                evaluator_test = EvaluatorHoldout(URM_test[fold], cutoff_list=[5], ignore_items=ignore_items)
                recommender_content_based = ItemKNNCBFRecommender(ICM_object, URM_train[fold])
                recommender_content_based.fit(**cbf_parameters)

                result_dict, result_string = evaluator_test.evaluateRecommender(recommender_content_based)

                # Only one cutoff is requested, so the first entry of result_dict is used
                first_cutoff_metrics = next(iter(result_dict.values()))
                for metric, value in first_cutoff_metrics.items():
                    metric_totals[metric] = metric_totals.get(metric, 0) + value

            # Average over the folds (built as a new dict instead of rewriting the one being iterated)
            overall_result_dict = {
                metric: total / n_folds
                for metric, total in metric_totals.items()
            }

            results_json = json.dumps(overall_result_dict, indent=4)

            # Report the fold-averaged metrics; result_string would only describe the last fold
            print("CBF recommendation quality is: {}".format(results_json))

            # NOTE(review): the folder is named "ColdItemSplit" although the dataset cell's
            # comment says the split produced here is a warm item split -- confirm the intended name.
            filename = "Results/CBF - ColdItemSplit/ICMtype_{}-topK_{}-shrink_{} - results.json".format(ICM_name, topKValue, shrinkValue)

            # Create the output folder on first use so that open() cannot fail on a fresh checkout
            os.makedirs(os.path.dirname(filename), exist_ok=True)

            # The context manager closes the file even if the write raises
            with open(filename, "w") as results_file:
                results_file.write(results_json)
        

Ignoring 0 Items
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 13623 ( 100 % ), 4319.40 column/sec, elapsed time 0.05 min
EvaluatorHoldout: Processed 72237 ( 52.82% ) in 30.00 sec. Users per second: 2408
EvaluatorHoldout: Processed 136768 ( 100.00% ) in 52.92 sec. Users per second: 2584
Ignoring 0 Items
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 13623 ( 100 % ), 5041.96 column/sec, elapsed time 0.05 min
EvaluatorHoldout: Processed 73001 ( 53.02% ) in 30.08 sec. Users per second: 2427
EvaluatorHoldout: Processed 137677 ( 100.00% ) in 53.27 sec. Users per second: 2585
Ignoring 0 Items
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 13623 ( 100 % ), 5016.18 column/sec, elapsed time 0.05 min
EvaluatorHoldout: Processed 72591 ( 52.78% ) in 30.00 sec. Users per second: 2420
EvaluatorHoldout: Processed 137539 ( 100.00% ) in 53.23 sec. Users per second: 2584
Ignoring 0 Items
Unable to load Cy

KeyboardInterrupt: 