In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
from tqdm import tqdm
from Base.DataIO import DataIO

#----Recommenders----
from SLIM.SLIM_BPR_Python import SLIM_BPR_Python
from SLIM.SlimElasticNet import SLIMElasticNetRecommender
from cf.item_cf3 import ItemBasedCollaborativeFiltering
from cf.user_cf2 import UserBasedCollaborativeFiltering
from cbf.cbf import ContentBasedFiltering
from SlimBPR.SlimBPRRec import SlimBPRRec
from SlimBPR.SlimBPR import SlimBPR
from MF.ALS import AlternatingLeastSquare
from MF.IALSRecommender import IALSRecommender
from GraphBased.P3alphaRecommender import P3alphaRecommender
from GraphBased.RP3betaRecommender import RP3betaRecommender


from Hybrid.hybridRecCBF import HybridRecommender
#---------------

#----Model Load & Save----
from Data_manager.Dataset import Dataset
#-------------------------

from sklearn.model_selection import train_test_split

In [2]:
def load_data_ICM():
  return pd.read_csv("./data_ICM_title_abstract.csv")

In [3]:
features=load_data_ICM()
d ={'item_id': features['row'],'feature_id':features['col'],'value':features['data']}
features=pd.DataFrame(data=d)
itemList=list(d['item_id'])

In [4]:
featureList=list(d['feature_id'])

In [5]:
valueList=list(d['value'])
ICM = sp.coo_matrix((valueList,(itemList,featureList)))
ICM = ICM.tocsr()

In [6]:
ICM

<25975x20000 sparse matrix of type '<class 'numpy.float64'>'
	with 490691 stored elements in Compressed Sparse Row format>

**Dataset splitting into train,validation and test**

This is the last part before creating the recommender. However, this step is super important, as it is the base for the training, parameters optimization, and evaluation of the recommender(s).

In here we read the ratings (which we loaded and preprocessed before) and create the train, validation, and test User-Rating Matrices (URM). It's important that these are disjoint to avoid information leakage from the train into the validation/test set, in our case, we are safe to use the train_test_split function from scikit-learn as the dataset only contains one datapoint for every (user,item) pair. On another topic, we first create the test set and then we create the validation by splitting again the train set.

train_test_split takes an array (or several arrays) and divides it into train and test according to a given size (in our case testing_percentage and validation_percentage, which need to be a float between 0 and 1).

After we have our different splits, we create the sparse URMs by using the csr_matrix function from scipy.




In [7]:
#Load
loaded_dataset = Dataset(None, None, None, None)
loaded_dataset.load_data("Salvataggi/")


urm_train = (loaded_dataset.get_urm_train())["urm_train"]
urm_validation = (loaded_dataset.get_urm_validation())["urm_validation"]
urm_test = loaded_dataset.get_urm_test()["urm_test"]

In [8]:
urm_train_validation = urm_train + urm_validation

**Evaluation Metrics**

In [9]:
def recall(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    recall_score = np.sum(is_relevant) / relevant_items.shape[0]
    
    return recall_score
    
    
def precision(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    precision_score = np.sum(is_relevant) / recommendations.shape[0]

    return precision_score

def mean_average_precision(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    precision_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (1 + np.arange(is_relevant.shape[0]))

    map_score = np.sum(precision_at_k) / np.min([relevant_items.shape[0], is_relevant.shape[0]])

    return map_score

**Evaluation Procedure**

The evaluation procedure returns the averaged accuracy scores (in terms of precision, recall and MAP) for all users (that have at least 1 rating in the test set). It also calculates the number of evaluated and skipped users. It receives a recommender instance, and the train and test URMs.

In [10]:
def evaluator(recommender: object, urm_train: sp.csr_matrix, urm_test: sp.csr_matrix):
    recommendation_length = 10
    accum_precision = 0
    accum_recall = 0
    accum_map = 0
    
    num_users = urm_train.shape[0]
    
    num_users_evaluated = 0
    num_users_skipped = 0
    
    for user_id in range(num_users):
        user_profile_start = urm_test.indptr[user_id]
        user_profile_end = urm_test.indptr[user_id+1]

        relevant_items = urm_test.indices[user_profile_start:user_profile_end]

        if relevant_items.size == 0:
            num_users_skipped += 1
            continue
            
#         recommendations = recommender.recommend(user_id_array=user_id,
#                                                cutoff=recommendation_length,
#                                                remove_seen_flag=True
#                                                )

        expected_ratings = recommender.get_expected_ratings(user_id)
        recommended_items = np.flip(np.argsort(expected_ratings), 0)

        unseen_items_mask = np.in1d(recommended_items,urm_train[user_id].indices,
                                        assume_unique=True, invert=True)

        recommendations = recommended_items[unseen_items_mask]


        accum_precision += precision(recommendations, relevant_items)
        accum_recall += recall(recommendations, relevant_items)
        accum_map += mean_average_precision(recommendations, relevant_items)

        num_users_evaluated += 1

    
    accum_precision /= max(num_users_evaluated, 1)
    accum_recall /= max(num_users_evaluated, 1)
    accum_map /=  max(num_users_evaluated, 1)
    
    return accum_precision, accum_recall, accum_map, num_users_evaluated, num_users_skipped

In [11]:
#Params for the recommenders
user_cf_param = {
    "knn": 207,
    "shrink": 2
}

item_cf_param = {
    "knn": 595,
    "shrink": 35
}

cbf_param = {
    "knn": 404,
    "shrink": 5
}

slim_bpr_param = {   #work in progres....
    "learning_rate" : 1e-5,
    "epochs": 20,
    "nnz" : 0.9,
    "knn": 150
}

als_param = {   #work in progress...
    "n_factors": 120,
    "regularization": 1.75,
    "iterations": 15
}

Ials_param = {
    "num_factors" : 90,
    "confidence_scaling" : "linear",
    "alpha" : 0.40,
    "epsilon" : 0.28,
    "reg" : 1.5
}

p3alpha_param = {
    "knn": 565,
    "alpha": 0.54
}

rp3beta_param = {
    "knn": 565,
    "alpha" : 0.54,
    "beta" : 0.6 
}

In [12]:
userCF = UserBasedCollaborativeFiltering(urm_train)

In [13]:
userCF.fit(knn=user_cf_param["knn"], shrink=user_cf_param["shrink"],similarity='cosine')

Similarity column 7947 ( 100 % ), 3798.00 column/sec, elapsed time 0.03 min


In [14]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(userCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

(0.00013115158761398967, 1.0, 0.055300665604014684, 5007, 2940)

In [15]:
itemCF = ItemBasedCollaborativeFiltering(urm_train)

In [16]:
itemCF.fit(knn=item_cf_param["knn"], shrink=item_cf_param["shrink"], similarity="cosine")

Similarity column 25975 ( 100 % ), 3383.25 column/sec, elapsed time 0.13 min


In [17]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(itemCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

(0.00013115158761398967, 1.0, 0.05125264620585148, 5007, 2940)

In [18]:
SlimBpr=SlimBPRRec(urm_train)

In [None]:
SlimBpr.fit(nnz=slim_bpr_param["nnz"], knn=slim_bpr_param["knn"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(SlimBpr, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
slim_elastic = SLIMElasticNetRecommender(urm_train)

In [None]:
slim_elastic.fit()

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(slim_elastic, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [19]:
cbf = ContentBasedFiltering(urm_train,ICM)

In [20]:
cbf.fit(knn=cbf_param["knn"],shrink=cbf_param["shrink"],similarity='cosine')

Similarity column 25975 ( 100 % ), 1524.31 column/sec, elapsed time 0.28 min


In [21]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(cbf, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

(0.00013115158761398967, 1.0, 0.028753438844014722, 5007, 2940)

In [22]:
ALS = AlternatingLeastSquare(urm_train)

In [23]:
ALS.fit(n_factors=als_param["n_factors"], regularization=als_param["regularization"],iterations=als_param["iterations"])

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




In [24]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(ALS, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

(0.00013115158761398967, 1.0, 0.05520753008587726, 5007, 2940)

In [25]:
IALS = IALSRecommender(urm_train)

In [26]:
IALS.fit(num_factors=Ials_param["num_factors"], confidence_scaling=Ials_param["confidence_scaling"], alpha=Ials_param["alpha"], epsilon=Ials_param["epsilon"], reg=Ials_param["reg"])

IALSRecommender: Epoch 1 of 40. Elapsed time 5.87 sec
IALSRecommender: Epoch 2 of 40. Elapsed time 11.91 sec
IALSRecommender: Epoch 3 of 40. Elapsed time 17.72 sec
IALSRecommender: Epoch 4 of 40. Elapsed time 23.56 sec
IALSRecommender: Epoch 5 of 40. Elapsed time 29.48 sec
IALSRecommender: Epoch 6 of 40. Elapsed time 35.36 sec
IALSRecommender: Epoch 7 of 40. Elapsed time 41.21 sec
IALSRecommender: Epoch 8 of 40. Elapsed time 47.06 sec
IALSRecommender: Epoch 9 of 40. Elapsed time 52.98 sec
IALSRecommender: Epoch 10 of 40. Elapsed time 58.88 sec
IALSRecommender: Epoch 11 of 40. Elapsed time 1.08 min
IALSRecommender: Epoch 12 of 40. Elapsed time 1.18 min
IALSRecommender: Epoch 13 of 40. Elapsed time 1.28 min
IALSRecommender: Epoch 14 of 40. Elapsed time 1.37 min
IALSRecommender: Epoch 15 of 40. Elapsed time 1.47 min
IALSRecommender: Epoch 16 of 40. Elapsed time 1.57 min
IALSRecommender: Epoch 17 of 40. Elapsed time 1.67 min
IALSRecommender: Epoch 18 of 40. Elapsed time 1.77 min
IALSRecomm

In [27]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(IALS, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

(0.00013115158761398967, 1.0, 0.029984124549161884, 5007, 2940)

In [None]:
P3alpha = P3alphaRecommender(urm_train)

In [None]:
P3alpha.fit(topK=p3alpha_param["knn"], alpha=p3alpha_param["alpha"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(P3alpha, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
RP3beta = RP3betaRecommender(urm_train)

In [None]:
RP3beta.fit(topK=rp3beta_param["knn"], alpha=rp3beta_param["alpha"], beta=rp3beta_param["beta"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(RP3beta, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
recommender = HybridRecommender(urm_train, ICM, userCF, itemCF, cbf, IALS)

In [None]:
recommender.fit(user_cf_param=user_cf_param,item_cf_param=item_cf_param,cbf_param=cbf_param,
                slim_param=slim_bpr_param,als_param=als_param, w_user=w["user_cf"], w_item=w["item_cf"], w_cbf=w["cbf"], w_p3alpha=w["p3alpha"], w_als=w["als"], w_ials=w["ials"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(recommender, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

**Hybrid**

**Hybrid Recommender Tuning**

In [28]:
best_ials = 0
best_MAP = 0.0
counter = 0

In [31]:
import time

w_ials = 0

for i in (range (0,500)) :
    
    start = time.time()

    #Create recommender
    rec = HybridRecommender(urm_train_validation, ICM, userCF, itemCF, cbf, IALS)
    print("Iteration " + str(counter) + " Weight w_ials: " + str(w_ials) +" started.")

    rec.fit(user_cf_param=user_cf_param,item_cf_param=item_cf_param,cbf_param=cbf_param,
            slim_param=slim_bpr_param,als_param=als_param, w_user=0.32, w_item=0.19, w_cbf=0.39, w_p3alpha=0, w_als=0, w_ials=w_ials)

    accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(rec, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
    print("MAP: " + str(accum_map))
    if accum_map > best_MAP :
        best_ials = w_ials
        print("New Best! w_ials weight: " + str(w_ials) +" MAP: " + str(accum_map) + " previous best MAP: " + str(best_MAP))
        best_MAP = accum_map

    stop = time.time()
    print("Execution Time: " + str(stop-start))

    w_ials+=0.01




Iteration 0 Weight w_ials: 0 started.
MAP: 0.06716426380627386
Execution Time: 21.534587144851685
Iteration 0 Weight w_ials: 0.01 started.
MAP: 0.06723707473930678
New Best! w_ials weight: 0.01 MAP: 0.06723707473930678 previous best MAP: 0.06716426380627386
Execution Time: 27.101031064987183
Iteration 0 Weight w_ials: 0.02 started.
MAP: 0.06715011124889775
Execution Time: 27.12987756729126
Iteration 0 Weight w_ials: 0.03 started.
MAP: 0.067197702813113
Execution Time: 27.760242700576782
Iteration 0 Weight w_ials: 0.04 started.
MAP: 0.06709881234125174
Execution Time: 26.2469265460968
Iteration 0 Weight w_ials: 0.05 started.
MAP: 0.06720615991659429
Execution Time: 26.151875972747803
Iteration 0 Weight w_ials: 0.060000000000000005 started.
MAP: 0.06708931233050754
Execution Time: 26.17323398590088
Iteration 0 Weight w_ials: 0.07 started.
MAP: 0.06725882901190254
New Best! w_ials weight: 0.07 MAP: 0.06725882901190254 previous best MAP: 0.06723707473930678
Execution Time: 26.3087384700775

Iteration 0 Weight w_ials: 0.7100000000000004 started.
MAP: 0.0662928238246863
Execution Time: 26.193042993545532
Iteration 0 Weight w_ials: 0.7200000000000004 started.
MAP: 0.06628767575169274
Execution Time: 26.782161951065063
Iteration 0 Weight w_ials: 0.7300000000000004 started.
MAP: 0.06641234513545716
Execution Time: 25.424495458602905
Iteration 0 Weight w_ials: 0.7400000000000004 started.
MAP: 0.06628748838377693
Execution Time: 26.765811681747437
Iteration 0 Weight w_ials: 0.7500000000000004 started.
MAP: 0.06627007771658733
Execution Time: 26.768981456756592
Iteration 0 Weight w_ials: 0.7600000000000005 started.
MAP: 0.06629325735489527
Execution Time: 25.46175241470337
Iteration 0 Weight w_ials: 0.7700000000000005 started.
MAP: 0.0662672179108848
Execution Time: 26.798299551010132
Iteration 0 Weight w_ials: 0.7800000000000005 started.
MAP: 0.06620505344367546
Execution Time: 26.525686979293823
Iteration 0 Weight w_ials: 0.7900000000000005 started.
MAP: 0.06614645090686204
Exe

Execution Time: 27.557692050933838
Iteration 0 Weight w_ials: 1.430000000000001 started.
MAP: 0.06525357282503702
Execution Time: 26.23724126815796
Iteration 0 Weight w_ials: 1.440000000000001 started.
MAP: 0.06526494488520104
Execution Time: 26.967188835144043
Iteration 0 Weight w_ials: 1.450000000000001 started.
MAP: 0.06524850954283788
Execution Time: 27.214332103729248
Iteration 0 Weight w_ials: 1.460000000000001 started.
MAP: 0.06520720573379964
Execution Time: 26.0011568069458
Iteration 0 Weight w_ials: 1.470000000000001 started.
MAP: 0.06507546595102437
Execution Time: 27.54248023033142
Iteration 0 Weight w_ials: 1.480000000000001 started.
MAP: 0.06505596414982107
Execution Time: 25.69601011276245
Iteration 0 Weight w_ials: 1.490000000000001 started.
MAP: 0.06490861903998162
Execution Time: 27.566853046417236
Iteration 0 Weight w_ials: 1.500000000000001 started.
MAP: 0.06489015012624148
Execution Time: 25.584367513656616
Iteration 0 Weight w_ials: 1.5100000000000011 started.
MAP

MAP: 0.0640263117259965
Execution Time: 25.835328817367554
Iteration 0 Weight w_ials: 2.149999999999998 started.
MAP: 0.06401138357138239
Execution Time: 28.706435441970825
Iteration 0 Weight w_ials: 2.159999999999998 started.
MAP: 0.06400093774247066
Execution Time: 25.61432147026062
Iteration 0 Weight w_ials: 2.1699999999999977 started.
MAP: 0.06399851522749195
Execution Time: 28.67032027244568
Iteration 0 Weight w_ials: 2.1799999999999975 started.
MAP: 0.0639867290238429
Execution Time: 25.612873792648315
Iteration 0 Weight w_ials: 2.1899999999999973 started.
MAP: 0.06399339101012506
Execution Time: 28.687541484832764
Iteration 0 Weight w_ials: 2.199999999999997 started.
MAP: 0.06397240520284533
Execution Time: 25.770790100097656
Iteration 0 Weight w_ials: 2.209999999999997 started.
MAP: 0.06393030009067695
Execution Time: 28.46259832382202
Iteration 0 Weight w_ials: 2.2199999999999966 started.
MAP: 0.06390941385777793
Execution Time: 25.683320999145508
Iteration 0 Weight w_ials: 2.

Iteration 0 Weight w_ials: 2.859999999999983 started.
MAP: 0.06247875709866543
Execution Time: 27.65181016921997
Iteration 0 Weight w_ials: 2.869999999999983 started.
MAP: 0.06250731634073534
Execution Time: 25.717727661132812
Iteration 0 Weight w_ials: 2.8799999999999826 started.
MAP: 0.06248338274660894
Execution Time: 29.803101301193237
Iteration 0 Weight w_ials: 2.8899999999999824 started.
MAP: 0.06240235388035366
Execution Time: 25.956761598587036
Iteration 0 Weight w_ials: 2.899999999999982 started.
MAP: 0.06242313711147361
Execution Time: 28.641610622406006
Iteration 0 Weight w_ials: 2.909999999999982 started.
MAP: 0.062438524935497
Execution Time: 27.021097660064697
Iteration 0 Weight w_ials: 2.9199999999999817 started.
MAP: 0.06233450718957455
Execution Time: 25.776440620422363
Iteration 0 Weight w_ials: 2.9299999999999815 started.
MAP: 0.062366160500772705
Execution Time: 29.643184900283813
Iteration 0 Weight w_ials: 2.9399999999999813 started.
MAP: 0.062353106606893215
Execu

KeyboardInterrupt: 

In [None]:
print(best_ials)
print(best_MAP)

In [None]:
w = {
    "user_cf": 0.32,
    "item_cf": 0.19, #0.3 con train
    "cbf": 0.39, #0.77
    "icm_svd": 0,
    "als": 0,#2
    "ials" : 0,
    "slim_bpr": 0,
    "elastic": 0,
    "p3alpha" : 0,
    "rp3beta" : 0
}

In [None]:
userCF = UserBasedCollaborativeFiltering(urm_train_validation)

In [None]:
userCF.fit(knn=user_cf_param["knn"], shrink=user_cf_param["shrink"],similarity='cosine')

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(userCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
itemCF = ItemBasedCollaborativeFiltering(urm_train_validation)

In [None]:
itemCF.fit(knn=item_cf_param["knn"], shrink=item_cf_param["shrink"], similarity="cosine")

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(itemCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
SlimBpr=SlimBPRRec(urm_train_validation)

In [None]:
SlimBpr.fit(nnz=slim_bpr_param["nnz"], knn=slim_bpr_param["knn"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(SlimBpr, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
slim_elastic = SLIMElasticNetRecommender(urm_train)

In [None]:
slim_elastic.fit()

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(slim_elastic, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
cbf = ContentBasedFiltering(urm_train_validation,ICM)

In [None]:
cbf.fit(knn=cbf_param["knn"],shrink=cbf_param["shrink"],similarity='cosine')

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(cbf, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
ALS = AlternatingLeastSquare(urm_train_validation)

In [None]:
ALS.fit(n_factors=als_param["n_factors"], regularization=als_param["regularization"],iterations=als_param["iterations"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(ALS, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
P3alpha = P3alphaRecommender(urm_train_validation)

In [None]:
P3alpha.fit(topK=p3alpha_param["knn"], alpha=p3alpha_param["alpha"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(P3alpha, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
RP3beta = RP3betaRecommender(urm_train_validation)

In [None]:
RP3beta.fit(topK=rp3beta_param["knn"], alpha=rp3beta_param["alpha"], beta=rp3beta_param["beta"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(RP3beta, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
recommender = HybridRecommender(urm_train_validation, ICM, userCF, itemCF, cbf, ALS)

In [None]:
recommender.fit(user_cf_param=user_cf_param,item_cf_param=item_cf_param,cbf_param=cbf_param,
                slim_param=slim_bpr_param,als_param=als_param, w_user=w["user_cf"], w_item=w["item_cf"], w_cbf=w["cbf"], w_p3alpha=w["p3alpha"], w_als=w["als"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(recommender, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

**Submission to competition**

This step serves as a similar step that you will perform when preparing a submission to the competition. Specially after you have chosen and trained your recommender.

For this step the best suggestion is to select the most-performing configuration obtained in the hyperparameter tuning step and to train the recommender using both the train and validation set. Remember that in the competition you do not have access to the test set.

Another consideration is that, due to easier and faster calculations, we replaced the user/item identifiers with new ones in the preprocessing step. For the competition, you are required to generate recommendations using the dataset's original identifiers. Due to this, this step also reverts back the newer identifiers with the ones originally found in the dataset.

Last, this step creates a function that writes the recommendations for each user in the same file in a tabular format following this format:

csv
<user_id>,<item_id_1> <item_id_2> <item_id_3> <item_id_4> <item_id_5> <item_id_6> <item_id_7> <item_id_8> <item_id_9> <item_id_10>
Always verify the competitions' submission file model as it might vary from the one we presented here.

In [None]:
def load_goodguys():
  return pd.read_csv("./data_target_users_test.csv")
goodguys=load_goodguys()

In [None]:
goodguys

In [None]:
users_to_recommend = np.random.choice(goodguys.user_id,size=goodguys.size, replace=False)
users_to_recommend

In [None]:
def prepare_submission(users_to_recommend: np.array, urm_train: sp.csr_matrix, recommender: object):
    
    recommendation_length = 10
    submission = []
    
    for user_id in users_to_recommend :

        recommendations = recommender.recommend2(user_id, urm_train, recommendation_length)

        
        submission.append((user_id, [item_id for item_id in recommendations]))
   
    return submission

In [None]:
submission = prepare_submission(users_to_recommend, urm_train_validation, recommender)

In [None]:
submission

In [None]:
import os
from datetime import datetime

csv_fname = './submission'
csv_fname += datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'

def write_submission(submissions):
    with open(csv_fname, "w") as f:
        f.write(f"user_id,item_list\n")
        for user_id, items in submissions:
            f.write(f"{user_id},{' '.join([str(item) for item in items])}\n")


In [None]:
write_submission(submission)