In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
from tqdm import tqdm
from Base.DataIO import DataIO

#----Recommenders----
from SLIM.SLIM_BPR_Python import SLIM_BPR_Python
from SLIM.SlimElasticNet import SLIMElasticNetRecommender
from cf.item_cf3 import ItemBasedCollaborativeFiltering
from cf.user_cf2 import UserBasedCollaborativeFiltering
from cbf.cbf import ContentBasedFiltering
from SlimBPR.SlimBPRRec import SlimBPRRec
from SlimBPR.SlimBPR import SlimBPR

#---------------

#----Model Load & Save----
from Data_manager.Dataset import Dataset
#-------------------------

from sklearn.model_selection import train_test_split

In [2]:
def load_data_ICM():
  return pd.read_csv("./data_ICM_title_abstract.csv")

In [3]:
features=load_data_ICM()
d ={'item_id': features['row'],'feature_id':features['col'],'value':features['data']}
features=pd.DataFrame(data=d)
itemList=list(d['item_id'])

In [4]:
featureList=list(d['feature_id'])

In [5]:
valueList=list(d['value'])
ICM = sp.coo_matrix((valueList,(itemList,featureList)))
ICM = ICM.tocsr()

In [6]:
ICM

<25975x20000 sparse matrix of type '<class 'numpy.float64'>'
	with 490691 stored elements in Compressed Sparse Row format>

**Dataset splitting into train,validation and test**

This is the last part before creating the recommender. However, this step is super important, as it is the base for the training, parameters optimization, and evaluation of the recommender(s).

In here we read the ratings (which we loaded and preprocessed before) and create the train, validation, and test User-Rating Matrices (URM). It's important that these are disjoint to avoid information leakage from the train into the validation/test set, in our case, we are safe to use the train_test_split function from scikit-learn as the dataset only contains one datapoint for every (user,item) pair. On another topic, we first create the test set and then we create the validation by splitting again the train set.

train_test_split takes an array (or several arrays) and divides it into train and test according to a given size (in our case testing_percentage and validation_percentage, which need to be a float between 0 and 1).

After we have our different splits, we create the sparse URMs by using the csr_matrix function from scipy.




In [7]:
#Load
loaded_dataset = Dataset(None, None, None, None)
loaded_dataset.load_data("Salvataggi/")


urm_train = (loaded_dataset.get_urm_train())["urm_train"]
urm_validation = (loaded_dataset.get_urm_validation())["urm_validation"]
urm_test = loaded_dataset.get_urm_test()["urm_test"]

In [8]:
urm_train_validation = urm_train + urm_validation

**Evaluation Metrics**

In [9]:
def recall(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    recall_score = np.sum(is_relevant) / relevant_items.shape[0]
    
    return recall_score
    
    
def precision(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    precision_score = np.sum(is_relevant) / recommendations.shape[0]

    return precision_score

def mean_average_precision(recommendations: np.array, relevant_items: np.array) -> float:
    is_relevant = np.in1d(recommendations, relevant_items, assume_unique=True)
    
    precision_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (1 + np.arange(is_relevant.shape[0]))

    map_score = np.sum(precision_at_k) / np.min([relevant_items.shape[0], is_relevant.shape[0]])

    return map_score

**Evaluation Procedure**

The evaluation procedure returns the averaged accuracy scores (in terms of precision, recall and MAP) for all users (that have at least 1 rating in the test set). It also calculates the number of evaluated and skipped users. It receives a recommender instance, and the train and test URMs.

In [10]:
def evaluator(recommender: object, urm_train: sp.csr_matrix, urm_test: sp.csr_matrix):
    recommendation_length = 10
    accum_precision = 0
    accum_recall = 0
    accum_map = 0
    
    num_users = urm_train.shape[0]
    
    num_users_evaluated = 0
    num_users_skipped = 0
    
    for user_id in range(num_users):
        user_profile_start = urm_test.indptr[user_id]
        user_profile_end = urm_test.indptr[user_id+1]

        relevant_items = urm_test.indices[user_profile_start:user_profile_end]

        if relevant_items.size == 0:
            num_users_skipped += 1
            continue
            
#         recommendations = recommender.recommend(user_id_array=user_id,
#                                                cutoff=recommendation_length,
#                                                remove_seen_flag=True
#                                                )

        expected_ratings = recommender.get_expected_ratings(user_id)
        recommended_items = np.flip(np.argsort(expected_ratings), 0)

        unseen_items_mask = np.in1d(recommended_items,urm_train[user_id].indices,
                                        assume_unique=True, invert=True)

        recommendations = recommended_items[unseen_items_mask]


        accum_precision += precision(recommendations, relevant_items)
        accum_recall += recall(recommendations, relevant_items)
        accum_map += mean_average_precision(recommendations, relevant_items)

        num_users_evaluated += 1

    
    accum_precision /= max(num_users_evaluated, 1)
    accum_recall /= max(num_users_evaluated, 1)
    accum_map /=  max(num_users_evaluated, 1)
    
    return accum_precision, accum_recall, accum_map, num_users_evaluated, num_users_skipped

**TUNING BPR learning rate = e-5**

In [11]:
recommenderToTune = SlimBPRRec(urm_train)

In [15]:
import time


best_le = np.zeros(2000)
best_nnz = np.zeros(2000)
best_knn = np.zeros(2000)
best_MAP = np.zeros(2000)

counter = 0
start = time.time()
for learning_rate in range(1, 9) :
    
    learning_rate *= 1e-5
    nnz=0.3
    for i in range(1, 9) :
        
        print("Iteration " + str(counter) + " learning_rate: " + str(learning_rate)   + " nnz: " + str(nnz)  + " started.")

        recommenderToTune.fit(learning_rate, nnz)
        
        best_le[counter] = learning_rate
        best_nnz[counter] = nnz
        
            #Check knn
        for knn in range(150, 1500, 10) :

            recommenderToTune.getSimilarity(knn=knn)

            accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(recommenderToTune, 
                                                                                                    urm_train_validation, 
                                                                                                    urm_test)

            print("MAP iteration " + str(counter) + ": " + str(accum_map))

            if accum_map > best_MAP[counter] :
                best_knn[counter] = knn
                best_MAP[counter] = accum_map
        
        print("Iteration: " + str(counter) + " ended. Results: learning rate = " + str(learning_rate) + " nnz = " + str(nnz) + " knn = " + str(knn))
        counter += 1
        nnz += 0.1
        
stop = time.time()
print("Execution Time: " + str(stop-start))

Iteration 0 learning_rate: 1e-05 nnz: 0.3 started.
Get S SLIM BPR...
Epoch: 0




  0%|                                                                                        | 0/24532 [00:00<?, ?it/s]

  1%|▌                                                                           | 177/24532 [00:00<00:13, 1769.67it/s]

  1%|█                                                                           | 337/24532 [00:00<00:14, 1707.36it/s]

  2%|█▌                                                                          | 497/24532 [00:00<00:14, 1672.61it/s]

  3%|██                                                                          | 679/24532 [00:00<00:13, 1712.24it/s]

  3%|██▌                                                                         | 844/24532 [00:00<00:14, 1690.87it/s]

  4%|███                                                                        | 1018/24532 [00:00<00:13, 1705.18it/s]

  5%|███▋                                                                       | 1189/24532 [00:00<00:13, 1702.96it/s]

  5%|████                     

 46%|█████████████████████████████████▊                                        | 11193/24532 [00:06<00:07, 1701.52it/s]

 46%|██████████████████████████████████▎                                       | 11371/24532 [00:07<00:07, 1716.10it/s]

 47%|██████████████████████████████████▊                                       | 11543/24532 [00:07<00:07, 1702.04it/s]

 48%|███████████████████████████████████▎                                      | 11714/24532 [00:07<00:07, 1649.14it/s]

 48%|███████████████████████████████████▊                                      | 11880/24532 [00:07<00:08, 1572.79it/s]

 49%|████████████████████████████████████▎                                     | 12049/24532 [00:07<00:07, 1605.23it/s]

 50%|████████████████████████████████████▉                                     | 12225/24532 [00:07<00:07, 1648.62it/s]

 51%|█████████████████████████████████████▍                                    | 12391/24532 [00:07<00:07, 1621.08it/s]

 51%|███████████████████████████

 92%|████████████████████████████████████████████████████████████████████▍     | 22671/24532 [00:13<00:01, 1668.27it/s]

 93%|████████████████████████████████████████████████████████████████████▉     | 22848/24532 [00:13<00:00, 1688.14it/s]

 94%|█████████████████████████████████████████████████████████████████████▍    | 23018/24532 [00:14<00:00, 1586.01it/s]

 94%|█████████████████████████████████████████████████████████████████████▉    | 23179/24532 [00:14<00:00, 1589.25it/s]

 95%|██████████████████████████████████████████████████████████████████████▍   | 23347/24532 [00:14<00:00, 1610.63it/s]

 96%|██████████████████████████████████████████████████████████████████████▉   | 23509/24532 [00:14<00:00, 1607.91it/s]

 96%|███████████████████████████████████████████████████████████████████████▍  | 23671/24532 [00:14<00:00, 1559.80it/s]

 97%|███████████████████████████████████████████████████████████████████████▉  | 23828/24532 [00:14<00:00, 1523.98it/s]

 98%|███████████████████████████

Epoch: 1




  0%|                                                                                        | 0/24532 [00:00<?, ?it/s]

  1%|▌                                                                           | 178/24532 [00:00<00:14, 1656.75it/s]

  1%|█                                                                           | 346/24532 [00:00<00:14, 1649.33it/s]

  2%|█▌                                                                          | 523/24532 [00:00<00:14, 1682.48it/s]

  3%|██▏                                                                         | 689/24532 [00:00<00:14, 1673.06it/s]

  3%|██▋                                                                         | 852/24532 [00:00<00:14, 1658.37it/s]

  4%|███                                                                        | 1021/24532 [00:00<00:14, 1665.69it/s]

  5%|███▋                                                                       | 1202/24532 [00:00<00:13, 1704.87it/s]

  6%|████▏                    

 45%|█████████████████████████████████▍                                        | 11083/24532 [00:06<00:09, 1484.52it/s]

 46%|█████████████████████████████████▉                                        | 11234/24532 [00:07<00:09, 1446.70it/s]

 46%|██████████████████████████████████▍                                       | 11402/24532 [00:07<00:08, 1505.81it/s]

 47%|██████████████████████████████████▉                                       | 11567/24532 [00:07<00:08, 1540.37it/s]

 48%|███████████████████████████████████▎                                      | 11723/24532 [00:07<00:08, 1463.81it/s]

 49%|███████████████████████████████████▉                                      | 11900/24532 [00:07<00:08, 1540.34it/s]

 49%|████████████████████████████████████▎                                     | 12057/24532 [00:07<00:08, 1537.42it/s]

 50%|████████████████████████████████████▊                                     | 12216/24532 [00:07<00:07, 1550.27it/s]

 51%|███████████████████████████

 93%|████████████████████████████████████████████████████████████████████▌     | 22741/24532 [00:13<00:01, 1631.13it/s]

 93%|█████████████████████████████████████████████████████████████████████     | 22908/24532 [00:13<00:00, 1639.36it/s]

 94%|█████████████████████████████████████████████████████████████████████▌    | 23075/24532 [00:14<00:00, 1644.86it/s]

 95%|██████████████████████████████████████████████████████████████████████    | 23241/24532 [00:14<00:00, 1644.41it/s]

 95%|██████████████████████████████████████████████████████████████████████▌   | 23406/24532 [00:14<00:00, 1642.54it/s]

 96%|███████████████████████████████████████████████████████████████████████   | 23576/24532 [00:14<00:00, 1654.70it/s]

 97%|███████████████████████████████████████████████████████████████████████▌  | 23742/24532 [00:14<00:00, 1643.22it/s]

 97%|████████████████████████████████████████████████████████████████████████  | 23907/24532 [00:14<00:00, 1550.20it/s]

 98%|███████████████████████████

Epoch: 2




  0%|                                                                                        | 0/24532 [00:00<?, ?it/s]

  1%|▌                                                                           | 181/24532 [00:00<00:14, 1732.84it/s]

  1%|█                                                                           | 325/24532 [00:00<00:14, 1628.35it/s]

  2%|█▍                                                                          | 473/24532 [00:00<00:15, 1576.14it/s]

  3%|█▉                                                                          | 644/24532 [00:00<00:14, 1612.62it/s]

  3%|██▍                                                                         | 806/24532 [00:00<00:14, 1614.68it/s]

  4%|██▉                                                                         | 961/24532 [00:00<00:14, 1584.82it/s]

  5%|███▍                                                                       | 1125/24532 [00:00<00:14, 1597.77it/s]

  5%|███▉                     

 45%|█████████████████████████████████▌                                        | 11137/24532 [00:06<00:08, 1533.41it/s]

 46%|██████████████████████████████████                                        | 11296/24532 [00:07<00:08, 1547.86it/s]

 47%|██████████████████████████████████▌                                       | 11464/24532 [00:07<00:08, 1581.17it/s]

 47%|███████████████████████████████████                                       | 11623/24532 [00:07<00:08, 1577.06it/s]

 48%|███████████████████████████████████▌                                      | 11782/24532 [00:07<00:08, 1576.86it/s]

 49%|████████████████████████████████████                                      | 11940/24532 [00:07<00:08, 1573.77it/s]

 49%|████████████████████████████████████▍                                     | 12098/24532 [00:07<00:07, 1571.40it/s]

 50%|████████████████████████████████████▉                                     | 12256/24532 [00:07<00:07, 1570.57it/s]

 51%|███████████████████████████

 90%|██████████████████████████████████████████████████████████████████▊       | 22152/24532 [00:13<00:01, 1540.28it/s]

 91%|███████████████████████████████████████████████████████████████████▎      | 22307/24532 [00:13<00:01, 1540.86it/s]

 92%|███████████████████████████████████████████████████████████████████▊      | 22465/24532 [00:13<00:01, 1544.51it/s]

 92%|████████████████████████████████████████████████████████████████████▎     | 22629/24532 [00:14<00:01, 1567.73it/s]

 93%|████████████████████████████████████████████████████████████████████▊     | 22797/24532 [00:14<00:01, 1597.76it/s]

 94%|█████████████████████████████████████████████████████████████████████▎    | 22968/24532 [00:14<00:00, 1625.49it/s]

 94%|█████████████████████████████████████████████████████████████████████▊    | 23131/24532 [00:14<00:00, 1620.35it/s]

 95%|██████████████████████████████████████████████████████████████████████▎   | 23294/24532 [00:14<00:00, 1578.49it/s]

 96%|███████████████████████████

Keeping only knn = 150 ...




  0%|                                                                                        | 0/25975 [00:00<?, ?it/s]

  4%|███                                                                       | 1061/25975 [00:00<00:02, 10461.92it/s]

  8%|██████                                                                    | 2111/25975 [00:00<00:02, 10460.47it/s]

 12%|████████▉                                                                 | 3159/25975 [00:00<00:02, 10453.69it/s]

 16%|████████████                                                              | 4231/25975 [00:00<00:02, 10496.61it/s]

 20%|███████████████                                                           | 5294/25975 [00:00<00:01, 10522.19it/s]

 24%|██████████████████                                                        | 6350/25975 [00:00<00:01, 10515.02it/s]

 28%|████████████████████▉                                                     | 7364/25975 [00:00<00:01, 10392.94it/s]

 32%|████████████████████████ 

MAP iteration 0: 0.039081798742153
Keeping only knn = 1150 ...




  0%|                                                                                        | 0/25975 [00:00<?, ?it/s]

  3%|██▎                                                                         | 805/25975 [00:00<00:03, 8029.05it/s]

  6%|████▌                                                                      | 1596/25975 [00:00<00:03, 7974.45it/s]

  8%|██████                                                                     | 2103/25975 [00:00<00:03, 6791.34it/s]

 10%|███████▌                                                                   | 2602/25975 [00:00<00:03, 6122.23it/s]

 12%|████████▉                                                                  | 3103/25975 [00:00<00:03, 5736.70it/s]

 14%|██████████▍                                                                | 3611/25975 [00:00<00:04, 5518.96it/s]

 16%|███████████▉                                                               | 4124/25975 [00:00<00:04, 5392.55it/s]

 18%|█████████████▎           

MAP iteration 0: 0.039081798742153
Iteration: 0 ended. Results: learning rate = 1e-05 nnz = 0.3 knn = 1150
Iteration 1 learning_rate: 2e-05 nnz: 0.3 started.
Get S SLIM BPR...
Epoch: 0




  0%|                                                                                        | 0/24532 [00:00<?, ?it/s]

  0%|▏                                                                             | 77/24532 [00:00<00:31, 769.85it/s]

  1%|▌                                                                            | 163/24532 [00:00<00:31, 782.98it/s]

  1%|▋                                                                            | 237/24532 [00:00<00:31, 767.79it/s]

  1%|▉                                                                            | 309/24532 [00:00<00:32, 751.27it/s]

  2%|█▍                                                                           | 441/24532 [00:00<00:27, 862.54it/s]

  2%|█▉                                                                          | 610/24532 [00:00<00:23, 1009.61it/s]

  3%|██▍                                                                         | 778/24532 [00:00<00:20, 1145.62it/s]

  4%|██▉                      

 35%|██████████████████████████▎                                                | 8607/24532 [00:08<00:14, 1105.52it/s]

 36%|██████████████████████████▋                                                | 8728/24532 [00:08<00:15, 1038.08it/s]

 36%|███████████████████████████▍                                                | 8840/24532 [00:08<00:17, 880.56it/s]

 36%|███████████████████████████▋                                                | 8938/24532 [00:08<00:19, 807.16it/s]

 37%|███████████████████████████▉                                                | 9027/24532 [00:08<00:20, 765.96it/s]

 37%|████████████████████████████▏                                               | 9110/24532 [00:08<00:20, 748.02it/s]

 38%|████████████████████████████▌                                               | 9226/24532 [00:08<00:18, 837.18it/s]

 38%|█████████████████████████████                                               | 9375/24532 [00:09<00:15, 963.82it/s]

 39%|███████████████████████████

 69%|██████████████████████████████████████████████████▊                       | 16864/24532 [00:16<00:05, 1307.27it/s]

 69%|███████████████████████████████████████████████████▎                      | 17007/24532 [00:16<00:07, 1044.84it/s]

 70%|████████████████████████████████████████████████████▎                      | 17128/24532 [00:16<00:07, 984.86it/s]

 70%|████████████████████████████████████████████████████                      | 17257/24532 [00:16<00:06, 1059.88it/s]

 71%|████████████████████████████████████████████████████▍                     | 17380/24532 [00:16<00:06, 1100.89it/s]

 71%|█████████████████████████████████████████████████████▍                     | 17498/24532 [00:17<00:07, 928.27it/s]

 72%|█████████████████████████████████████████████████████▊                     | 17601/24532 [00:17<00:08, 836.79it/s]

 72%|██████████████████████████████████████████████████████                     | 17694/24532 [00:17<00:08, 816.06it/s]

 72%|███████████████████████████

Epoch: 1




  0%|                                                                                        | 0/24532 [00:00<?, ?it/s]

  1%|▌                                                                           | 201/24532 [00:00<00:13, 1863.14it/s]

  2%|█▏                                                                          | 376/24532 [00:00<00:13, 1823.48it/s]

  2%|█▋                                                                          | 525/24532 [00:00<00:14, 1707.58it/s]

  3%|██                                                                          | 677/24532 [00:00<00:14, 1645.10it/s]

  3%|██▌                                                                         | 832/24532 [00:00<00:14, 1613.52it/s]

  4%|███                                                                         | 998/24532 [00:00<00:14, 1623.81it/s]

  5%|███▌                                                                       | 1149/24532 [00:00<00:14, 1587.80it/s]

  5%|████                     

KeyboardInterrupt: 

In [None]:
#Tieni i migliori 5
le_top5 = np.zeros(10, dtype=float)
nnz_top5 = np.zeros(10, dtype=float)
knn_top5 = np.zeros(10, dtype=int)
MAP_top5 = np.zeros(10)

#Sort of best maps decrescent
MAP_sorted = np.sort(best_MAP)[::-1]

#Tengo solo i primi 400 MAP
MAP_top5 = MAP_sorted[:5]

counter = 0

#MAP_top400[0] will be the highest mapping_value 
for mapping_value in MAP_top5 :
    
    for iteration in range(0, best_MAP.shape[0]) :
        
        if best_MAP[iteration] == mapping_value :
            
            le_top5[counter] = best_le[iteration]
            nnz_top5[counter] = best_nnz[iteration]
            knn_top5[counter] = best_knn
            
            counter += 1
            
print(knn_top5)
print(shrink_top5)
print(MAP_top5)

In [None]:
#Params for the recommenders
cbf_param = {
    "knn": 380,
    "shrink": 9
}

user_cf_param = {
    "knn": 249,
    "shrink": 853
}

item_cf_param = {
    "knn": 119,
    "shrink": 430
}

slim_bpr_param = {
    "learning_rate" : 0.05, #0.00214168231523243
    "epochs": 10,
    "nnz" : 1,
    "knn": 200
}

als_param = {
    "n_factors": 300,
    "regularization": 0.15,
    "iterations": 30
}

Ials_param = {
    "num_factors" : 840,
    "confidence_scaling" : "linear",
    "alpha" : 0.40,
    "epsilon" : 0.28,
    "reg" : 0.0003
}


In [None]:
userCF = UserBasedCollaborativeFiltering(urm_train_validation)

In [None]:
userCF.fit(knn=user_cf_param["knn"], shrink=user_cf_param["shrink"],similarity='cosine')

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(userCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
itemCF = ItemBasedCollaborativeFiltering(urm_train_validation)

In [None]:
itemCF.fit(knn=item_cf_param["knn"], shrink=item_cf_param["shrink"], similarity="cosine")

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(itemCF, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
SlimBpr=SlimBPRRec(learning_rate=slim_bpr_param["learning_rate"], epochs=slim_bpr_param["epochs"], nnz=slim_bpr_param["nnz"], knn=slim_bpr_param["knn"])

In [None]:
SlimBpr.fit(urm_train)

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(SlimBpr, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
slim_elastic = SLIMElasticNetRecommender(urm_train)

In [None]:
slim_elastic.fit()

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(slim_elastic, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
cbf = ContentBasedFiltering(urm_train_validation,ICM)

In [None]:
cbf.fit(knn=cbf_param["knn"],shrink=cbf_param["shrink"],similarity='cosine')

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(cbf, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
ALS = AlternatingLeastSquare(urm_train)

In [None]:
ALS.fit(n_factors=als_param["n_factors"], regularization=als_param["regularization"],iterations=als_param["iterations"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(ALS, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

In [None]:
IALS = IALSRecommender(urm_train)

In [None]:
IALS.fit(num_factors=Ials_param["num_factors"], confidence_scaling=Ials_param["confidence_scaling"], alpha=Ials_param["alpha"], epsilon=Ials_param["epsilon"], reg=Ials_param["reg"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(IALS, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

**Hybrid**

**Hybrid Recommender Tuning**

In [None]:
from Base.Evaluation.Evaluator import EvaluatorHoldout

evaluator_validation = EvaluatorHoldout(urm_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

In [None]:
from skopt.space import Real, Integer, Categorical

hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["w_user"] = Integer(0, 1000)
hyperparameters_range_dictionary["w_item"] = Integer(0, 1000)
hyperparameters_range_dictionary["w_cbf"] = Integer(0, 1000)

In [None]:
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

recommender_class = HybridRecommender

parameterSearch = SearchBayesianSkopt(recommender_class,
                                 evaluator_validation=evaluator_validation,
                                 evaluator_test=evaluator_test)

In [None]:
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
  
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [urm_train, ICM, userCF, itemCF, cbf],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [user_cf_param, item_cf_param, cbf_param, slim_bpr_param, als_param],
    FIT_KEYWORD_ARGS = {}
)

In [None]:
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [urm_train_validation, ICM, userCF, itemCF, cbf],     # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [user_cf_param, item_cf_param, cbf_param, slim_bpr_param, als_param],
    FIT_KEYWORD_ARGS = {}
)

In [None]:
import os

output_folder_path = "result_experiments/"

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)
    
n_cases = 300
n_random_starts = int(n_cases*0.45)
metric_to_optimize = "MAP"

In [None]:
parameterSearch.search(recommender_input_args,
                       recommender_input_args_last_test = recommender_input_args_last_test,
                       parameter_search_space = hyperparameters_range_dictionary,
                       n_cases = n_cases,
                       n_random_starts = n_random_starts,
                       save_model = "last",
                       output_folder_path = output_folder_path,
                       output_file_name_root = recommender_class.RECOMMENDER_NAME,
                       metric_to_optimize = metric_to_optimize,
                      )

In [None]:
data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(recommender_class.RECOMMENDER_NAME + "_metadata.zip")
best_parameters = search_metadata["hyperparameters_best"]
best_parameters

In [None]:
w = {
    "user_cf": 993,
    "item_cf": 513,
    "cbf": 44,
    "icm_svd": 0,
    "als": 0,
    "slim": 0,
    "elastic": 0
}

In [None]:
recommender = HybridRecommender(urm_train_validation, ICM, userCF, itemCF, cbf)

In [None]:
recommender.fit(user_cf_param=user_cf_param,item_cf_param=item_cf_param,cbf_param=cbf_param,
                slim_param=slim_bpr_param,als_param=als_param, w_user=w["user_cf"], w_item=w["item_cf"], w_cbf=w["cbf"])

In [None]:
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped = evaluator(recommender, 
                                                                                            urm_train_validation, 
                                                                                            urm_test)
accum_precision, accum_recall, accum_map, num_user_evaluated, num_users_skipped

**Submission to competition**

This step serves as a similar step that you will perform when preparing a submission to the competition. Specially after you have chosen and trained your recommender.

For this step the best suggestion is to select the most-performing configuration obtained in the hyperparameter tuning step and to train the recommender using both the train and validation set. Remember that in the competition you do not have access to the test set.

Another consideration is that, due to easier and faster calculations, we replaced the user/item identifiers with new ones in the preprocessing step. For the competition, you are required to generate recommendations using the dataset's original identifiers. Due to this, this step also reverts back the newer identifiers with the ones originally found in the dataset.

Last, this step creates a function that writes the recommendations for each user in the same file in a tabular format following this format:

csv
<user_id>,<item_id_1> <item_id_2> <item_id_3> <item_id_4> <item_id_5> <item_id_6> <item_id_7> <item_id_8> <item_id_9> <item_id_10>
Always verify the competitions' submission file model as it might vary from the one we presented here.

In [None]:
def load_goodguys():
  return pd.read_csv("./data_target_users_test.csv")
goodguys=load_goodguys()

In [None]:
goodguys

In [None]:
users_to_recommend = np.random.choice(goodguys.user_id,size=goodguys.size, replace=False)
users_to_recommend

In [None]:
def prepare_submission(users_to_recommend: np.array, urm_train: sp.csr_matrix, recommender: object):
    
    recommendation_length = 10
    submission = []
    
    for user_id in users_to_recommend :

        recommendations = recommender.recommend2(user_id, urm_train, recommendation_length)

        
        submission.append((user_id, [item_id for item_id in recommendations]))
   
    return submission

In [None]:
submission = prepare_submission(users_to_recommend, urm_train_validation, recommender)

In [None]:
submission

In [None]:
import os
from datetime import datetime

csv_fname = './submission'
csv_fname += datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'

def write_submission(submissions):
    with open(csv_fname, "w") as f:
        f.write(f"user_id,item_list\n")
        for user_id, items in submissions:
            f.write(f"{user_id},{' '.join([str(item) for item in items])}\n")


In [None]:
write_submission(submission)